Gibbs sampling
Joint probability $p(z_1,z_2,…,z_n)$에서 샘플링할 때, 특정 하나의 확률변수($z_i$)와 그것을 제외한 나머지 확률변수($z_{-i}$)를 conditional probability: $p(z_i \mid z_{-i}$)로 샘플링하는 방법
Gibbs sampling 예제 코드
from collections import defaultdict
import random
import pandas as pd
import seaborn as sns
def roll_a_die():
return random.choice([1,2,3,4,5,6])
def direct_sample():
d1 = roll_a_die()
d2 = roll_a_die()
return d1, d1+d2
key
는 0으로 초기화합니다.direct_counts = defaultdict(lambda:0) # initialize 0 for unseen key
n_sample = 7000
for _ in range(n_sample):
direct_counts[direct_sample()]+=1
direct_counts
defaultdict(<function __main__.<lambda>()>,
{(5, 7): 192,
(6, 9): 176,
(5, 8): 183,
(1, 4): 198,
(6, 7): 200,
(2, 5): 191,
(6, 8): 201,
(2, 8): 188,
(2, 4): 196,
(5, 11): 188,
(5, 9): 225,
(6, 10): 177,
(1, 2): 194,
(2, 7): 194,
(4, 5): 186,
(3, 8): 160,
(3, 5): 217,
(3, 6): 197,
(3, 9): 198,
(1, 7): 196,
(3, 7): 189,
(6, 11): 196,
(5, 10): 188,
(2, 6): 189,
(3, 4): 186,
(5, 6): 192,
(4, 9): 196,
(6, 12): 198,
(1, 6): 200,
(4, 10): 201,
(4, 6): 194,
(1, 3): 186,
(4, 7): 204,
(2, 3): 213,
(1, 5): 201,
(4, 8): 210})
direct_df = pd.DataFrame([[var[0], var[1], count ]
for var, count in direct_counts.items()],
columns= ['X','Y', 'Count'])
print(direct_df.head(5))
X Y Count
0 5 7 192
1 6 9 176
2 5 8 183
3 1 4 198
4 6 7 200
direct_array = [item for sublist in
[[[x,y]]*z for x,y,z in direct_df.values]
for item in sublist]
direct_visual_input = pd.DataFrame(direct_array, columns=['X','Y'])
sns.jointplot(x="X", y="Y", data=direct_visual_input, kind="kde")
<seaborn.axisgrid.JointGrid at 0x11733d470>
def random_y_given_x(x):
"""x+1, x+2, x+3, ..."""
return x + roll_a_die()
def random_x_given_y(y):
if y <=7:
"""두개의 주사위 합이 7보다 작거나 같으면, 첫번쨰 주사위는 1~6의 값을 가짐"""
return random.randrange(1,y)
else:
"""두개의 주사위 합이 7보다 크면, 첫번째 주사위는 (y-6)~6의 값을 가짐
예를 들어, 주사위 합이 10이면, 첫번째 주사위는 4~6의 값을 가짐
(3이하의 값은 주어진 10을 표현할 수 없음)"""
return random.randrange(y-6,7)
num_iters
를 너무 작게 하면 분포가 한쪽으로 치우치는 경향이 있어 10번정도 반복하였습니다.def gibbs_sample(num_iters = 10):
x,y = 1, 2 # inititalze possible any numbers
for _ in range(num_iters):
x = random_x_given_y(y)
y = random_y_given_x(x)
return x, y
gibbs_counts = defaultdict(lambda:0) # initialize 0 for unseen key
n_sample = 7000
for _ in range(n_sample):
gibbs_counts[gibbs_sample()]+=1
gibbs_counts
defaultdict(<function __main__.<lambda>()>,
{(5, 7): 211,
(1, 3): 191,
(3, 6): 193,
(2, 7): 211,
(1, 2): 177,
(4, 5): 186,
(3, 4): 197,
(6, 9): 184,
(6, 10): 182,
(4, 10): 182,
(5, 8): 211,
(4, 9): 170,
(3, 8): 208,
(2, 4): 204,
(5, 10): 191,
(1, 7): 185,
(3, 5): 196,
(4, 7): 206,
(3, 7): 201,
(5, 11): 183,
(2, 6): 201,
(1, 4): 196,
(4, 8): 211,
(5, 9): 191,
(3, 9): 184,
(4, 6): 192,
(2, 8): 211,
(6, 7): 197,
(1, 6): 181,
(2, 5): 175,
(1, 5): 202,
(6, 8): 223,
(6, 11): 210,
(5, 6): 188,
(2, 3): 194,
(6, 12): 175})
gibbs_df = pd.DataFrame([[var[0], var[1], count ]
for var, count in gibbs_counts.items()],
columns= ['X','Y', 'Count'])
print(gibbs_df.head(5))
X Y Count
0 5 7 211
1 1 3 191
2 3 6 193
3 2 7 211
4 1 2 177
gibbs_array = [item for sublist in
[[[x,y]]*z for x,y,z in gibbs_df.values]
for item in sublist]
gibbs_visual_input = pd.DataFrame(gibbs_array, columns=['X','Y'])
sns.jointplot(x="X", y="Y", data=gibbs_visual_input, kind="kde");
Comments
Reference