Gibbs sampling
Joint probability $p(z_1,z_2,…,z_n)$에서 샘플링할 때, 특정 하나의 확률변수($z_i$)와 그것을 제외한 나머지 확률변수($z_{-i}$)를 conditional probability: $p(z_i \mid z_{-i}$)로 샘플링하는 방법
Gibbs sampling 예제 코드
from collections import defaultdict
import random
import pandas as pd
import seaborn as sns
def roll_a_die():
    return random.choice([1,2,3,4,5,6])
def direct_sample():
    d1 = roll_a_die()
    d2 = roll_a_die()
    return d1, d1+d2
key는 0으로 초기화합니다.direct_counts = defaultdict(lambda:0) # initialize 0 for unseen key
n_sample = 7000
for _ in range(n_sample):
    direct_counts[direct_sample()]+=1
direct_counts
defaultdict(<function __main__.<lambda>()>,
            {(5, 7): 192,
             (6, 9): 176,
             (5, 8): 183,
             (1, 4): 198,
             (6, 7): 200,
             (2, 5): 191,
             (6, 8): 201,
             (2, 8): 188,
             (2, 4): 196,
             (5, 11): 188,
             (5, 9): 225,
             (6, 10): 177,
             (1, 2): 194,
             (2, 7): 194,
             (4, 5): 186,
             (3, 8): 160,
             (3, 5): 217,
             (3, 6): 197,
             (3, 9): 198,
             (1, 7): 196,
             (3, 7): 189,
             (6, 11): 196,
             (5, 10): 188,
             (2, 6): 189,
             (3, 4): 186,
             (5, 6): 192,
             (4, 9): 196,
             (6, 12): 198,
             (1, 6): 200,
             (4, 10): 201,
             (4, 6): 194,
             (1, 3): 186,
             (4, 7): 204,
             (2, 3): 213,
             (1, 5): 201,
             (4, 8): 210})
direct_df = pd.DataFrame([[var[0], var[1], count ]
                          for var, count in direct_counts.items()],
                        columns= ['X','Y', 'Count'])
print(direct_df.head(5))
   X  Y  Count
0  5  7    192
1  6  9    176
2  5  8    183
3  1  4    198
4  6  7    200
direct_array = [item for sublist in 
                [[[x,y]]*z for x,y,z in direct_df.values] 
                for item in sublist]
direct_visual_input = pd.DataFrame(direct_array, columns=['X','Y'])
sns.jointplot(x="X", y="Y", data=direct_visual_input, kind="kde")
<seaborn.axisgrid.JointGrid at 0x11733d470>

def random_y_given_x(x):
    """x+1, x+2, x+3, ..."""
    return x + roll_a_die()
def random_x_given_y(y):
    if y <=7:
        """두개의 주사위 합이 7보다 작거나 같으면, 첫번쨰 주사위는 1~6의 값을 가짐"""
        return random.randrange(1,y)
    else:
        """두개의 주사위 합이 7보다 크면, 첫번째 주사위는 (y-6)~6의 값을 가짐
        예를 들어, 주사위 합이 10이면, 첫번째 주사위는 4~6의 값을 가짐
        (3이하의 값은 주어진 10을 표현할 수 없음)"""
        return random.randrange(y-6,7)
num_iters를 너무 작게 하면 분포가 한쪽으로 치우치는 경향이 있어 10번정도 반복하였습니다.def gibbs_sample(num_iters = 10):
    x,y = 1, 2  # inititalze possible any numbers
    for _ in range(num_iters):
        x = random_x_given_y(y)
        y = random_y_given_x(x)
    return x, y
gibbs_counts = defaultdict(lambda:0) # initialize 0 for unseen key
n_sample = 7000
for _ in range(n_sample):
    gibbs_counts[gibbs_sample()]+=1
gibbs_counts
defaultdict(<function __main__.<lambda>()>,
            {(5, 7): 211,
             (1, 3): 191,
             (3, 6): 193,
             (2, 7): 211,
             (1, 2): 177,
             (4, 5): 186,
             (3, 4): 197,
             (6, 9): 184,
             (6, 10): 182,
             (4, 10): 182,
             (5, 8): 211,
             (4, 9): 170,
             (3, 8): 208,
             (2, 4): 204,
             (5, 10): 191,
             (1, 7): 185,
             (3, 5): 196,
             (4, 7): 206,
             (3, 7): 201,
             (5, 11): 183,
             (2, 6): 201,
             (1, 4): 196,
             (4, 8): 211,
             (5, 9): 191,
             (3, 9): 184,
             (4, 6): 192,
             (2, 8): 211,
             (6, 7): 197,
             (1, 6): 181,
             (2, 5): 175,
             (1, 5): 202,
             (6, 8): 223,
             (6, 11): 210,
             (5, 6): 188,
             (2, 3): 194,
             (6, 12): 175})
gibbs_df = pd.DataFrame([[var[0], var[1], count ]
                          for var, count in gibbs_counts.items()],
                        columns= ['X','Y', 'Count'])
print(gibbs_df.head(5))
   X  Y  Count
0  5  7    211
1  1  3    191
2  3  6    193
3  2  7    211
4  1  2    177
gibbs_array = [item for sublist in 
                [[[x,y]]*z for x,y,z in gibbs_df.values] 
                for item in sublist]
gibbs_visual_input = pd.DataFrame(gibbs_array, columns=['X','Y'])
sns.jointplot(x="X", y="Y", data=gibbs_visual_input, kind="kde");

Comments
Reference