def getCosine(a, b):
    """Return the cosine similarity between vectors *a* and *b*.

    Computed as the dot product of the two vectors divided by the product
    of their Euclidean norms. If either vector has zero norm the division
    is 0/0 and the result is NaN.
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
# Effect size computed below:
#   for every target word, take (mean cosine to a1) - (mean cosine to a2);
#   effect size = (mean of those differences over t1
#                  - mean of those differences over t2)
#                 / standard deviation of the differences over t1 and t2.

# Per-word association differences for all words of t1 followed by t2.
list_avg_diffs = []

avg_t1_diff = 0
for t in t1:  # t1= ['brother', 'father', 'uncle', 'grandfather', 'son']
    x = w2v[t]

    # The accumulators must start from 0 for every target word; otherwise
    # the previous word's mean leaks into the current word's sum.
    avg_t1_a1 = 0
    for a in a1:  # a1= ['science', 'technology', 'physics', 'chemistry', 'Einstein', 'NASA', 'experiment', 'astronomy']
        y = w2v[a]
        avg_t1_a1 = avg_t1_a1 + getCosine(x, y)
    avg_t1_a1 = avg_t1_a1 / len(a1)

    avg_t1_a2 = 0
    for a in a2:  # a2= ['poetry', 'art', 'Shakespeare', 'dance', 'literature', 'novel', 'symphony', 'drama']
        y = w2v[a]
        avg_t1_a2 = avg_t1_a2 + getCosine(x, y)
    avg_t1_a2 = avg_t1_a2 / len(a2)

    avg_t1_diff = avg_t1_diff + (avg_t1_a1 - avg_t1_a2)
    list_avg_diffs.append(avg_t1_a1 - avg_t1_a2)
avg_t1_diff = avg_t1_diff / len(t1)

avg_t2_diff = 0
for t in t2:  # t2= ['sister', 'mother', 'aunt', 'grandmother', 'daughter']
    x = w2v[t]

    # Same per-word reset as in the t1 loop.
    avg_t2_a1 = 0
    for a in a1:
        y = w2v[a]
        avg_t2_a1 = avg_t2_a1 + getCosine(x, y)
    avg_t2_a1 = avg_t2_a1 / len(a1)

    avg_t2_a2 = 0
    for a in a2:
        y = w2v[a]
        avg_t2_a2 = avg_t2_a2 + getCosine(x, y)
    avg_t2_a2 = avg_t2_a2 / len(a2)

    avg_t2_diff = avg_t2_diff + (avg_t2_a1 - avg_t2_a2)
    list_avg_diffs.append(avg_t2_a1 - avg_t2_a2)
avg_t2_diff = avg_t2_diff / len(t2)

diff_of_diff = avg_t1_diff - avg_t2_diff
# np.std defaults to ddof=0 (population SD); pass ddof=1 to get the
# sample SD, which is what R's sd() returns.
sd = np.std(list_avg_diffs)
weat_effect_size = diff_of_diff / sd
I am not sure why there is a difference of 0.06 in the effect size. Any help would be appreciated.