In [5]:
import gensim
from gensim.models import KeyedVectors


Using TensorFlow backend.

In [ ]:
# Load the pretrained word2vec vectors from the binary-format file on disk.
model = KeyedVectors.load_word2vec_format(
    'data/wikipedia-pubmed-and-PMC-w2v.bin',
    binary=True,
)

In [ ]:
# Sanity check: query the neighbours of a two-word positive set and print them.
neighbours = model.most_similar(positive=['mother', 'child'])
print(neighbours)

In [1]:
import pandas as pd
# Only the adolescent CSV is loaded in this run; the loads for the other
# CSVs are left disabled and can be re-enabled as needed.
# df_mnchn = pd.read_csv('data/cycle_2/mnchn_new_AOs.csv')
# df_specpop = pd.read_csv('data/cycle_2/specpop_new_AOs.csv')
df_ado = pd.read_csv(
    'data/cycle_2/adolescent_new_AOs.csv',
)
# df_ger = pd.read_csv('data/cycle_2/geriatrics_new_AOs.csv')

In [ ]:
# Show the column labels of the loaded frame; a later cell reads the
# 'Keywords' column, so this is where to confirm it is present.
df_ado.columns

In [ ]:
import re

def similar_terms(keywords, w2v):
    """Return the words nearest to a keyword string, joined by single spaces.

    Parameters
    ----------
    keywords : str
        Whitespace-separated keywords. Any non-string value (for example the
        NaN that pandas uses for an empty CSV field) yields ''.
    w2v : gensim KeyedVectors
        Embedding model used both for the vocabulary check and for the
        nearest-neighbour query.

    Returns
    -------
    str
        The words returned by ``w2v.most_similar`` for the in-vocabulary
        keywords, or '' when there is nothing to query with.
    """
    if not isinstance(keywords, str):
        return ''

    # `token in w2v` is the membership test KeyedVectors itself exposes; it
    # avoids the `.vocab` attribute, which newer gensim releases removed.
    known = [token for token in keywords.split() if token in w2v]

    # most_similar raises ValueError when given no input words, so
    # short-circuit instead of aborting the whole run on one bad row.
    if not known:
        return ''

    # most_similar yields (word, similarity) pairs; only the word is kept.
    return ' '.join(word for word, _score in w2v.most_similar(positive=known))


# Build each VEC column in one assignment on the frame itself. Writing to the
# row objects yielded by DataFrame.iterrows() is not guaranteed to reach the
# frame (pandas may hand back copies), which can leave VEC empty on export.

# df_mnchn['VEC'] = [similar_terms(k, model) for k in df_mnchn['Keywords']]
# df_specpop['VEC'] = [similar_terms(k, model) for k in df_specpop['Keywords']]
df_ado['VEC'] = [similar_terms(k, model) for k in df_ado['Keywords']]
# df_ger['VEC'] = [similar_terms(k, model) for k in df_ger['Keywords']]

In [ ]:
# Write the processed adolescent frame to CSV without the index column; the
# exports for the other frames are left disabled.
# df_mnchn.to_csv('mnchn.csv', index=False)
# df_specpop.to_csv('specpop.csv', index=False)
df_ado.to_csv(
    'adolescent.csv',
    index=False,
)
# df_ger.to_csv('geriatrics.csv', index=False)

In [ ]: