In [5]:
import gensim
from gensim.models import KeyedVectors
In [ ]:
# Load the word vectors from a binary word2vec-format file into a
# KeyedVectors instance used by the cells below.
model = KeyedVectors.load_word2vec_format(
    'data/wikipedia-pubmed-and-PMC-w2v.bin',
    binary=True,
)
In [ ]:
# Quick probe of the loaded vectors: print the nearest neighbours of the
# combined 'mother' + 'child' query.
probe_terms = ['mother', 'child']
print(model.most_similar(positive=probe_terms))
In [1]:
import pandas as pd
# Only the adolescent CSV is loaded here; the loaders for the other cycle-2
# datasets are kept below, commented out.
# df_mnchn = pd.read_csv('data/cycle_2/mnchn_new_AOs.csv')
# df_specpop = pd.read_csv('data/cycle_2/specpop_new_AOs.csv')
# df_ger = pd.read_csv('data/cycle_2/geriatrics_new_AOs.csv')
df_ado = pd.read_csv(
    'data/cycle_2/adolescent_new_AOs.csv',
)
In [ ]:
# Inspect the column labels of df_ado. This is a bare expression, so it only
# produces visible output when run as the last statement of a notebook cell.
df_ado.columns
In [ ]:
import re
def _expand_keywords(keywords, vectors):
    """Return a space-separated string of words similar to *keywords*.

    The keyword string is split on whitespace, words missing from the
    embedding vocabulary are dropped, and the remaining words are used
    together as the positive query for ``vectors.most_similar``.

    Args:
        keywords: Raw value of a ``Keywords`` cell. Anything that is not a
            string (e.g. the NaN pandas uses for an empty CSV cell) is
            treated as "no keywords".
        vectors: The loaded KeyedVectors model.

    Returns:
        The neighbouring words joined by single spaces, or ``''`` when no
        keyword is present in the vocabulary.
    """
    if not isinstance(keywords, str):
        return ''
    # `word in vectors` is the membership test that works on both gensim 3.x
    # and 4.x (the `.vocab` attribute was removed in 4.0).  A concrete list
    # is built because newer gensim versions call len() on `positive`.
    known_words = [word for word in keywords.split() if word in vectors]
    if not known_words:
        # most_similar raises on an empty query; keep the cell empty instead
        # of aborting the whole run.
        return ''
    neighbours = vectors.most_similar(positive=known_words)
    # Each neighbour is a (word, similarity) pair; only the word is kept.
    return ' '.join(word for word, _similarity in neighbours)


# The column is assigned on the DataFrame itself.  Writing to the row Series
# yielded by DataFrame.iterrows() is not guaranteed to propagate back to the
# frame, which would leave 'VEC' empty in the exported CSV.
# df_mnchn['VEC'] = [_expand_keywords(kw, model) for kw in df_mnchn['Keywords']]
# df_specpop['VEC'] = [_expand_keywords(kw, model) for kw in df_specpop['Keywords']]
df_ado['VEC'] = [_expand_keywords(kw, model) for kw in df_ado['Keywords']]
# df_ger['VEC'] = [_expand_keywords(kw, model) for kw in df_ger['Keywords']]
In [ ]:
# Write the adolescent frame (including the 'VEC' column) to the working
# directory without the index column.  Exports for the other datasets are
# kept below, commented out.
# df_mnchn.to_csv('mnchn.csv', index=False)
# df_specpop.to_csv('specpop.csv', index=False)
# df_ger.to_csv('geriatrics.csv', index=False)
df_ado.to_csv(
    'adolescent.csv',
    index=False,
)
In [ ]: