In [94]:
import pandas as pd
import numpy as np
import itertools
In [95]:
# Load the tab-separated records file into a DataFrame.
# index_col=False stops pandas from using the first column as the index.
data = pd.read_csv(
    'savedrecs.txt',
    sep='\t',
    engine='python',
    index_col=False,
)
In [96]:
def getUniqueWords(allWords):
    """Return the distinct items of ``allWords`` in first-seen order.

    Parameters
    ----------
    allWords : iterable
        Items to de-duplicate (typically word strings).

    Returns
    -------
    list
        A new list holding the first occurrence of every item, in the order
        the items were first encountered.  The input is not modified.
    """
    uniqueWords = []
    seen = set()  # hashable items already emitted, for O(1) membership tests
    for word in allWords:
        try:
            if word in seen:
                continue
            seen.add(word)
        except TypeError:
            # Unhashable item (e.g. a list): fall back to an equality scan
            # over what has been emitted so far.
            if word in uniqueWords:
                continue
        uniqueWords.append(word)
    return uniqueWords
In [97]:
# Read the whole file and split it on commas (presumably one country name per
# field, judging by the file name).  The context manager closes the file
# handle even if reading fails.
with open("country_list.txt") as text_file:
    lines = text_file.read().split(',')
# Bare last expression so the notebook displays the parsed list.
lines
Out[97]:
In [98]:
# Country names searched for in each address string.  Built once, outside the
# per-record loop; multi-word names are matched as whole phrases below.
countries=['USA','Germany','France','China','Japan','Australia','Canada','Brazil','Mexico','South Africa',
           'India','Korea','Israel','Turkey','Saudi Arabia','Iran','Spain','Netherlands','Sweden','Norway',
           'Poland','Indonesia','Switzerland','Denmark','Singapore','Iceland','Hong Kong','New Zealand','Belgium',
           'Austria','Italy','Czech','Greece','Qatar','Portugal','Hungary','Argentina','Romania','England',
           'Taiwan','Lithuania','Finland','Russia','Kazakhstan']


def _match_countries(tokens, names):
    """Return ``(matched_text, country)`` pairs found in ``tokens``.

    A country matches when its words appear as consecutive tokens, where the
    last token may carry a trailing ';'.  For single-word names this is the
    plain ``token == name or token == name + ';'`` comparison; multi-word
    names such as 'South Africa' are compared across adjacent tokens.

    Parameters
    ----------
    tokens : list of str
        Whitespace-split pieces of one address string.
    names : iterable of str
        Country names to look for.

    Returns
    -------
    list of (str, str)
        One pair per hit, in the order the hits occur in ``tokens``.
    """
    # Pre-split every name once; skip names that contain no words at all.
    phrases = [(name, name.split()) for name in names if name.split()]
    hits = []
    for start in range(len(tokens)):
        for name, words in phrases:
            window = tokens[start:start + len(words)]
            if len(window) < len(words):
                continue  # not enough tokens left for this name
            head_ok = window[:-1] == words[:-1]
            tail_ok = window[-1] in (words[-1], words[-1] + ';')
            if head_ok and tail_ok:
                hits.append((' '.join(window), name))
    return hits


# Keep only the records that have a value in the 'C1' column.
df= data[data['C1'].notnull()]
df_new=df['C1']

# Show one sample record; label 22 may have been removed by the null filter,
# so look it up only when it is still present.
if 22 in df_new.index:
    print(df_new.loc[22])

# NOTE(review): the exported notebook lost its indentation; this assumes the
# matching and the final print ran once per record -- confirm.
for address in df_new:
    # `p` is left bound after the loop because later cells read it.
    p = address.split()
    hits = _match_countries(p, countries)
    # Distinct matched text fragments, in order of appearance.
    count = list(dict.fromkeys(text for text, _ in hits))
    # Distinct country names, in order of appearance (deterministic output,
    # unlike printing a set).
    country_list = list(dict.fromkeys(name for _, name in hits))
    print(country_list)
In [99]:
# Repeat the country lookup on the tokens currently held in `p` (bound by an
# earlier cell), this time against a shorter list of names.
countries = ['France', 'USA', 'Japan', 'Sweden', 'Germany']

# Pass 1: collect each token that equals a listed name, either bare or with a
# trailing ';'.  Tokens already collected are skipped.
count = []
for token in p:
    if token in count:
        continue
    for name in countries:
        if token in (name, name + ';'):
            count.append(token)

# Pass 2: translate the collected tokens back to the plain country names.
country_list = [
    name
    for token in count
    for name in countries
    if token == name or token == name + ';'
]
print(list(set(country_list)))
In [100]:
# Map every token in `count` back to its plain country name, using the
# `count` and `countries` values left by an earlier cell, then print the
# distinct names.
country_list = []
for token in count:
    country_list.extend(
        name for name in countries if token in (name, name + ';')
    )
print(list(set(country_list)))
In [ ]:
In [ ]: