In [1]:
import pandas as pd
import numpy as np
In [32]:
# Per-country paper counts and pairwise collaboration counts for one export file
def _mentions_country(tokens, name_words):
    """Return True if ``name_words`` occur as consecutive entries of ``tokens``.

    Both arguments are lists of lower-cased words. The last word of the name
    may carry a single trailing ';' in ``tokens`` (so ``'usa;'`` matches
    ``'usa'``); no other punctuation is tolerated.
    """
    width = len(name_words)
    if width == 0:
        return False
    leading, last = name_words[:-1], name_words[-1]
    for start in range(len(tokens) - width + 1):
        if tokens[start:start + width - 1] != leading:
            continue
        tail = tokens[start + width - 1]
        if tail == last or tail == last + ';':
            return True
    return False


def collaboration_count(data, country_list, column_name):
    """
    Count papers per country and co-occurrences between every pair of countries.

    Each entry of ``data[column_name]`` is treated as one paper. The text is
    split on whitespace and compared case-insensitively against every name in
    ``country_list``. A name matches when its words appear as consecutive
    tokens, the last of which may end in ';'. Multi-word names such as
    'South Africa' are therefore matched as well as single-word ones.
    A country is counted at most once per paper.

    Parameters
    ----------
    data : mapping of column name to a sequence of strings (e.g. a DataFrame)
        Only ``data[column_name]`` is read; rows are visited in order, so the
        index labels do not matter.
    country_list : list of str
        Names to look for. Position in this list is the row/column index in
        the result. If a name appears more than once, pair counts go to its
        first position only, while every position gets the diagonal count.
    column_name : str
        Column holding the text to scan.

    Returns
    -------
    numpy.ndarray of shape (len(country_list), len(country_list))
        ``[i][i]`` is the number of papers mentioning country ``i``;
        ``[i][j]`` (i != j) is the number of papers mentioning both ``i`` and ``j``.
    """
    size = len(country_list)
    collaboration = np.zeros((size, size))

    # Resolve each distinct name to its first position, as list.index() would.
    first_index = {}
    for position, name in enumerate(country_list):
        first_index.setdefault(name, position)
    name_words = {name: name.lower().split() for name in first_index}
    count = dict.fromkeys(first_index, 0)

    # Iterate over the values directly so a non-default index cannot raise KeyError.
    for text in data[column_name]:
        if not isinstance(text, str):
            # Missing values (e.g. NaN) carry no country information.
            continue
        tokens = [token.lower() for token in text.split()]
        present = [name for name, words in name_words.items()
                   if _mentions_country(tokens, words)]
        for name in present:
            count[name] += 1
        if len(present) > 1:
            for first in present:
                for second in present:
                    if first != second:
                        collaboration[first_index[first]][first_index[second]] += 1

    # The diagonal holds the per-country paper totals.
    for position, name in enumerate(country_list):
        collaboration[position][position] = count[name]
    return collaboration
In [ ]:
def
In [33]:
# Read only the C1 column of the tab-separated export. keep_default_na=False
# keeps blank cells as '' instead of NaN, so every value is a string.
# NOTE(review): C1 is presumably the author-address field of the export - confirm.
data = pd.read_csv(
    'savedrecs.txt',
    sep='\t',
    engine='python',
    usecols=['C1'],
    keep_default_na=False,
    index_col=False,
)
# Constant Year column; collaboration_count only reads the C1 column.
data['Year'] = 2000
# List of countries to be counted. Each name must appear exactly once: the
# position in this list is the row/column index of the result matrix, and a
# repeated name ('Brazil' was listed twice) produces a second row whose
# off-diagonal entries stay at zero.
country_list = ['USA','Germany','France','China','Japan','Australia','Canada','Brazil','Mexico','South Africa',
                'India','Korea','Israel','Turkey','Saudi Arabia','Iran','Spain','Netherlands','Sweden','Norway',
                'Poland','Indonesia','Switzerland','Denmark','Singapore','Iceland','Hong Kong','New Zealand','Belgium',
                'Austria','Italy','Czech','Greece','Qatar','Portugal','Hungary','Argentina','Romania','England',
                'Taiwan','Lithuania']
result = collaboration_count(data,country_list,'C1')
In [34]:
# Number of papers mentioning both the USA and Lithuania
result[country_list.index('USA'), country_list.index('Lithuania')]
Out[34]:
In [35]:
# Scratch check of %-formatting an integer; prints 2000
print('%d' % (2000,))
In [36]:
# Display the matrix returned by collaboration_count
# (diagonal: per-country paper counts, off-diagonal: pair counts)
result
Out[36]:
In [43]:
# Take an independent snapshot: a plain `r = result` would bind r to the same
# array object, so a later in-place `result += r` would modify r as well.
r = result.copy()
In [44]:
# Display r, which was assigned from result above
r
Out[44]:
In [46]:
# In-place add: updates the array object that `result` refers to, so every
# other name bound to that same array sees the change. Not idempotent:
# re-running this cell adds r again.
result += r
In [47]:
# Display result after the in-place addition of r
result
Out[47]:
In [48]:
# One-element list holding a reference to (not a copy of) the result array
a = [result]
In [50]:
# Display the first element of a, i.e. the array stored in the list above
a[0]
Out[50]:
In [ ]: