In [1]:
import pandas as pd
import numpy as np

In [32]:
# Per-country paper counts and pairwise collaboration counts for each text file

def collaboration_count(data,country_list,column_name):
    """
    Count papers per country and pairwise collaborations between countries.

    Every entry of ``data[column_name]`` is treated as the address string of
    one paper. The string is split on whitespace and compared,
    case-insensitively, against each name in ``country_list``. A name matches
    when its words appear as consecutive tokens; the last token may carry a
    trailing ``;``. This lets multi-word names such as ``'South Africa'``
    match, which plain token-by-token comparison cannot do. A country is
    counted at most once per paper, however often it appears in the string.

    Parameters
    ----------
    data : pandas.DataFrame (or any mapping of column name -> iterable)
        Source table. Rows are read in order; the index labels are not used.
        Entries that are not ``str`` (e.g. NaN) are skipped.
    country_list : list of str
        Country names to look for. If a name is listed more than once,
        off-diagonal counts are stored at its first position only, while the
        diagonal is filled for every position holding that name.
    column_name : hashable
        Key of the column that holds the address strings.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(country_list), len(country_list))``.
        ``[i][i]`` is the number of papers that mention country ``i``;
        ``[i][j]`` (``i != j``) is the number of papers that mention both
        country ``i`` and country ``j``.
    """
    size = len(country_list)
    count = {name: 0 for name in country_list}

    # Row/column of each name: first occurrence wins, like list.index().
    first_index = {}
    for position, name in enumerate(country_list):
        first_index.setdefault(name, position)

    # Lower-cased words of every country name, computed once up front.
    phrases = [name.lower().split() for name in country_list]

    collaboration = np.zeros((size, size))
    for address in data[column_name]:
        if not isinstance(address, str):
            # Missing values have no tokens to inspect.
            continue
        tokens = address.lower().split()

        found = []  # countries already credited for this paper
        for start in range(len(tokens)):
            for name, words in zip(country_list, phrases):
                if not words or name in found:
                    continue
                window = tokens[start:start + len(words)]
                if len(window) < len(words):
                    continue
                # All words but the last must match exactly ...
                if window[:-1] != words[:-1]:
                    continue
                # ... and the last one may be followed by the ';' separator.
                if window[-1] == words[-1] or window[-1] == words[-1] + ';':
                    count[name] += 1
                    found.append(name)

        # Every ordered pair of distinct countries on the paper is one
        # collaboration, which keeps the matrix symmetric.
        for first in found:
            for second in found:
                if first != second:
                    collaboration[first_index[first]][first_index[second]] += 1

    # The diagonal carries the per-country paper totals.
    for position, name in enumerate(country_list):
        collaboration[position][position] = count[name]

    return collaboration

In [ ]:
# TODO: unfinished cell - it held only a bare `def`, which is a SyntaxError when run.

In [33]:
# Read only the 'C1' column of the tab-separated export; keep_default_na=False
# keeps empty cells as '' instead of NaN so every entry stays a string.
# NOTE(review): 'C1' is presumably the author-address field - confirm.
data=pd.read_csv('savedrecs.txt',sep='\t',engine='python',usecols=['C1'],keep_default_na=False,index_col=False)
# NOTE(review): constant year tag; presumably the publication year of this export - confirm.
data['Year'] = 2000
#List of countries to be counted (each name listed exactly once)
country_list = ['USA','Germany','France','China','Japan','Australia','Canada','Brazil','Mexico','South Africa',
           'India','Korea','Israel','Turkey','Saudi Arabia','Iran','Spain','Netherlands','Sweden','Norway',
           'Poland','Indonesia','Switzerland','Denmark','Singapore','Iceland','Hong Kong','New Zealand','Belgium',
           'Austria','Italy','Czech','Greece','Qatar','Portugal','Hungary','Argentina','Romania','England',
           'Taiwan','Lithuania']
result = collaboration_count(data,country_list,'C1')

In [34]:
# Papers that list both USA and Lithuania (row = USA, column = Lithuania).
result[country_list.index('USA'), country_list.index('Lithuania')]


Out[34]:
1.0

In [35]:
# Scratch check of %-formatting an integer.
print('%d' % (2000,))


2000

In [36]:
# Display the matrix returned by collaboration_count.
result


Out[36]:
array([[ 11.,   2.,   1., ...,   0.,   0.,   1.],
       [  2.,  10.,   0., ...,   0.,   0.,   0.],
       [  1.,   0.,   3., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   1.,   0.],
       [  1.,   0.,   0., ...,   0.,   0.,   1.]])

In [43]:
# Take an independent snapshot: a plain `r = result` would only alias the
# array, so a later in-place update of `result` would silently change `r` too.
r = result.copy()

In [44]:
# Inspect `r` (taken from `result` in the previous cell).
r


Out[44]:
array([[ 11.,   2.,   1., ...,   0.,   0.,   1.],
       [  2.,  10.,   0., ...,   0.,   0.,   0.],
       [  1.,   0.,   3., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   1.,   0.],
       [  1.,   0.,   0., ...,   0.,   0.,   1.]])

In [46]:
# In-place element-wise addition: this mutates the array object bound to
# `result`, so any other name bound to that same object sees the change.
# Not idempotent - re-running this cell adds `r` again.
result += r

In [47]:
# Display `result` after the in-place addition in the previous cell.
result


Out[47]:
array([[ 22.,   4.,   2., ...,   0.,   0.,   2.],
       [  4.,  20.,   0., ...,   0.,   0.,   0.],
       [  2.,   0.,   6., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   2.,   0.],
       [  2.,   0.,   0., ...,   0.,   0.,   2.]])

In [48]:
# One-element list holding a reference to (not a copy of) the result matrix.
a = [result]

In [50]:
# First element of the list `a`: the `result` matrix stored in the previous cell.
a[0]


Out[50]:
array([[ 22.,   4.,   2., ...,   0.,   0.,   2.],
       [  4.,  20.,   0., ...,   0.,   0.,   0.],
       [  2.,   0.,   6., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   2.,   0.],
       [  2.,   0.,   0., ...,   0.,   0.,   2.]])

In [ ]: