notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np



In [32]:

    
#Number of countries and collaboration count for each text file

def collaboration_count(data,country_list,column_name):
    """
    Count papers per country and pairwise collaborations between countries.

    Every value of ``data[column_name]`` is treated as the address text of one
    paper. The text is split on whitespace and compared case-insensitively
    against each entry of ``country_list``. A country matches when a run of
    consecutive tokens equals the words of its name; the last token may carry
    a trailing ``';'``. No other punctuation is stripped, so ``"USA,"`` or
    ``"USA."`` do not match. Multi-word names such as ``'South Africa'`` are
    matched across consecutive tokens.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the text column. Rows are visited in order, independent
        of the index labels.
    country_list : list of str
        Country names to look for. If a name is repeated, pair counts are
        stored only at its first position, while every occurrence receives
        the paper count on the diagonal.
    column_name : str
        Name of the column in ``data`` containing the text to scan.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(country_list), len(country_list))``.
        Entry ``[i][j]`` with ``i != j`` is the number of papers mentioning
        both country ``i`` and country ``j``; entry ``[i][i]`` is the number
        of papers mentioning country ``i``. A country is counted at most once
        per paper.
    """
    size = len(country_list)
    collaboration = np.zeros((size, size))
    count = {country: 0 for country in country_list}

    # First position of every name; equivalent to country_list.index(name)
    # but without rescanning the list for every pair.
    first_index = {}
    for position, country in enumerate(country_list):
        first_index.setdefault(country, position)

    # Lower-cased words of each country name, computed once.
    country_words = [country.lower().split() for country in country_list]

    # Iterate over the values so the result does not depend on index labels.
    for text in data[column_name]:
        tokens = [token.lower() for token in text.split()]
        found = []
        for start in range(len(tokens)):
            for country, words in zip(country_list, country_words):
                if not words:
                    continue
                window = tokens[start:start + len(words)]
                if len(window) != len(words) or window[:-1] != words[:-1]:
                    continue
                # The final word may be followed by the ';' address separator.
                if window[-1] != words[-1] and window[-1] != words[-1] + ';':
                    continue
                if country not in found:
                    count[country] += 1
                    found.append(country)
        if len(found) > 1:
            # Every ordered pair of distinct countries gets one increment,
            # which keeps the matrix symmetric.
            for first in found:
                row = first_index[first]
                for second in found:
                    if first != second:
                        collaboration[row][first_index[second]] += 1

    # The diagonal carries the per-country paper totals.
    for position, country in enumerate(country_list):
        collaboration[position][position] = count[country]

    return collaboration



In [ ]:

    
# TODO: unfinished cell -- a bare `def` is a SyntaxError when run; write the function here or delete the cell.



In [33]:

    
# Read only the tab-separated 'C1' column; keep_default_na=False keeps empty
# cells as '' instead of NaN so they can be split as text.
# NOTE(review): 'savedrecs.txt' looks like a bibliographic export -- confirm the source.
data=pd.read_csv('savedrecs.txt',sep='\t',engine='python',usecols=['C1'],keep_default_na=False,index_col=False)
# Constant year tag for this file; collaboration_count does not read it.
data['Year'] = 2000
#List of countries to be counted (each name must appear exactly once, otherwise
#the result matrix gets a redundant row/column for the repeated name)
country_list = ['USA','Germany','France','China','Japan','Australia','Canada','Brazil','Mexico','South Africa',
           'India','Korea','Israel','Turkey','Saudi Arabia','Iran','Spain','Netherlands','Sweden','Norway',
           'Poland','Indonesia','Switzerland','Denmark','Singapore','Iceland','Hong Kong','New Zealand','Belgium',
           'Austria','Italy','Czech','Greece','Qatar','Portugal','Hungary','Argentina','Romania','England',
           'Taiwan','Lithuania']
assert len(country_list) == len(set(country_list)), 'duplicate entries in country_list'
result = collaboration_count(data,country_list,'C1')



In [34]:

    
# Number of papers that mention both the USA and Lithuania.
result[country_list.index('USA'), country_list.index('Lithuania')]









    Out[34]:





1.0



In [35]:

    
# Scratch check of %-style integer formatting.
print('%d' % (2000,))



In [36]:

    
# Display the matrix returned by collaboration_count (numpy abbreviates large arrays).
result









    Out[36]:





array([[ 11.,   2.,   1., ...,   0.,   0.,   1.],
       [  2.,  10.,   0., ...,   0.,   0.,   0.],
       [  1.,   0.,   3., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   1.,   0.],
       [  1.,   0.,   0., ...,   0.,   0.,   1.]])



In [43]:

    
# Take an independent copy: plain assignment would only bind a second name to
# the same array, so later in-place changes to `result` would also change `r`.
r = result.copy()



In [44]:

    
# Display the array bound to `r`.
r









    Out[44]:





array([[ 11.,   2.,   1., ...,   0.,   0.,   1.],
       [  2.,  10.,   0., ...,   0.,   0.,   0.],
       [  1.,   0.,   3., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   1.,   0.],
       [  1.,   0.,   0., ...,   0.,   0.,   1.]])



In [46]:

    
# In-place element-wise add: this mutates the array bound to `result`, so
# re-running the cell adds `r` again (the cell is not idempotent).
result += r



In [47]:

    
# Display `result` after the in-place addition.
result









    Out[47]:





array([[ 22.,   4.,   2., ...,   0.,   0.,   2.],
       [  4.,  20.,   0., ...,   0.,   0.,   0.],
       [  2.,   0.,   6., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   2.,   0.],
       [  2.,   0.,   0., ...,   0.,   0.,   2.]])



In [48]:

    
# Start a list holding the matrix; the list stores a reference to the array, not a copy.
a = [result]



In [50]:

    
# Display the first matrix stored in the list `a`.
a[0]









    Out[50]:





array([[ 22.,   4.,   2., ...,   0.,   0.,   2.],
       [  4.,  20.,   0., ...,   0.,   0.,   0.],
       [  2.,   0.,   6., ...,   0.,   0.,   0.],
       ..., 
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   2.,   0.],
       [  2.,   0.,   0., ...,   0.,   0.,   2.]])



In [ ]: