Example and demo of collaboration_count function


In [1]:
import matplotlib.pyplot as plt
import pandas as pd

from knetwork import collaboration_count as colcnt

In [2]:
#Query settings: data_source, year_list, column_name and the countries to count.
data_source = 'web of science'
year_list = range(2000,2017)
column_name = 'C1'

#Countries are written down continent by continent; country_list is the plain
#concatenation of the groups, so a country's position depends on this order.
north_america = ['USA','Mexico','Canada','Guatemala','Cuba','Dominican Republic','Haiti',
                 'Honduras','El Salvador','Nicaragua','Costa Rica','Panama','Jamaica','Trinidad']
south_america = ['Brazil','Colombia','Argentina','Venezuela','Peru','Chile','Ecuador',
                 'Bolivia','Paraguay','Uruguay']
africa = ['Nigeria','Algeria','Congo','Sudan','Chad','Niger','Angola','Mali','South Africa',
          'Ethiopia','Egypt','Tanzania','Morocco','Kenya','Uganda','Ghana','Mozambique',
          'Madagascar','Cameroon','Ivory Coast','Zambia','Zimbabwe','Malawi','Senegal','Somalia']
asia = ['China','India','Indonesia','Pakistan','Bangladesh','Russia','Japan','Philippines',
        'Vietnam','Turkey','Iran','Thailand','Myanmar','Korea','Iraq','Arabia','Malaysia',
        'Uzbekistan','Nepal','Afghanistan','Yemen','Syria','Sri Lanka','Cambodia','Azerbaijan',
        'Emirates','Tajikistan','Israel','Laos','Jordan','Singapore','Lebanon','Kuwait','Oman',
        'Qatar','Bahrain','Taiwan']
#NOTE(review): 'Kingdom' and 'UK' are two separate entries - confirm both are intended.
europe = ['Germany','France','Kingdom','Italy','UK','Spain','Ukraine','Poland','Romania',
          'Netherlands','Belgium','Greece','Czech','Portugal','Hungary','Sweden','Austria',
          'Belarus','Switzerland','Bulgaria','Denmark','Slovakia','Finland','Norway','Georgia',
          'Ireland','Croatia','Bosnia','Moldova','Lithuania','Latvia','Macedonia','Slovenia',
          'Estonia','Cyprus','Montenegro','Luxembourg','Malta','Iceland','Andorra',
          'Liechtenstein','San Marino','Monaco','Vatican']
#NOTE(review): 'Papau New Guinea' looks like a misspelling of 'Papua New Guinea'; it is
#kept verbatim because a later cell looks this exact string up in country_list.
oceania = ['Australia','New Zealand','Papau New Guinea']

country_list = north_america + south_america + africa + asia + europe + oceania

#Filled in by a later cell: country name -> continent label
continent = dict()

In [3]:
#Label every country with its continent. Each entry names the first and the
#last country of one contiguous run in country_list (both ends inclusive).
continent_spans = [
    ('USA', 'Trinidad', 'North America'),
    ('Brazil', 'Uruguay', 'South America'),
    ('Nigeria', 'Somalia', 'Africa'),
    ('China', 'Taiwan', 'Asia'),
    ('Germany', 'Vatican', 'Europe'),
    ('Australia', 'Papau New Guinea', 'Oceania'),
]
for first_name, last_name, label in continent_spans:
    start = country_list.index(first_name)
    stop = country_list.index(last_name) + 1
    for i in range(start, stop):
        continent[country_list[i]] = label

In [4]:
#Run the collaboration count for all years in year_list.
#The result is indexed further down as [year position][i][j], where i and j are
#positions in country_list; the diagonal [i][i] is used as a country's own count.
list_of_year_wise_results = colcnt.count_all_years(data_source,year_list,country_list,column_name)

In [5]:
#Year wise results
#Enter any year contained in year_list to see its result matrix
a = 2003
#Get results. The year is located by its position in year_list, so a year that
#is not in the list raises ValueError; a plain "a - first year" offset would
#turn an earlier year into a negative index and silently return another year.
b=list_of_year_wise_results[year_list.index(a)]

In [6]:
#Spot check: entry [0][0] of the result at year position 2
list_of_year_wise_results[2][0][0]


Out[6]:
290.0

In [24]:
#Build the edge table written to knetwork1.csv: for every year and every
#unordered country pair (i < j) two rows are produced, one per country of the pair.
n_countries = len(country_list)

#Per-year figures read from the diagonal [i][i] of each year's matrix, which
#is exported below as a country's 'No. of Publications'.
val1={}             #year position -> {country: diagonal count}
v={}                #year position -> {country: rank, 1 = largest count}
yearwise_count={}   #year -> sum of all diagonal counts of that year
for year_pos, c in enumerate(year_list):
    matrix = list_of_year_wise_results[year_pos]
    val1[year_pos] = {country_list[i]: matrix[i][i] for i in range(n_countries)}
    ordered = sorted(val1[year_pos], key=val1[year_pos].get, reverse=True)
    v[year_pos] = {name: position for position, name in enumerate(ordered, 1)}
    yearwise_count[c] = sum(matrix[i][i] for i in range(n_countries))

#Columns of the csv, filled row by row
year=[]
country=[]
edge_label=[]
edge_weight=[]
node_weight=[]
year_total=[]
cont=[]
percent_of_total_pubs=[]
rank=[]
for year_pos, c in enumerate(year_list):
    matrix = list_of_year_wise_results[year_pos]
    total = yearwise_count[c]
    for i in range(n_countries):
        for j in range(i+1,n_countries):
            pair_label = '%s & %s'%(country_list[i],country_list[j])
            collaborations = matrix[i][j]
            #One row per endpoint: shared pair data plus that country's own figures
            for k in (i, j):
                name = country_list[k]
                publications = matrix[k][k]
                year.append(c)
                country.append(name)
                edge_label.append(pair_label)
                edge_weight.append(collaborations)
                node_weight.append(publications)
                year_total.append(total)
                cont.append(continent[name])
                #A year without any publications has no meaningful share; such
                #rows are removed by the zero filter below anyway.
                percent_of_total_pubs.append(publications*100/total if total else 0.0)
                rank.append('%d/%d'%(v[year_pos][name],n_countries))

#'Country' and 'Country1' intentionally carry the same values.
column_order = ['Year','Country','Country1','No. of Collaborations','No. of Publications',
                'Collaborators','Total No. of Publications','Continent',
                'Percent of Total Publications','Rank']
edges_all = pd.DataFrame({
    'Year': year,
    'Country': country,
    'Country1': country,
    'No. of Collaborations': edge_weight,
    'No. of Publications': node_weight,
    'Collaborators': edge_label,
    'Total No. of Publications': year_total,
    'Continent': cont,
    'Percent of Total Publications': percent_of_total_pubs,
    'Rank': rank,
}, columns=column_order)

#Keep only rows without a 0 in any column (drops pairs that never collaborated
#and countries without publications in that year).
df = edges_all.loc[(edges_all != 0).all(axis=1)]
df.to_csv('knetwork1.csv')

In [22]:
df


Out[22]:
Year Country 1 Country 2 No. of Collaborations No. of Publications of Country 1 No. of Publications of Country 2 Collaborators Total No. of Publications Continent of Country 1 Continent of Country 2 Percent of Total Publications of Country 1 Percent of Total Publications of Country 2 Rank of Country 1 Rank of Country 2
0 2000 USA Mexico 5 286 8 USA & Mexico 1391 North America North America 20.5607 0.575126 1/133 25/133
1 2000 USA Canada 5 286 33 USA & Canada 1391 North America North America 20.5607 2.37239 1/133 10/133
16 2000 USA Venezuela 1 286 1 USA & Venezuela 1391 North America South America 20.5607 0.0718907 1/133 57/133
33 2000 USA Egypt 1 286 8 USA & Egypt 1391 North America Africa 20.5607 0.575126 1/133 26/133
48 2000 USA China 3 286 27 USA & China 1391 North America Asia 20.5607 1.94105 1/133 12/133
53 2000 USA Russia 14 286 110 USA & Russia 1391 North America Asia 20.5607 7.90798 1/133 4/133
54 2000 USA Japan 20 286 205 USA & Japan 1391 North America Asia 20.5607 14.7376 1/133 2/133
61 2000 USA Korea 1 286 47 USA & Korea 1391 North America Asia 20.5607 3.37886 1/133 7/133
63 2000 USA Arabia 1 286 2 USA & Arabia 1391 North America Asia 20.5607 0.143781 1/133 43/133
75 2000 USA Israel 3 286 15 USA & Israel 1391 North America Asia 20.5607 1.07836 1/133 17/133
85 2000 USA Germany 25 286 163 USA & Germany 1391 North America Europe 20.5607 11.7182 1/133 3/133
86 2000 USA France 8 286 102 USA & France 1391 North America Europe 20.5607 7.33285 1/133 5/133
88 2000 USA Italy 4 286 47 USA & Italy 1391 North America Europe 20.5607 3.37886 1/133 6/133
92 2000 USA Poland 2 286 10 USA & Poland 1391 North America Europe 20.5607 0.718907 1/133 23/133
93 2000 USA Romania 1 286 11 USA & Romania 1391 North America Europe 20.5607 0.790798 1/133 22/133
94 2000 USA Netherlands 1 286 16 USA & Netherlands 1391 North America Europe 20.5607 1.15025 1/133 16/133
95 2000 USA Belgium 2 286 34 USA & Belgium 1391 North America Europe 20.5607 2.44428 1/133 9/133
99 2000 USA Hungary 3 286 12 USA & Hungary 1391 North America Europe 20.5607 0.862689 1/133 20/133
103 2000 USA Switzerland 3 286 19 USA & Switzerland 1391 North America Europe 20.5607 1.36592 1/133 14/133
108 2000 USA Norway 1 286 5 USA & Norway 1391 North America Europe 20.5607 0.359454 1/133 31/133
109 2000 USA Georgia 4 286 5 USA & Georgia 1391 North America Europe 20.5607 0.359454 1/133 30/133
114 2000 USA Lithuania 1 286 1 USA & Lithuania 1391 North America Europe 20.5607 0.0718907 1/133 49/133
127 2000 USA Monaco 1 286 1 USA & Monaco 1391 North America Europe 20.5607 0.0718907 1/133 50/133
129 2000 USA Australia 3 286 12 USA & Australia 1391 North America Oceania 20.5607 0.862689 1/133 19/133
184 2000 Mexico Russia 1 8 110 Mexico & Russia 1391 North America Asia 0.575126 7.90798 25/133 4/133
322 2000 Canada Korea 1 33 47 Canada & Korea 1391 North America Asia 2.37239 3.37886 10/133 7/133
325 2000 Canada Malaysia 1 33 1 Canada & Malaysia 1391 North America Asia 2.37239 0.0718907 10/133 55/133
341 2000 Canada Kuwait 1 33 1 Canada & Kuwait 1391 North America Asia 2.37239 0.0718907 10/133 54/133
346 2000 Canada Germany 2 33 163 Canada & Germany 1391 North America Europe 2.37239 11.7182 10/133 3/133
354 2000 Canada Romania 1 33 11 Canada & Romania 1391 North America Europe 2.37239 0.790798 10/133 22/133
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
148730 2016 Sweden Austria 2 36 31 Sweden & Austria 2568 Europe Europe 1.40187 1.20717 16/133 19/133
148732 2016 Sweden Switzerland 3 36 44 Sweden & Switzerland 2568 Europe Europe 1.40187 1.7134 16/133 15/133
148736 2016 Sweden Finland 1 36 13 Sweden & Finland 2568 Europe Europe 1.40187 0.506231 16/133 32/133
148743 2016 Sweden Lithuania 1 36 9 Sweden & Lithuania 2568 Europe Europe 1.40187 0.350467 16/133 39/133
148746 2016 Sweden Slovenia 2 36 15 Sweden & Slovenia 2568 Europe Europe 1.40187 0.584112 16/133 29/133
148750 2016 Sweden Luxembourg 3 36 6 Sweden & Luxembourg 2568 Europe Europe 1.40187 0.233645 16/133 47/133
148762 2016 Austria Switzerland 6 31 44 Austria & Switzerland 2568 Europe Europe 1.20717 1.7134 19/133 15/133
148764 2016 Austria Denmark 1 31 6 Austria & Denmark 2568 Europe Europe 1.20717 0.233645 19/133 44/133
148770 2016 Austria Croatia 2 31 3 Austria & Croatia 2568 Europe Europe 1.20717 0.116822 19/133 56/133
148776 2016 Austria Slovenia 2 31 15 Austria & Slovenia 2568 Europe Europe 1.20717 0.584112 19/133 29/133
148780 2016 Austria Luxembourg 1 31 6 Austria & Luxembourg 2568 Europe Europe 1.20717 0.233645 19/133 47/133
148786 2016 Austria Monaco 1 31 3 Austria & Monaco 2568 Europe Europe 1.20717 0.116822 19/133 58/133
148788 2016 Austria Australia 3 31 23 Austria & Australia 2568 Europe Oceania 1.20717 0.895639 19/133 22/133
148821 2016 Switzerland Denmark 1 44 6 Switzerland & Denmark 2568 Europe Europe 1.7134 0.233645 15/133 44/133
148824 2016 Switzerland Norway 2 44 7 Switzerland & Norway 2568 Europe Europe 1.7134 0.272586 15/133 42/133
148826 2016 Switzerland Ireland 1 44 6 Switzerland & Ireland 2568 Europe Europe 1.7134 0.233645 15/133 46/133
148827 2016 Switzerland Croatia 2 44 3 Switzerland & Croatia 2568 Europe Europe 1.7134 0.116822 15/133 56/133
148837 2016 Switzerland Luxembourg 1 44 6 Switzerland & Luxembourg 2568 Europe Europe 1.7134 0.233645 15/133 47/133
148845 2016 Switzerland Australia 3 44 23 Switzerland & Australia 2568 Europe Oceania 1.7134 0.895639 15/133 22/133
148859 2016 Bulgaria Macedonia 1 10 1 Bulgaria & Macedonia 2568 Europe Europe 0.389408 0.0389408 36/133 82/133
148864 2016 Bulgaria Luxembourg 1 10 6 Bulgaria & Luxembourg 2568 Europe Europe 0.389408 0.233645 36/133 47/133
148879 2016 Denmark Ireland 1 6 6 Denmark & Ireland 2568 Europe Europe 0.233645 0.233645 44/133 46/133
148891 2016 Denmark Malta 1 6 1 Denmark & Malta 2568 Europe Europe 0.233645 0.0389408 44/133 75/133
148921 2016 Slovakia Monaco 1 18 3 Slovakia & Monaco 2568 Europe Europe 0.700935 0.116822 26/133 58/133
149006 2016 Ireland Malta 1 6 1 Ireland & Malta 2568 Europe Europe 0.233645 0.0389408 46/133 75/133
149021 2016 Croatia Slovenia 1 3 15 Croatia & Slovenia 2568 Europe Europe 0.116822 0.584112 56/133 29/133
149025 2016 Croatia Luxembourg 1 3 6 Croatia & Luxembourg 2568 Europe Europe 0.116822 0.233645 56/133 47/133
149033 2016 Croatia Australia 2 3 23 Croatia & Australia 2568 Europe Oceania 0.116822 0.895639 56/133 22/133
149075 2016 Lithuania Slovenia 1 9 15 Lithuania & Slovenia 2568 Europe Europe 0.350467 0.584112 39/133 29/133
149178 2016 Luxembourg Australia 1 6 23 Luxembourg & Australia 2568 Europe Oceania 0.233645 0.895639 47/133 22/133

5387 rows × 14 columns


In [53]:
#Function to create and export dataframe as CSV
def get_csv(list_of_year_wise_results,country_list,year_list=None,output_path='collaboration_data.csv'):
    """Write the year-wise counts as a wide CSV with one row per year.

    Every ordered pair (i, j) of positions in ``country_list`` becomes one
    column: the diagonal (i == j) is labelled with the country name alone,
    every other entry with ``'<country i> & <country j>'``.

    Parameters
    ----------
    list_of_year_wise_results : sequence
        One entry per year, each indexable as ``[i][j]`` with positions taken
        from ``country_list``.
    country_list : list
        Country names in the order used by the matrices.
    year_list : iterable, optional
        Row labels, one per entry of ``list_of_year_wise_results``. When
        omitted the rows are labelled 2000, 2001, ... in order.
    output_path : str, optional
        Destination file; defaults to ``'collaboration_data.csv'``.

    Raises
    ------
    ValueError
        If ``year_list`` is given and its length differs from the number of
        yearly results.
    """
    n_years = len(list_of_year_wise_results)
    if year_list is None:
        years = list(range(2000, 2000 + n_years))
    else:
        years = list(year_list)
        if len(years) != n_years:
            raise ValueError('year_list has %d entries but there are %d yearly results'
                             % (len(years), n_years))

    x=[]
    columns=[]
    for i, first in enumerate(country_list):
        for j, second in enumerate(country_list):
            #Time series of entry [i][j] across all years
            x.append([yearly[i][j] for yearly in list_of_year_wise_results])
            if i == j:
                columns.append('%s'%(first))
            else:
                columns.append('%s & %s'%(first,second))

    data = pd.DataFrame(x,columns=years).T
    #Assign the flat list of labels: wrapping it in another list would create a
    #one-level MultiIndex and an extra header row in the CSV.
    data.columns = columns
    data.index.name = 'Year'
    data.to_csv(output_path)

In [54]:
#Export the counts of every country and country pair to collaboration_data.csv
get_csv(list_of_year_wise_results,country_list)

In [6]:
#Simple bar graphs of the yearly publication counts, one panel per country
#Enter the countries to plot as a list (panels are laid out two per row)
country_names = ['USA','China','India','Russia']

bar_colors = ['r','b','g','y']   #reused cyclically if there are more countries than colors
y_axis_top = 500                 #common upper limit so the panels share one scale
n_cols = 2
n_rows = max(1, (len(country_names) + n_cols - 1) // n_cols)

#Collect the series first so that an unknown country name fails before any figure is created.
#The diagonal entry [idx][idx] of each year's matrix is plotted as the country's publications.
publication_series = []
for name in country_names:
    idx = country_list.index(name)
    publication_series.append([list_of_year_wise_results[k][idx][idx] for k in range(len(year_list))])

fig = plt.figure(figsize=(12,3*n_rows))
fig.suptitle('Year-wise papers in Nuclear Science and Technology', fontsize=14, fontweight='bold')
plt.rcParams.update({'font.size': 8})

for panel, (name, publications) in enumerate(zip(country_names, publication_series)):
    axes = fig.add_subplot(n_rows, n_cols, panel+1)
    axes.bar(year_list, publications, 0.5, color=bar_colors[panel % len(bar_colors)])
    axes.set_ylabel('Publications')
    axes.set_ylim(0,y_axis_top)
    axes.set_title('%s'%name)

plt.tight_layout(pad=4, w_pad=4)
plt.show()



In [7]:
#Collaboration between 3 countries
#Enter 3 countries as a list:
coll_countries = ['USA','China','India']
#Panels from left to right: 1st & 2nd, 2nd & 3rd, 3rd & 1st country
panel_pairs = [(0,1),(1,2),(2,0)]

#Collect the series first so that an unknown country name fails before any figure is created.
pair_series = []
for p, q in panel_pairs:
    row = country_list.index(coll_countries[p])
    col = country_list.index(coll_countries[q])
    counts = [list_of_year_wise_results[k][row][col] for k in range(len(year_list))]
    pair_series.append((coll_countries[p], coll_countries[q], counts))

fig = plt.figure(figsize=(12,3))
fig.suptitle('Year-wise collaboration in Nuclear Science and Technology', fontsize=14, fontweight='bold')
plt.rcParams.update({'font.size': 8})

for panel, (first, second, counts) in enumerate(pair_series):
    axes = fig.add_subplot(1, len(pair_series), panel+1)
    axes.bar(year_list, counts, 0.5, color='r')
    axes.set_ylabel('Publications')
    #Scale the axis to the tallest bar; when the pair never collaborated the
    #default limits are kept, because a 0-0 range is degenerate.
    peak = max(counts) if counts else 0
    if peak > 0:
        axes.set_ylim(0,peak)
    axes.set_title('%s and %s'%(first,second))

plt.tight_layout(pad=4, w_pad=2)
plt.show()



In [ ]: