In [8]:
import pandas as pd
import math
import matplotlib.pyplot as plt
import numpy as np

In [4]:
# Load the country-pair collaboration table; the path has no directory part,
# so it is resolved against the notebook's current working directory.
data = pd.read_csv('knetwork2.csv')

In [5]:
# Display the loaded frame; for a frame this long pandas elides the middle rows.
data


Out[5]:
Unnamed: 0 Year Country 1 Country 2 No. of Collaborations No. of Publications of Country 1 No. of Publications of Country 2 Collaborators Total No. of Publications Continent of Country 1 Continent of Country 2 Percent of Total Publications of Country 1 Percent of Total Publications of Country 2 Rank of Country 1 Rank of Country 2
0 0 2000 USA Mexico 5.0 286.0 8.0 USA & Mexico 1391.0 North America North America 20.560748 0.575126 1/133 25/133
1 1 2000 USA Canada 5.0 286.0 33.0 USA & Canada 1391.0 North America North America 20.560748 2.372394 1/133 10/133
2 16 2000 USA Venezuela 1.0 286.0 1.0 USA & Venezuela 1391.0 North America South America 20.560748 0.071891 1/133 57/133
3 33 2000 USA Egypt 1.0 286.0 8.0 USA & Egypt 1391.0 North America Africa 20.560748 0.575126 1/133 26/133
4 48 2000 USA China 3.0 286.0 27.0 USA & China 1391.0 North America Asia 20.560748 1.941050 1/133 12/133
5 53 2000 USA Russia 14.0 286.0 110.0 USA & Russia 1391.0 North America Asia 20.560748 7.907980 1/133 4/133
6 54 2000 USA Japan 20.0 286.0 205.0 USA & Japan 1391.0 North America Asia 20.560748 14.737599 1/133 2/133
7 61 2000 USA Korea 1.0 286.0 47.0 USA & Korea 1391.0 North America Asia 20.560748 3.378864 1/133 7/133
8 63 2000 USA Arabia 1.0 286.0 2.0 USA & Arabia 1391.0 North America Asia 20.560748 0.143781 1/133 43/133
9 75 2000 USA Israel 3.0 286.0 15.0 USA & Israel 1391.0 North America Asia 20.560748 1.078361 1/133 17/133
10 85 2000 USA Germany 25.0 286.0 163.0 USA & Germany 1391.0 North America Europe 20.560748 11.718188 1/133 3/133
11 86 2000 USA France 8.0 286.0 102.0 USA & France 1391.0 North America Europe 20.560748 7.332854 1/133 5/133
12 88 2000 USA Italy 4.0 286.0 47.0 USA & Italy 1391.0 North America Europe 20.560748 3.378864 1/133 6/133
13 92 2000 USA Poland 2.0 286.0 10.0 USA & Poland 1391.0 North America Europe 20.560748 0.718907 1/133 23/133
14 93 2000 USA Romania 1.0 286.0 11.0 USA & Romania 1391.0 North America Europe 20.560748 0.790798 1/133 22/133
15 94 2000 USA Netherlands 1.0 286.0 16.0 USA & Netherlands 1391.0 North America Europe 20.560748 1.150252 1/133 16/133
16 95 2000 USA Belgium 2.0 286.0 34.0 USA & Belgium 1391.0 North America Europe 20.560748 2.444285 1/133 9/133
17 99 2000 USA Hungary 3.0 286.0 12.0 USA & Hungary 1391.0 North America Europe 20.560748 0.862689 1/133 20/133
18 103 2000 USA Switzerland 3.0 286.0 19.0 USA & Switzerland 1391.0 North America Europe 20.560748 1.365924 1/133 14/133
19 108 2000 USA Norway 1.0 286.0 5.0 USA & Norway 1391.0 North America Europe 20.560748 0.359454 1/133 31/133
20 109 2000 USA Georgia 4.0 286.0 5.0 USA & Georgia 1391.0 North America Europe 20.560748 0.359454 1/133 30/133
21 114 2000 USA Lithuania 1.0 286.0 1.0 USA & Lithuania 1391.0 North America Europe 20.560748 0.071891 1/133 49/133
22 127 2000 USA Monaco 1.0 286.0 1.0 USA & Monaco 1391.0 North America Europe 20.560748 0.071891 1/133 50/133
23 129 2000 USA Australia 3.0 286.0 12.0 USA & Australia 1391.0 North America Oceania 20.560748 0.862689 1/133 19/133
24 184 2000 Mexico Russia 1.0 8.0 110.0 Mexico & Russia 1391.0 North America Asia 0.575126 7.907980 25/133 4/133
25 322 2000 Canada Korea 1.0 33.0 47.0 Canada & Korea 1391.0 North America Asia 2.372394 3.378864 10/133 7/133
26 325 2000 Canada Malaysia 1.0 33.0 1.0 Canada & Malaysia 1391.0 North America Asia 2.372394 0.071891 10/133 55/133
27 341 2000 Canada Kuwait 1.0 33.0 1.0 Canada & Kuwait 1391.0 North America Asia 2.372394 0.071891 10/133 54/133
28 346 2000 Canada Germany 2.0 33.0 163.0 Canada & Germany 1391.0 North America Europe 2.372394 11.718188 10/133 3/133
29 354 2000 Canada Romania 1.0 33.0 11.0 Canada & Romania 1391.0 North America Europe 2.372394 0.790798 10/133 22/133
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5357 148730 2016 Sweden Austria 2.0 36.0 31.0 Sweden & Austria 2568.0 Europe Europe 1.401869 1.207165 16/133 19/133
5358 148732 2016 Sweden Switzerland 3.0 36.0 44.0 Sweden & Switzerland 2568.0 Europe Europe 1.401869 1.713396 16/133 15/133
5359 148736 2016 Sweden Finland 1.0 36.0 13.0 Sweden & Finland 2568.0 Europe Europe 1.401869 0.506231 16/133 32/133
5360 148743 2016 Sweden Lithuania 1.0 36.0 9.0 Sweden & Lithuania 2568.0 Europe Europe 1.401869 0.350467 16/133 39/133
5361 148746 2016 Sweden Slovenia 2.0 36.0 15.0 Sweden & Slovenia 2568.0 Europe Europe 1.401869 0.584112 16/133 29/133
5362 148750 2016 Sweden Luxembourg 3.0 36.0 6.0 Sweden & Luxembourg 2568.0 Europe Europe 1.401869 0.233645 16/133 47/133
5363 148762 2016 Austria Switzerland 6.0 31.0 44.0 Austria & Switzerland 2568.0 Europe Europe 1.207165 1.713396 19/133 15/133
5364 148764 2016 Austria Denmark 1.0 31.0 6.0 Austria & Denmark 2568.0 Europe Europe 1.207165 0.233645 19/133 44/133
5365 148770 2016 Austria Croatia 2.0 31.0 3.0 Austria & Croatia 2568.0 Europe Europe 1.207165 0.116822 19/133 56/133
5366 148776 2016 Austria Slovenia 2.0 31.0 15.0 Austria & Slovenia 2568.0 Europe Europe 1.207165 0.584112 19/133 29/133
5367 148780 2016 Austria Luxembourg 1.0 31.0 6.0 Austria & Luxembourg 2568.0 Europe Europe 1.207165 0.233645 19/133 47/133
5368 148786 2016 Austria Monaco 1.0 31.0 3.0 Austria & Monaco 2568.0 Europe Europe 1.207165 0.116822 19/133 58/133
5369 148788 2016 Austria Australia 3.0 31.0 23.0 Austria & Australia 2568.0 Europe Oceania 1.207165 0.895639 19/133 22/133
5370 148821 2016 Switzerland Denmark 1.0 44.0 6.0 Switzerland & Denmark 2568.0 Europe Europe 1.713396 0.233645 15/133 44/133
5371 148824 2016 Switzerland Norway 2.0 44.0 7.0 Switzerland & Norway 2568.0 Europe Europe 1.713396 0.272586 15/133 42/133
5372 148826 2016 Switzerland Ireland 1.0 44.0 6.0 Switzerland & Ireland 2568.0 Europe Europe 1.713396 0.233645 15/133 46/133
5373 148827 2016 Switzerland Croatia 2.0 44.0 3.0 Switzerland & Croatia 2568.0 Europe Europe 1.713396 0.116822 15/133 56/133
5374 148837 2016 Switzerland Luxembourg 1.0 44.0 6.0 Switzerland & Luxembourg 2568.0 Europe Europe 1.713396 0.233645 15/133 47/133
5375 148845 2016 Switzerland Australia 3.0 44.0 23.0 Switzerland & Australia 2568.0 Europe Oceania 1.713396 0.895639 15/133 22/133
5376 148859 2016 Bulgaria Macedonia 1.0 10.0 1.0 Bulgaria & Macedonia 2568.0 Europe Europe 0.389408 0.038941 36/133 82/133
5377 148864 2016 Bulgaria Luxembourg 1.0 10.0 6.0 Bulgaria & Luxembourg 2568.0 Europe Europe 0.389408 0.233645 36/133 47/133
5378 148879 2016 Denmark Ireland 1.0 6.0 6.0 Denmark & Ireland 2568.0 Europe Europe 0.233645 0.233645 44/133 46/133
5379 148891 2016 Denmark Malta 1.0 6.0 1.0 Denmark & Malta 2568.0 Europe Europe 0.233645 0.038941 44/133 75/133
5380 148921 2016 Slovakia Monaco 1.0 18.0 3.0 Slovakia & Monaco 2568.0 Europe Europe 0.700935 0.116822 26/133 58/133
5381 149006 2016 Ireland Malta 1.0 6.0 1.0 Ireland & Malta 2568.0 Europe Europe 0.233645 0.038941 46/133 75/133
5382 149021 2016 Croatia Slovenia 1.0 3.0 15.0 Croatia & Slovenia 2568.0 Europe Europe 0.116822 0.584112 56/133 29/133
5383 149025 2016 Croatia Luxembourg 1.0 3.0 6.0 Croatia & Luxembourg 2568.0 Europe Europe 0.116822 0.233645 56/133 47/133
5384 149033 2016 Croatia Australia 2.0 3.0 23.0 Croatia & Australia 2568.0 Europe Oceania 0.116822 0.895639 56/133 22/133
5385 149075 2016 Lithuania Slovenia 1.0 9.0 15.0 Lithuania & Slovenia 2568.0 Europe Europe 0.350467 0.584112 39/133 29/133
5386 149178 2016 Luxembourg Australia 1.0 6.0 23.0 Luxembourg & Australia 2568.0 Europe Oceania 0.233645 0.895639 47/133 22/133

5387 rows × 15 columns


In [7]:
# Signed gap in publication counts for each row: Country 1 minus Country 2.
data['No. of Publications of Country 1'].sub(
    data['No. of Publications of Country 2']
)


Out[7]:
0       278.0
1       253.0
2       285.0
3       278.0
4       259.0
5       176.0
6        81.0
7       239.0
8       284.0
9       271.0
10      123.0
11      184.0
12      239.0
13      276.0
14      275.0
15      270.0
16      252.0
17      274.0
18      267.0
19      281.0
20      281.0
21      285.0
22      285.0
23      274.0
24     -102.0
25      -14.0
26       32.0
27       32.0
28     -130.0
29       22.0
        ...  
5357      5.0
5358     -8.0
5359     23.0
5360     27.0
5361     21.0
5362     30.0
5363    -13.0
5364     25.0
5365     28.0
5366     16.0
5367     25.0
5368     28.0
5369      8.0
5370     38.0
5371     37.0
5372     38.0
5373     41.0
5374     38.0
5375     21.0
5376      9.0
5377      4.0
5378      0.0
5379      5.0
5380     15.0
5381      5.0
5382    -12.0
5383     -3.0
5384    -20.0
5385     -6.0
5386    -17.0
dtype: float64

In [39]:
"""Here, I plot a histogram of the absolute difference in number of publications between the two countries in each pair, and fit a normal distribution to it."""


Out[39]:
'Here, I plot a histogram of the absolute difference in number of publications between the two countries in each pair, and fit a normal distribution to it.'

In [9]:
# Signed publication gap per row (Country 1 minus Country 2), kept as a Series.
pub_diff = data['No. of Publications of Country 1'].sub(
    data['No. of Publications of Country 2']
)

In [34]:
# Magnitude of the gap, ignoring which country of the pair published more.
abs_pub_diff = pub_diff.abs()

In [35]:
# Average absolute publication gap across all rows.
mu = abs_pub_diff.mean()

In [36]:
# Inspect the mean absolute publication gap.
mu


Out[36]:
90.04418043437906

In [37]:
# Population standard deviation of the absolute gap; ddof=0 is spelled out
# because that is NumPy's default, whereas pandas would otherwise use ddof=1.
abs_pub_diff.std(ddof=0)


Out[37]:
95.31926897474308

In [38]:
from scipy.stats import norm

# Fit a normal distribution to the absolute publication gaps. norm.fit returns
# the maximum-likelihood location and scale, so `mu` is reassigned here.
# NOTE(review): the values are non-negative (they come from abs()), so a normal
# curve is only a rough descriptive summary of this distribution.
mu, std = norm.fit(abs_pub_diff)

# Plot the histogram, normalised to unit area so it is comparable with the PDF.
# `density=True` replaces the `normed` keyword, which was deprecated and later
# removed from Matplotlib. One bin per observation (bins=len(...)) leaves most
# bins empty and inflates the bar heights, so let NumPy choose the bin edges.
plt.hist(abs_pub_diff, bins='auto', density=True, alpha=0.6, color='g')

# Overlay the fitted PDF across the x-range the histogram just established.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
plt.title(title)
plt.xlabel('Absolute difference in number of publications')
plt.ylabel('Density')

plt.show()



In [40]:
"""Next, I'm going to keep only the country pairs whose absolute difference in publications is below the mean difference."""


Out[40]:
"Next, I'm going to keep only the country pairs whose absolute difference in publications is below the mean difference."

In [94]:
# Row positions of the pairs whose absolute gap is strictly below `mu`,
# as a flat 1-D integer array.
x = np.flatnonzero(abs_pub_diff < mu)
# The same positions as a plain Python list.
y = x.tolist()

In [95]:
# `y` holds row positions (it is derived from np.where), so select by position
# with .iloc. Plain `[...]` would treat the integers as index labels, which only
# coincides with positions while `data` keeps its default 0..n-1 index.
data['Collaborators'].iloc[y]


Out[95]:
6                    USA & Japan
25                Canada & Korea
26             Canada & Malaysia
27               Canada & Kuwait
29              Canada & Romania
30               Canada & Sweden
31          Canada & Switzerland
32               Canada & Monaco
33                 Brazil & Peru
36               Brazil & France
37                Brazil & Italy
38          Brazil & Netherlands
39                Brazil & Czech
40              Brazil & Hungary
43             Venezuela & Italy
44           Venezuela & Romania
46                Sudan & Sweden
48                China & Russia
50                 China & Italy
51               China & Belgium
52               China & Hungary
53           China & Switzerland
56                Russia & Korea
57              Russia & Germany
58               Russia & France
59                Russia & Italy
60                Russia & Spain
63              Russia & Belgium
66               Russia & Sweden
72               Japan & Germany
                  ...           
5357            Sweden & Austria
5358        Sweden & Switzerland
5359            Sweden & Finland
5360          Sweden & Lithuania
5361           Sweden & Slovenia
5362         Sweden & Luxembourg
5363       Austria & Switzerland
5364           Austria & Denmark
5365           Austria & Croatia
5366          Austria & Slovenia
5367        Austria & Luxembourg
5368            Austria & Monaco
5369         Austria & Australia
5370       Switzerland & Denmark
5371        Switzerland & Norway
5372       Switzerland & Ireland
5373       Switzerland & Croatia
5374    Switzerland & Luxembourg
5375     Switzerland & Australia
5376        Bulgaria & Macedonia
5377       Bulgaria & Luxembourg
5378           Denmark & Ireland
5379             Denmark & Malta
5380           Slovakia & Monaco
5381             Ireland & Malta
5382          Croatia & Slovenia
5383        Croatia & Luxembourg
5384         Croatia & Australia
5385        Lithuania & Slovenia
5386      Luxembourg & Australia
Name: Collaborators, dtype: object

In [56]:
# NOTE(review): the saved output below is a 1-tuple wrapping the array, i.e. the
# raw np.where result, and its execution count (56) is lower than that of the
# cell that now builds `x` (94). It looks stale; re-run to refresh it.
x


Out[56]:
(array([   6,   25,   26, ..., 5384, 5385, 5386]),)

In [ ]: