notebook.community

Edit and run



In [1]:

    
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd



In [2]:

    
# Location of the indicators CSV. It can be overridden with the
# WDI_INDICATORS_CSV environment variable so the notebook is not tied to one
# machine; the original local Windows path is kept as the fallback.
INDICATORS_CSV = os.environ.get(
    'WDI_INDICATORS_CSV',
    r'C:\Users\hrao\Documents\Personal\HK\Python\world-development-indicators\Indicators.csv',
)
data = pd.read_csv(INDICATORS_CSV)



In [3]:

    
# Dimensions of the loaded indicators table as (rows, columns).
data.shape









    Out[3]:





(5656458, 6)



In [4]:

    
# Preview the first ten rows to see the column layout.
data.head(10)









    Out[4]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      0
      Arab World
      ARB
      Adolescent fertility rate (births per 1,000 wo...
      SP.ADO.TFRT
      1960
      1.335609e+02
    
    
      1
      Arab World
      ARB
      Age dependency ratio (% of working-age populat...
      SP.POP.DPND
      1960
      8.779760e+01
    
    
      2
      Arab World
      ARB
      Age dependency ratio, old (% of working-age po...
      SP.POP.DPND.OL
      1960
      6.634579e+00
    
    
      3
      Arab World
      ARB
      Age dependency ratio, young (% of working-age ...
      SP.POP.DPND.YG
      1960
      8.102333e+01
    
    
      4
      Arab World
      ARB
      Arms exports (SIPRI trend indicator values)
      MS.MIL.XPRT.KD
      1960
      3.000000e+06
    
    
      5
      Arab World
      ARB
      Arms imports (SIPRI trend indicator values)
      MS.MIL.MPRT.KD
      1960
      5.380000e+08
    
    
      6
      Arab World
      ARB
      Birth rate, crude (per 1,000 people)
      SP.DYN.CBRT.IN
      1960
      4.769789e+01
    
    
      7
      Arab World
      ARB
      CO2 emissions (kt)
      EN.ATM.CO2E.KT
      1960
      5.956399e+04
    
    
      8
      Arab World
      ARB
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1960
      6.439635e-01
    
    
      9
      Arab World
      ARB
      CO2 emissions from gaseous fuel consumption (%...
      EN.ATM.CO2E.GF.ZS
      1960
      5.041292e+00



In [5]:

    
# Number of distinct country names in the table.
data['CountryName'].unique().size









    Out[5]:





247



In [6]:

    
# Number of distinct country codes in the table.
data['CountryCode'].unique().size









    Out[6]:





247



In [7]:

    
# Number of distinct indicator codes in the table.
data['IndicatorCode'].unique().size









    Out[7]:





1344



In [8]:

    
# Number of distinct years covered by the table.
data['Year'].unique().size









    Out[8]:





56



In [9]:

    
# First and last year present in the data. The vectorized Series methods are
# used instead of the built-in min()/max(), which would walk the whole column
# element by element in Python.
print('Range of Years:', data['Year'].min(), 'to', data['Year'].max())









    



Range of Years: 1960 to 2015



In [10]:

    
# Select the rows for one country (USA) and one indicator (per-capita CO2).
hist_country = 'USA'
# Country codes are identifiers, so compare them exactly instead of running a
# regex substring search over the column.
mask1 = data['CountryCode'] == hist_country

# Raw string: `\(` is a regex-escaped parenthesis. In a normal string literal
# it is an invalid escape sequence that newer Python versions warn about.
hist_indicator = r'CO2 emissions \(metric'
mask2 = data['IndicatorName'].str.contains(hist_indicator)

# Rows that satisfy both conditions.
stage = data[mask1 & mask2]



In [11]:

    
# Preview the first five rows of the filtered USA CO2 frame.
stage.head(5)









    Out[11]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      22232
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1960
      15.999779
    
    
      48708
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1961
      15.681256
    
    
      77087
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1962
      16.013937
    
    
      105704
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1963
      16.482762
    
    
      134742
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1964
      16.968119



In [12]:

    
# Pull the year and value columns out of the filtered frame as NumPy arrays.
years, co2 = stage['Year'].values, stage['Value'].values



In [13]:

    
# Bar chart of the filtered CO2 values by year. Axis labels and a title are
# set so the figure can be read on its own.
plt.bar(years, co2)
plt.xlabel('Year')
plt.ylabel(stage['IndicatorName'].iloc[0])
plt.title('CO2 Emissions in USA')
plt.show()



In [14]:

    
# Line chart of the filtered CO2 values over time, drawn through the explicit
# Axes interface.
fig, ax = plt.subplots()
ax.plot(stage['Year'].values, stage['Value'].values)
ax.set_xlabel('Year')
ax.set_ylabel(stage['IndicatorName'].iloc[0])
# Fixed view limits: [xmin, xmax, ymin, ymax].
ax.axis([1959, 2011, 0, 25])
ax.set_title('CO2 Emissions in USA')
plt.show()



In [15]:

    
# Values of the filtered CO2 frame as a NumPy array, used as histogram input.
hist_data = stage['Value'].values



In [16]:

    
# Number of observations that will go into the histogram.
hist_data.size









    Out[16]:





52



In [17]:

    
# Histogram (10 bins) of the filtered CO2 values; each observation is one year.
# `normed` has been removed from Matplotlib's hist(); `density=False` is its
# replacement and likewise plots raw counts.
plt.hist(hist_data, 10, density=False, facecolor='green')
plt.xlabel(stage['IndicatorName'].iloc[0])
plt.ylabel('# of Years')
plt.title('Histogram Example')
plt.grid(True)
plt.show()



In [18]:

    
# Select the per-capita CO2 indicator for every entity in a single year.
# Raw string: `\(` is a regex-escaped parenthesis and would be an invalid
# escape sequence in a normal string literal.
hist_indicator = r'CO2 emissions \(metric'
hist_year = 2011

mask1 = data['IndicatorName'].str.contains(hist_indicator)
# A single year is compared directly rather than via a one-element isin().
mask2 = data['Year'] == hist_year

co2_2011 = data[mask1 & mask2]
co2_2011.head()









    Out[18]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      5026275
      Arab World
      ARB
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      4.724500
    
    
      5026788
      Caribbean small states
      CSS
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      9.692960
    
    
      5027295
      Central Europe and the Baltics
      CEB
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      6.911131
    
    
      5027870
      East Asia & Pacific (all income levels)
      EAS
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      5.859548
    
    
      5028456
      East Asia & Pacific (developing only)
      EAP
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      5.302499



In [19]:

    
# Number of rows selected for 2011.
co2_2011.shape[0]









    Out[19]:





232



In [20]:

    
# Histogram (10 bins) of the 2011 per-capita CO2 values, with an arrow labelled
# "USA" at hand-picked data coordinates. Everything is drawn on the same Axes.
fig, ax = plt.subplots()

ax.annotate("USA", xy=(18, 5), xycoords='data',
            xytext=(18, 30), textcoords='data',
            arrowprops=dict(arrowstyle='->',
                            connectionstyle='arc3'))

# `normed` has been removed from Matplotlib's hist(); `density=False` is its
# replacement and likewise plots raw counts.
ax.hist(co2_2011['Value'], 10, density=False, facecolor='green')
ax.set_xlabel(co2_2011['IndicatorName'].iloc[0])
ax.set_ylabel('# of Countries')
ax.set_title('Histogram of CO2 Emissions Per Capita')
ax.grid(True)

plt.show()



In [21]:

    
# Select the USA rows for the constant-2005 GDP-per-capita indicator.
# Raw string: `\(` is a regex-escaped parenthesis and would be an invalid
# escape sequence in a normal string literal.
hist_indicator = r'GDP per capita \(constant 2005'
hist_country = 'USA'

mask1 = data['IndicatorName'].str.contains(hist_indicator)
# Country codes are identifiers, so compare them exactly instead of running a
# regex substring search over the column.
mask2 = data['CountryCode'] == hist_country

gdp_stage = data[mask1 & mask2]



In [22]:

    
# Preview the first five rows of the USA GDP frame.
gdp_stage.head(5)









    Out[22]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      22282
      United States
      USA
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1960
      15482.707760
    
    
      48759
      United States
      USA
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1961
      15578.409657
    
    
      77142
      United States
      USA
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1962
      16276.426685
    
    
      105760
      United States
      USA
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1963
      16749.789436
    
    
      134798
      United States
      USA
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1964
      17476.822248



In [23]:

    
# Show the USA CO2 frame again for comparison with the GDP frame.
stage.head(5)









    Out[23]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      22232
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1960
      15.999779
    
    
      48708
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1961
      15.681256
    
    
      77087
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1962
      16.013937
    
    
      105704
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1963
      16.482762
    
    
      134742
      United States
      USA
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1964
      16.968119



In [24]:

    
# Line chart of the USA GDP-per-capita values over time, drawn through the
# explicit Axes interface.
fig, ax = plt.subplots()
ax.plot(gdp_stage['Year'].values, gdp_stage['Value'].values)
ax.set_xlabel('Year')
ax.set_ylabel(gdp_stage['IndicatorName'].iloc[0])
ax.set_title('GDP Per Capita USA')
plt.show()



In [25]:

    
# Compare the year coverage of the two USA series.
gdp_years = gdp_stage['Year']
co2_years = stage['Year']
print('GDP Min Year', gdp_years.min(), "Max: ", gdp_years.max())
print('CO2 Min Year', co2_years.min(), "Max: ", co2_years.max())









    



GDP Min Year 1960 Max:  2014
CO2 Min Year 1960 Max:  2011



In [26]:

    
# Keep only the GDP years that also occur in the CO2 series, so both series
# cover the same period. This replaces a hard-coded `< 2012` cut-off with one
# derived from the data itself.
gdp_stage_trunc = gdp_stage[gdp_stage['Year'].isin(stage['Year'])]
# The two lengths should match before the series are paired up.
print(len(gdp_stage_trunc))
print(len(stage))



In [27]:

    
%matplotlib inline
import matplotlib.pyplot as plt

fig, axis = plt.subplots()

axis.yaxis.grid(True)
axis.set_title('CO2 Emissions vs GDP \(per capita\)', fontsize=10)
axis.set_xlabel(gdp_stage_trunc['IndicatorName'].iloc[0], fontsize=10)
axis.set_ylabel(stage['IndicatorName'].iloc[0], fontsize=10)

X = gdp_stage_trunc['Value']
Y = stage['Value']

axis.scatter(X,Y)

plt.show()



In [28]:

    
# Correlation matrix between the USA GDP and CO2 series (paired by position).
np.corrcoef(x=gdp_stage_trunc['Value'], y=stage['Value'])









    Out[28]:





array([[ 1.        ,  0.07676005],
       [ 0.07676005,  1.        ]])

Correlation analysis for a different country: India (IND)



In [29]:

    
# Look at the first ten rows of the full table again before filtering.
data.head(10)









    Out[29]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      0
      Arab World
      ARB
      Adolescent fertility rate (births per 1,000 wo...
      SP.ADO.TFRT
      1960
      1.335609e+02
    
    
      1
      Arab World
      ARB
      Age dependency ratio (% of working-age populat...
      SP.POP.DPND
      1960
      8.779760e+01
    
    
      2
      Arab World
      ARB
      Age dependency ratio, old (% of working-age po...
      SP.POP.DPND.OL
      1960
      6.634579e+00
    
    
      3
      Arab World
      ARB
      Age dependency ratio, young (% of working-age ...
      SP.POP.DPND.YG
      1960
      8.102333e+01
    
    
      4
      Arab World
      ARB
      Arms exports (SIPRI trend indicator values)
      MS.MIL.XPRT.KD
      1960
      3.000000e+06
    
    
      5
      Arab World
      ARB
      Arms imports (SIPRI trend indicator values)
      MS.MIL.MPRT.KD
      1960
      5.380000e+08
    
    
      6
      Arab World
      ARB
      Birth rate, crude (per 1,000 people)
      SP.DYN.CBRT.IN
      1960
      4.769789e+01
    
    
      7
      Arab World
      ARB
      CO2 emissions (kt)
      EN.ATM.CO2E.KT
      1960
      5.956399e+04
    
    
      8
      Arab World
      ARB
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1960
      6.439635e-01
    
    
      9
      Arab World
      ARB
      CO2 emissions from gaseous fuel consumption (%...
      EN.ATM.CO2E.GF.ZS
      1960
      5.041292e+00



In [30]:

    
# Select the India (IND) rows for the per-capita CO2 indicator.
# Raw string: `\(` is a regex-escaped parenthesis and would be an invalid
# escape sequence in a normal string literal.
hist_indicator = r'CO2 emissions \(metric'
hist_country = 'IND'

mask1 = data['IndicatorName'].str.contains(hist_indicator)
# Country codes are identifiers, so compare them exactly instead of running a
# regex substring search over the column.
mask2 = data['CountryCode'] == hist_country

ind_data = data[mask1 & mask2]



In [31]:

    
# Preview the first five rows of the India CO2 frame.
ind_data.head(5)









    Out[31]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      11577
      India
      IND
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1960
      0.268161
    
    
      36513
      India
      IND
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1961
      0.284292
    
    
      64049
      India
      IND
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1962
      0.306519
    
    
      92493
      India
      IND
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1963
      0.322533
    
    
      121290
      India
      IND
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      1964
      0.308900



In [32]:

    
# Histogram (10 bins) of India's per-capita CO2 values; each observation is
# one year. `normed` has been removed from Matplotlib's hist();
# `density=False` is its replacement and likewise plots raw counts.
plt.hist(ind_data['Value'], 10, density=False, facecolor='green')

plt.xlabel(ind_data['IndicatorName'].iloc[0])
plt.ylabel('# of Years')

plt.title('Histogram of CO2 Emissions')

plt.grid()
plt.show()



In [33]:

    
# Select the per-capita CO2 indicator for every entity in a single year.
# Raw string: `\(` is a regex-escaped parenthesis and would be an invalid
# escape sequence in a normal string literal.
hist_indicator = r'CO2 emissions \(metric'
hist_year = 2011

mask1 = data['IndicatorName'].str.contains(hist_indicator)
# A single year is compared directly rather than via a one-element isin().
mask2 = data['Year'] == hist_year

co2_2011_dev = data[mask1 & mask2]



In [34]:

    
# Preview the first five rows of the 2011 CO2 selection.
co2_2011_dev.head(5)









    Out[34]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      5026275
      Arab World
      ARB
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      4.724500
    
    
      5026788
      Caribbean small states
      CSS
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      9.692960
    
    
      5027295
      Central Europe and the Baltics
      CEB
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      6.911131
    
    
      5027870
      East Asia & Pacific (all income levels)
      EAS
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      5.859548
    
    
      5028456
      East Asia & Pacific (developing only)
      EAP
      CO2 emissions (metric tons per capita)
      EN.ATM.CO2E.PC
      2011
      5.302499



In [35]:

    
# Histogram (10 bins) of the 2011 per-capita CO2 values, with an arrow labelled
# "IND" at hand-picked data coordinates.
# `normed` has been removed from Matplotlib's hist(); `density=False` is its
# replacement and likewise plots raw counts.
plt.hist(co2_2011_dev['Value'], 10, density=False, facecolor='green')

plt.xlabel(co2_2011_dev['IndicatorName'].iloc[0])
plt.ylabel('# of Countries')
plt.title('Histogram of CO2 Emissions Per Capita')

plt.annotate("IND",
             xy=(0.3, 135), xycoords='data',
             xytext=(0.3, 160), textcoords='data',
             arrowprops=dict(arrowstyle='->',
                             connectionstyle='arc3'))

plt.grid(True)
plt.show()



In [36]:

    
# Select the India (IND) rows for the constant-2005 GDP-per-capita indicator.
# Raw string: `\(` is a regex-escaped parenthesis and would be an invalid
# escape sequence in a normal string literal.
hist_indicator = r'GDP per capita \(constant 2005'
hist_country = 'IND'

mask1 = data['IndicatorName'].str.contains(hist_indicator)
# Country codes are identifiers, so compare them exactly instead of running a
# regex substring search over the column.
mask2 = data['CountryCode'] == hist_country

gdp_dev_stage = data[mask1 & mask2]



In [37]:

    
# Preview the first five rows of the India GDP frame.
gdp_dev_stage.head(5)









    Out[37]:







  
    
      
      CountryName
      CountryCode
      IndicatorName
      IndicatorCode
      Year
      Value
    
  
  
    
      11616
      India
      IND
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1960
      228.304470
    
    
      36555
      India
      IND
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1961
      232.142053
    
    
      64095
      India
      IND
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1962
      234.166685
    
    
      92540
      India
      IND
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1963
      243.176418
    
    
      121337
      India
      IND
      GDP per capita (constant 2005 US$)
      NY.GDP.PCAP.KD
      1964
      255.963668



In [38]:

    
# Line chart of India's GDP-per-capita values over time, drawn through the
# explicit Axes interface.
fig, ax = plt.subplots()
ax.plot(gdp_dev_stage['Year'], gdp_dev_stage['Value'])
ax.set_xlabel('Year')
ax.set_ylabel(gdp_dev_stage['IndicatorName'].iloc[0])
ax.set_title('GDP Per Capita IND')
plt.show()



In [39]:

    
# Compare the year coverage of the two India series. The second line reports
# the CO2 frame, so it is labelled "CO2" (it previously also said "GDP").
print('GDP Min Year = ', gdp_dev_stage['Year'].min(), 'Max: ', gdp_dev_stage['Year'].max())
print('CO2 Min Year = ', ind_data['Year'].min(), 'Max: ', ind_data['Year'].max())









    



GDP Min Year =  1960 Max:  2014
GDP Min Year =  1960 Max:  2011



In [40]:

    
# Keep only the GDP years that also occur in the India CO2 series, so both
# series cover the same period. This replaces a hard-coded `< 2012` cut-off
# with one derived from the data itself.
gdp_dev_stage_trunc = gdp_dev_stage[gdp_dev_stage['Year'].isin(ind_data['Year'])]



In [41]:

    
# Row counts of the truncated GDP series and the CO2 series, one per line.
print(len(gdp_dev_stage_trunc), len(ind_data), sep='\n')



In [46]:

    
%matplotlib inline
import matplotlib.pyplot as plt

fig, axis = plt.subplots()

axis.yaxis.grid(True)
axis.set_xlabel(gdp_dev_stage_trunc['IndicatorName'].iloc[0], fontsize=12)
axis.set_ylabel(ind_data['IndicatorName'].iloc[0], fontsize=12)
axis.set_title('CO2 Emissions vs GDP (per capita)', fontsize = 12)

X = gdp_dev_stage_trunc['Value']
Y = ind_data['Value']

axis.scatter(X, Y)
plt.show()



In [43]:

    
# Correlation matrix between India's GDP and CO2 series (paired by position).
np.corrcoef(x=gdp_dev_stage_trunc['Value'], y=ind_data['Value'])









    Out[43]:





array([[ 1.        ,  0.96753758],
       [ 0.96753758,  1.        ]])

	CountryName	CountryCode	IndicatorName	IndicatorCode	Year	Value
0	Arab World	ARB	Adolescent fertility rate (births per 1,000 wo...	SP.ADO.TFRT	1960	1.335609e+02
1	Arab World	ARB	Age dependency ratio (% of working-age populat...	SP.POP.DPND	1960	8.779760e+01
2	Arab World	ARB	Age dependency ratio, old (% of working-age po...	SP.POP.DPND.OL	1960	6.634579e+00
3	Arab World	ARB	Age dependency ratio, young (% of working-age ...	SP.POP.DPND.YG	1960	8.102333e+01
4	Arab World	ARB	Arms exports (SIPRI trend indicator values)	MS.MIL.XPRT.KD	1960	3.000000e+06
5	Arab World	ARB	Arms imports (SIPRI trend indicator values)	MS.MIL.MPRT.KD	1960	5.380000e+08
6	Arab World	ARB	Birth rate, crude (per 1,000 people)	SP.DYN.CBRT.IN	1960	4.769789e+01
7	Arab World	ARB	CO2 emissions (kt)	EN.ATM.CO2E.KT	1960	5.956399e+04
8	Arab World	ARB	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1960	6.439635e-01
9	Arab World	ARB	CO2 emissions from gaseous fuel consumption (%...	EN.ATM.CO2E.GF.ZS	1960	5.041292e+00

	CountryName	CountryCode	IndicatorName	IndicatorCode	Year	Value
22232	United States	USA	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1960	15.999779
48708	United States	USA	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1961	15.681256
77087	United States	USA	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1962	16.013937
105704	United States	USA	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1963	16.482762
134742	United States	USA	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1964	16.968119

	CountryName	CountryCode	IndicatorName	IndicatorCode	Year	Value
5026275	Arab World	ARB	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	2011	4.724500
5026788	Caribbean small states	CSS	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	2011	9.692960
5027295	Central Europe and the Baltics	CEB	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	2011	6.911131
5027870	East Asia & Pacific (all income levels)	EAS	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	2011	5.859548
5028456	East Asia & Pacific (developing only)	EAP	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	2011	5.302499

	CountryName	CountryCode	IndicatorName	IndicatorCode	Year	Value
22282	United States	USA	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1960	15482.707760
48759	United States	USA	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1961	15578.409657
77142	United States	USA	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1962	16276.426685
105760	United States	USA	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1963	16749.789436
134798	United States	USA	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1964	17476.822248

	CountryName	CountryCode	IndicatorName	IndicatorCode	Year	Value
11577	India	IND	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1960	0.268161
36513	India	IND	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1961	0.284292
64049	India	IND	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1962	0.306519
92493	India	IND	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1963	0.322533
121290	India	IND	CO2 emissions (metric tons per capita)	EN.ATM.CO2E.PC	1964	0.308900

	CountryName	CountryCode	IndicatorName	IndicatorCode	Year	Value
11616	India	IND	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1960	228.304470
36555	India	IND	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1961	232.142053
64095	India	IND	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1962	234.166685
92540	India	IND	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1963	243.176418
121337	India	IND	GDP per capita (constant 2005 US$)	NY.GDP.PCAP.KD	1964	255.963668