In [79]:
import pandas as pd
import numpy as np

In [210]:
# Load the pickled city light-fraction table, rename its ISO_A3 column to
# 'code', and index the frame by that code so it can be joined on it later.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df = (
    pd.read_pickle('data/geo/pickles/cities_light_frac.pickle')
    .rename(columns={'ISO_A3': 'code'})
    .set_index('code')
)
df.head(3)


Out[210]:
city F101992 F101993 F101994 F121994 F121995 F121996 F121997 F121998 F121999 ... F162004 F162005 F162006 F162007 F162008 F162009 F182010 F182011 F182012 F182013
code
AUS Adelaide 0.040712 0.038999 0.038227 0.037661 0.033461 0.034791 0.033779 0.033385 0.035139 ... 0.032392 0.035174 0.033167 0.031126 0.032087 0.032131 0.025771 0.029548 0.026893 0.028003
USA Mesa 0.004626 0.004192 0.004260 0.004590 0.004301 0.004289 0.004533 0.004601 0.004417 ... 0.005367 0.005948 0.005952 0.005300 0.005392 0.005644 0.004456 0.005139 0.005326 0.005178
ARE Sharjah 0.745420 0.732680 0.713109 0.745305 0.710744 0.692191 0.687522 0.669494 0.679610 ... 0.690925 0.718230 0.707320 0.695539 0.704375 0.722402 0.646997 0.649524 0.649606 0.651584

3 rows × 35 columns


In [172]:
# Load, clean, and normalize wb data.
# Keeps only the rows of the selected series, drops the descriptive columns
# and the 2014/2015 columns, indexes by country code, and discards any
# country that has a missing value in one of the remaining year columns.
label = 'GDP, PPP (constant 2011 international $)'
wb = pd.read_csv('data/econ/wb.csv')
# Build the cleaned frame as one chain on an explicit .loc selection instead
# of mutating a boolean-filtered slice in place (which triggers pandas'
# SettingWithCopyWarning and makes the cell harder to re-run safely).
wb = (
    wb.loc[wb['Series Name'] == label]
    .drop(['Country Name', 'Series Name', 'Series Code', '2014', '2015'], axis=1)
    .rename(columns={'Country Code': 'code'})
    .set_index('code')
    .dropna(axis=0)
)
wb.head(3)


Out[172]:
1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 ... 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
code
ALB 9.241349e+09 1.012852e+10 1.096919e+10 1.242809e+10 1.355904e+10 1.208953e+10 1.317866e+10 1.495794e+10 1.595513e+10 1.722202e+10 ... 2.007121e+10 2.121945e+10 2.237188e+10 2.369182e+10 2.547581e+10 2.632925e+10 2.730607e+10 2.800237e+10 2.840000e+10 2.871524e+10
DZA 2.681550e+11 2.625240e+11 2.601610e+11 2.700470e+11 2.811190e+11 2.842110e+11 2.987060e+11 3.082650e+11 3.150460e+11 3.295780e+11 ... 3.891360e+11 4.120950e+11 4.191010e+11 4.333500e+11 4.420170e+11 4.490890e+11 4.652570e+11 4.782840e+11 4.940680e+11 5.079020e+11
ATG 1.092734e+09 1.151634e+09 1.224505e+09 1.173359e+09 1.252439e+09 1.313631e+09 1.371531e+09 1.428060e+09 1.500060e+09 1.452144e+09 ... 1.666678e+09 1.768066e+09 2.004569e+09 2.194982e+09 2.196543e+09 1.932167e+09 1.794152e+09 1.761975e+09 1.832799e+09 1.826453e+09

3 rows × 22 columns


In [264]:
# Join the lights dataframe to the wb dataframe on their shared 'code' index,
# drop every row with a missing value (e.g. a code with no wb match), and move
# 'code' back into a column so the result has a clean 0..n-1 row index.
# NOTE(review): `df` must still be the lights frame from the load cell here;
# the name `df` is rebound by another cell further down the notebook.
df_join = df.join(wb).dropna(axis=0).reset_index()

# Split the joined frame by column name instead of hard-coded positions
# (the removed `.ix` indexer with 2:36 / 36: only worked for one exact layout).
light_cols = [col for col in df.columns if col != 'city']
wb_cols = list(wb.columns)

# Lights: strip the 3-character prefix so each label is just the year
# (e.g. 'F101992' -> '1992'). Several columns can share a year after that
# (e.g. F101994 and F121994); group the transposed frame by label and keep
# the first column of each year. groupby also sorts the year labels.
df_lights = df_join[light_cols].copy()
df_lights.columns = [name[3:] for name in df_lights.columns]
lights_by_year = df_lights.transpose()
df_lights = lights_by_year.groupby(lights_by_year.index).first().transpose()

# wb: sort the *columns* so the years are in the same order as the
# groupby-sorted lights columns (sorting rows would be a no-op here).
df_wb = df_join[wb_cols].sort_index(axis=1)

# The product below is positional, so make sure both frames cover exactly
# the same years in the same order before multiplying raw arrays.
if list(df_lights.columns) != list(df_wb.columns):
    raise ValueError('lights and wb year columns do not line up: %r vs %r'
                     % (list(df_lights.columns), list(df_wb.columns)))

# Multiply dataframes element-wise: each city's lights value for a year times
# the wb value of its country for the same year.
df_city_gdps = pd.DataFrame(df_lights.values * df_wb.values, columns=df_lights.columns)

# Re-attach the identifying columns (first two columns of the joined frame:
# code and city) and index the result by city.
df_code_city = df_join.iloc[:, :2]
df_city_gdps = df_code_city.join(df_city_gdps).set_index('city')
# Example lookup: df_city_gdps.loc['Luanda']

In [146]:
# Convert the inputs to plain NumPy arrays so the model can be evaluated with
# broadcasting. `.values` is used instead of `.as_matrix()`, which was
# deprecated in pandas 0.23 and removed in pandas 1.0.
# NOTE(review): `lights`, `gdp_1992` and `stats` are not defined in the cells
# shown above -- confirm the cell that builds them runs before this one.
lights_np = lights.values
gdp_1992_np = gdp_1992.values
stats_np = stats.values
# Columns 0 and 1 of `stats` as (n_rows, 1) column vectors, so each row's
# coefficient broadcasts across all columns of lights_np.
# NOTE(review): presumably a per-row slope and intercept -- verify against
# the code that produces `stats`.
beta_np = stats_np[:, [0]]
intercept_np = stats_np[:, [1]]

In [149]:
# Display the lights array as a sanity check (NumPy abbreviates large arrays with "...").
lights_np


Out[149]:
array([[ 0.04071183,  0.04325927,  0.04193492, ...,  0.04596263,
         0.04703926,  0.04707436],
       [ 0.08446857,  0.08565994,  0.08483239, ...,  0.11110264,
         0.11476315,  0.11280903],
       [ 0.06411873,  0.06609107,  0.06402153, ...,  0.08946085,
         0.09193809,  0.09196374],
       ..., 
       [ 0.10897563,  0.11443353,  0.12276331, ...,  0.15414695,
         0.17665531,  0.16203917],
       [ 0.37395513,  0.40177739,  0.39183458, ...,  0.56355477,
         0.61078751,  0.65828421],
       [ 0.77056735,  0.69304461,  0.7293547 , ...,  0.8820615 ,
         0.78520572,  0.80322217]])

In [155]:
# Apply the per-row linear model (lights * beta + intercept) and scale the
# result by gdp_1992_np, relying on NumPy broadcasting; label the columns
# with lights_years.
# NOTE(review): this rebinds `df`, the same name the pickle-loading cell
# uses for the lights frame -- the value of `df` depends on which cell ran last.
df = pd.DataFrame(
    data=np.multiply(lights_np * beta_np + intercept_np, gdp_1992_np),
    columns=lights_years,
)
df.shape


Out[155]:
(452, 34)

In [ ]:
# Scratch cell: element-wise product of the lights array and the stats array.
# NOTE(review): relies on NumPy broadcasting, so the two shapes must be
# compatible -- verify before keeping this cell.
np.multiply(lights_np, stats_np)

In [115]:
# Print the first ten rows of `lights`, one row at a time: the row label
# followed by the row itself as a Series, then a blank separator line.
# The function-call form of print with a single pre-formatted string is used
# so the cell produces the same text under Python 2 and is also valid
# Python 3 (the bare `print x, y` statement is a SyntaxError there).
for idx, row in lights.head(10).iterrows():
    print('{} {}'.format(idx, row))
    print('')


0 1992    0.040712
1993    0.043259
1994    0.041935
1994    0.042028
1995    0.043854
1996    0.044374
1997    0.043695
1998    0.045311
1999    0.045818
1997    0.040493
1998    0.042393
1999    0.041959
2000    0.041510
2001    0.041687
2002    0.042320
2003    0.042345
2000    0.044700
2001    0.042688
2002    0.044373
2003    0.038819
2004    0.039902
2005    0.040474
2006    0.039873
2007    0.039024
2004    0.042178
2005    0.039266
2006    0.041544
2007    0.042365
2008    0.043124
2009    0.043410
2010    0.046229
2011    0.045963
2012    0.047039
2013    0.047074
Name: 0, dtype: float64
1 1992    0.084469
1993    0.085660
1994    0.084832
1994    0.098226
1995    0.094979
1996    0.096808
1997    0.099221
1998    0.101455
1999    0.101246
1997    0.088436
1998    0.090062
1999    0.090697
2000    0.093763
2001    0.096711
2002    0.098950
2003    0.098417
2000    0.100010
2001    0.098563
2002    0.101961
2003    0.086165
2004    0.090672
2005    0.091150
2006    0.094357
2007    0.092065
2004    0.100202
2005    0.089895
2006    0.099447
2007    0.103671
2008    0.106120
2009    0.107497
2010    0.115798
2011    0.111103
2012    0.114763
2013    0.112809
Name: 1, dtype: float64
2 1992    0.064119
1993    0.066091
1994    0.064022
1994    0.071494
1995    0.070122
1996    0.071937
1997    0.072184
1998    0.074502
1999    0.077142
1997    0.063524
1998    0.067270
1999    0.068813
2000    0.070790
2001    0.071618
2002    0.073212
2003    0.074103
2000    0.076346
2001    0.075792
2002    0.079549
2003    0.069846
2004    0.070539
2005    0.071969
2006    0.073974
2007    0.068623
2004    0.075629
2005    0.069868
2006    0.076431
2007    0.081374
2008    0.080118
2009    0.081378
2010    0.091827
2011    0.089461
2012    0.091938
2013    0.091964
Name: 2, dtype: float64
3 1992    0.109045
1993    0.109643
1994    0.108800
1994    0.114542
1995    0.115425
1996    0.115182
1997    0.117026
1998    0.121824
1999    0.124807
1997    0.106841
1998    0.111201
1999    0.113844
2000    0.111529
2001    0.115572
2002    0.119037
2003    0.121578
2000    0.119081
2001    0.120104
2002    0.122334
2003    0.111005
2004    0.112002
2005    0.115410
2006    0.115076
2007    0.111508
2004    0.119090
2005    0.114561
2006    0.120989
2007    0.127388
2008    0.127682
2009    0.129103
2010    0.138927
2011    0.133095
2012    0.138358
2013    0.139307
Name: 3, dtype: float64
4 1992    0.106770
1993    0.104045
1994    0.102461
1994    0.112204
1995    0.107487
1996    0.108180
1997    0.109550
1998    0.110458
1999    0.113464
1997    0.103836
1998    0.101809
1999    0.105922
2000    0.103838
2001    0.108047
2002    0.110756
2003    0.111272
2000    0.109317
2001    0.108704
2002    0.107243
2003    0.098295
2004    0.100270
2005    0.102788
2006    0.104027
2007    0.102118
2004    0.108792
2005    0.104779
2006    0.109242
2007    0.108618
2008    0.111215
2009    0.111624
2010    0.118657
2011    0.111315
2012    0.116590
2013    0.116264
Name: 4, dtype: float64
5 1992    0.004626
1993    0.004430
1994    0.004471
1994    0.004689
1995    0.004915
1996    0.004930
1997    0.005163
1998    0.005220
1999    0.005412
1997    0.004813
1998    0.004848
1999    0.005015
2000    0.005197
2001    0.005261
2002    0.005456
2003    0.005535
2000    0.005449
2001    0.005457
2002    0.005627
2003    0.005362
2004    0.005479
2005    0.005652
2006    0.005857
2007    0.005712
2004    0.005712
2005    0.005637
2006    0.005975
2007    0.006199
2008    0.006200
2009    0.006165
2010    0.006452
2011    0.006307
2012    0.006394
2013    0.006423
Name: 5, dtype: float64
6 1992    0.013443
1993    0.013001
1994    0.013058
1994    0.013323
1995    0.013407
1996    0.013194
1997    0.013464
1998    0.013592
1999    0.013691
1997    0.012841
1998    0.013058
1999    0.012999
2000    0.013281
2001    0.013132
2002    0.013267
2003    0.013296
2000    0.013557
2001    0.013508
2002    0.013605
2003    0.012881
2004    0.012909
2005    0.012915
2006    0.013331
2007    0.013010
2004    0.013359
2005    0.012912
2006    0.013529
2007    0.013845
2008    0.013791
2009    0.013473
2010    0.014005
2011    0.013701
2012    0.014080
2013    0.014070
Name: 6, dtype: float64
7 1992    0.006323
1993    0.006264
1994    0.006308
1994    0.006562
1995    0.006676
1996    0.006666
1997    0.006740
1998    0.006944
1999    0.006812
1997    0.006466
1998    0.006373
1999    0.006566
2000    0.006679
2001    0.006695
2002    0.006651
2003    0.006727
2000    0.006781
2001    0.006720
2002    0.006739
2003    0.006374
2004    0.006498
2005    0.006318
2006    0.006648
2007    0.006469
2004    0.006689
2005    0.006454
2006    0.006790
2007    0.006914
2008    0.006847
2009    0.006768
2010    0.007243
2011    0.006934
2012    0.007093
2013    0.007035
Name: 7, dtype: float64
8 1992    0.013443
1993    0.013001
1994    0.013058
1994    0.013323
1995    0.013407
1996    0.013194
1997    0.013464
1998    0.013592
1999    0.013691
1997    0.012841
1998    0.013058
1999    0.012999
2000    0.013281
2001    0.013132
2002    0.013267
2003    0.013296
2000    0.013557
2001    0.013508
2002    0.013605
2003    0.012881
2004    0.012909
2005    0.012915
2006    0.013331
2007    0.013010
2004    0.013359
2005    0.012912
2006    0.013529
2007    0.013845
2008    0.013791
2009    0.013473
2010    0.014005
2011    0.013701
2012    0.014080
2013    0.014070
Name: 8, dtype: float64
9 1992    0.016597
1993    0.017412
1994    0.017653
1994    0.015125
1995    0.016275
1996    0.016647
1997    0.016870
1998    0.016974
1999    0.017238
1997    0.015266
1998    0.015349
1999    0.015175
2000    0.016283
2001    0.016965
2002    0.016195
2003    0.016373
2000    0.015800
2001    0.016024
2002    0.016139
2003    0.014890
2004    0.014404
2005    0.014660
2006    0.014303
2007    0.014142
2004    0.015476
2005    0.014385
2006    0.015040
2007    0.015783
2008    0.015026
2009    0.016317
2010    0.017617
2011    0.016871
2012    0.015474
2013    0.015973
Name: 9, dtype: float64

In [ ]: