In [151]:
import geopandas as gpd
import pandas as pd
import pickle
import numpy as np

In [228]:
# load city lights dataframe
with open('data/geo/pickles/zonal_stats_m.pickle', 'rb') as f:
    df_cities_geo = pickle.load(f)
df_cities_geo.rename(columns={'adm0_a3':'ISO_A3'}, inplace=True)
df_cities = pd.DataFrame(df_cities_geo)

# load country lights dataframe
with open('data/geo/pickles/zonal_stats_c.pickle', 'rb') as f:
    df_countries_geo = pickle.load(f)
df_countries_geo.reset_index(inplace=True)
df_countries = pd.DataFrame(df_countries_geo)

# merge dataframes
df_merge = pd.merge(df_cities, df_countries, how='left', on='ISO_A3', suffixes=('m', 'c')); df_merge.head(3)


Out[228]:
ISO_A3 cartodb_id geometrym nameascii pop_max F101992m F101993m F101994m F121994m F121995m ... F162004c F162005c F162006c F162007c F162008c F162009c F182010c F182011c F182012c F182013c
0 AUS 1198 POLYGON ((138.513149 -34.991254, 138.504815 -3... Adelaide 1145000 60314.0 64088.0 62126.0 62264.0 64969.0 ... 1929061.0 1653849.0 1855688.0 2016436.0 1991068.0 2001535.0 2657549.0 2304520.0 2591313.0 2490423.0
1 USA 37873 POLYGON ((-111.785275 33.163921, -111.785275 3... Mesa 1085394 267889.0 256514.0 258932.0 271544.0 284599.0 ... 61624213.0 54883977.0 58134101.0 67727412.0 66596415.0 63256383.0 83859358.0 71072373.0 69517416.0 71832002.0
2 ARE 131 POLYGON ((55.100937 24.147646, 55.100937 24.13... Sharjah 1103027 346648.0 351177.0 339926.0 374813.0 382730.0 ... 782052.0 708632.0 798291.0 881028.0 877970.0 857934.0 1109724.0 1076830.0 1146627.0 1165915.0

3 rows × 81 columns
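
Before computing fractions, it is worth confirming that the left merge matched every city to a country row; a quick sanity-check sketch (an unmatched ISO_A3 would leave NaNs in the suffixed country columns):

In [ ]:
# sanity check: a left merge leaves NaN in the country columns for any
# city whose ISO_A3 found no match in the country dataframe
assert df_merge['F101992c'].notna().all(), 'some cities lack a country match'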


In [260]:
# compute fraction of light for each city within its country
df_cities_years = df_merge.iloc[:, 5:39]    # city-level annual lights (F101992m..F182013m)
np_cities_years = df_cities_years.to_numpy()
df_countries_years = df_merge.iloc[:, 47:]  # country-level annual lights (F101992c..F182013c)
np_countries_years = df_countries_years.to_numpy()
np_cities_light_fractions = np_cities_years / np_countries_years
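
Since each city's footprint lies inside its country, the fractions should land in [0, 1], and the division only makes sense if the city and country year columns line up positionally. A small sketch of both invariants:

In [ ]:
# sketch: check column alignment (strip the 'm'/'c' suffixes) and value range
assert [c[:7] for c in df_cities_years.columns] == \
       [c[:7] for c in df_countries_years.columns], 'year columns misaligned'
assert np.nanmax(np_cities_light_fractions) <= 1.0, 'a city outshines its country'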

In [275]:
# join with city name and country code
df_cities_years_cols = [x[:7] for x in df_cities_years.columns.values]  # strip 'm' suffix: 'F101992m' -> 'F101992'
df_cities_truncated = df_cities.iloc[:, :5]
df_cities_light_fractions = pd.DataFrame(np_cities_light_fractions, columns=df_cities_years_cols)
df_cities_light_fractions = df_cities_truncated.join(df_cities_light_fractions)
df_cities_light_fractions.drop(['cartodb_id', 'geometry', 'pop_max'], inplace=True, axis=1)
df_cities_light_fractions.rename(columns={'nameascii': 'city'}, inplace=True)
df_cities_light_fractions.head(3)


Out[275]:
ISO_A3 city F101992 F101993 F101994 F121994 F121995 F121996 F121997 F121998 ... F162004 F162005 F162006 F162007 F162008 F162009 F182010 F182011 F182012 F182013
0 AUS Adelaide 0.040712 0.038999 0.038227 0.037661 0.033461 0.034791 0.033779 0.033385 ... 0.032392 0.035174 0.033167 0.031126 0.032087 0.032131 0.025771 0.029548 0.026893 0.028003
1 USA Mesa 0.004626 0.004192 0.004260 0.004590 0.004301 0.004289 0.004533 0.004601 ... 0.005367 0.005948 0.005952 0.005300 0.005392 0.005644 0.004456 0.005139 0.005326 0.005178
2 ARE Sharjah 0.745420 0.732680 0.713109 0.745305 0.710744 0.692191 0.687522 0.669494 ... 0.690925 0.718230 0.707320 0.695539 0.704375 0.722402 0.646997 0.649524 0.649606 0.651584

3 rows × 36 columns


In [276]:
# pickle dataframe
df_cities_light_fractions.to_pickle('data/geo/pickles/cities_light_frac.pickle')
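
A cheap round-trip check that the pickle reloads intact (a sketch, not part of the original pipeline):

In [ ]:
# round-trip check: reload the pickle and compare with the in-memory frame
df_check = pd.read_pickle('data/geo/pickles/cities_light_frac.pickle')
pd.testing.assert_frame_equal(df_check, df_cities_light_fractions)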

In [ ]:
# load the 1992 country lights dataframe and rename its light column;
# df_input is assumed to be a city-level dataframe (with adm0_a3 codes)
# loaded in an earlier session
#with open('data/geo/pickles/zonal_stats_c92.pickle', 'rb') as f:
#    df_1992 = pickle.load(f)
#df_1992.rename(columns={'F101992': 'c_F101992'}, inplace=True)

# drop duplicate cities and rename columns
#df_input = pd.DataFrame(df_input)
#df_input = df_input.drop_duplicates(subset='nameascii')
#df_input.rename(columns={'adm0_a3': 'ISO_A3', 'nameascii': 'city'}, inplace=True)

# join the datasets on the ISO_A3 column
#df_join = pd.merge(df_input, df_1992, on='ISO_A3')

#df_join

In [ ]:
# normalize each city's light series by its country's total 1992 light
df_normalizer = df_join['c_F101992'].to_numpy().reshape(-1, 1)
df_drop_cols = df_join.drop(['ISO_A3', 'cartodb_id', 'geometry', 'pop_max', 'city', 'WB_A3', 'c_F101992'], axis=1)
light_years = df_drop_cols.columns.values
df_np = df_drop_cols.to_numpy()
df_np_normed = np.divide(df_np, df_normalizer)
df_tojoin = df_join[['WB_A3', 'city', 'pop_max']]
df_normed = pd.DataFrame(df_np_normed, columns=light_years)
df_rejoined = df_tojoin.join(df_normed, how='inner')
df_rejoined.head(3)
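
The division above broadcasts the (n, 1) normalizer across all year columns, so each entry becomes a city's light in year t relative to its country's total 1992 light. A toy example of the broadcasting:

In [ ]:
# toy example of the row-wise normalization used above
lights = np.array([[10., 20.], [30., 60.]])  # two cities x two years
norm = np.array([[100.], [300.]])            # each city's country 1992 total
print(lights / norm)                         # [[0.1 0.2], [0.1 0.2]]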

In [176]:
# load, clean, and normalize World Bank GDP data
wb = pd.read_csv('data/econ/wb.csv')

# alternative series: 'GDP at market prices (constant 2005 US$)'
label = 'GDP, PPP (constant 2011 international $)'
wb = wb[wb['Series Name'] == label].copy()  # copy to avoid chained-assignment warnings
wb.drop(['Country Name', 'Series Name', 'Series Code', '2014', '2015'], axis=1, inplace=True)
wb.rename(columns={'Country Code': 'WB_A3'}, inplace=True)
wb.dropna(axis=0, inplace=True)
wb = wb.set_index('WB_A3')
wb_normalizer = wb['1992'].to_numpy()
wb_normed = wb.divide(wb_normalizer, axis=0)  # GDP relative to each country's 1992 level
wb_normed = wb_normed.reset_index()
wb_1992 = wb.reset_index()[['WB_A3', '1992']]
wb_1992 = wb_1992.rename(columns={'1992': 'wb_1992'}); wb_1992.head(3)


Out[176]:
WB_A3 wb_1992
0 ALB 9.241349e+09
1 DZA 2.681550e+11
2 ATG 1.092734e+09
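
Since every series was divided by its own 1992 value, the '1992' column of wb_normed should be identically 1; a quick check sketch:

In [ ]:
# sketch: after normalization the 1992 column should be all ones
assert np.allclose(wb_normed['1992'].to_numpy(), 1.0)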

In [277]:
country_stats = pd.read_csv('data/country_stats.csv')
country_stats.rename(columns={'country': 'WB_A3'}, inplace=True)
beta_intercept = country_stats[['WB_A3','beta','intercept']]
beta_intercept.head(100)


Out[277]:
WB_A3 beta intercept
0 CHN 2.106011 -1.055173
1 ETH 0.762989 0.156017
2 OMN 0.260797 0.905775
3 BTN 0.485263 0.695238
4 VNM 0.359649 0.769938
5 EGY 0.875287 0.086420
6 MLI 0.308667 0.715733
7 MAR 0.516291 0.583528
8 DJI 0.311363 0.573508
9 BFA 0.585040 0.450565
10 BWA 0.499806 0.497423
11 SAU 0.734662 0.327779
12 COG 0.400989 0.703327
13 MYS 0.498236 0.463380
14 BOL 0.489625 0.607063
15 SLE 0.392889 0.621196
16 SEN 0.433899 0.554424
17 ALB 0.178532 0.697791
18 ARE 1.010802 0.114130
19 MOZ 0.724434 0.933208
20 HND 0.269812 0.667162
21 LAO 0.257198 1.128858
22 PER 0.686604 0.288633
23 JOR 0.936640 0.190353
24 IRN 0.669335 0.291526
25 CHL 0.542952 0.499104
26 GTM 0.317933 0.712941
27 ECU 0.370728 0.591036
28 CPV 0.895292 0.256561
29 LSO 0.423998 0.669042
... ... ... ...
70 GUY 0.368851 0.773972
71 NIC 0.397468 0.674422
72 BDI 0.171867 0.640075
73 NAM 0.741438 0.322613
74 SLV 0.385471 0.722142
75 KOR 1.404341 -0.388770
76 GRD 0.203077 0.905171
77 LCA 0.291475 0.791884
78 IRL 1.108894 0.394137
79 MWI 0.645996 0.573877
80 POL 0.394865 0.892755
81 CRI 0.505775 0.550680
82 ZAF 0.807949 0.304618
83 KIR 0.037727 1.134288
84 TZA 0.716174 0.312445
85 ESP 0.613172 0.490604
86 GNQ 0.345546 16.965988
87 CMR 0.522463 0.637246
88 ZWE 0.596383 0.343050
89 NGA 1.484300 0.086446
90 PHL 0.504959 0.484162
91 SLB 0.109842 0.921580
92 MEX 0.485721 0.613901
93 SYC 0.688008 0.572607
94 IRQ 1.922867 0.855745
95 BRB 0.574109 0.566662
96 CUB 0.257764 0.626746
97 CHE 0.332418 0.737247
98 VEN 0.549649 0.432586
99 UGA 0.721246 0.568564

100 rows × 3 columns
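
country_stats.csv arrives precomputed; beta and intercept presumably parameterize a per-country fit of normalized GDP against normalized lights estimated upstream. A hypothetical sketch of how such coefficients could be produced with a simple linear fit (toy values; the actual estimation procedure may differ):

In [ ]:
# hypothetical: estimate beta/intercept for one country via a linear fit
x = np.array([1.00, 1.05, 1.12, 1.20])  # normalized lights (toy values)
y = np.array([1.00, 1.08, 1.15, 1.26])  # normalized GDP (toy values)
beta, intercept = np.polyfit(x, y, 1)   # y ~ beta * x + intercept
print(beta, intercept)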


In [179]:
# join lights and country stats dataframes
df_plus_1992 = df_rejoined.merge(wb_1992, on='WB_A3', how='inner')
df = df_plus_1992.merge(beta_intercept, on='WB_A3', how='inner')
# pickle joined dataframe
df.to_pickle('data/cleaned_df_cities.pickle'); df.head(3)


Out[179]:
WB_A3 city pop_max F101992 F101993 F101994 F121994 F121995 F121996 F121997 ... F162007 F162008 F162009 F182010 F182011 F182012 F182013 wb_1992 beta intercept
0 AUS Adelaide 1145000 0.040712 0.043259 0.041935 0.042028 0.043854 0.044374 0.043695 ... 0.042365 0.043124 0.043410 0.046229 0.045963 0.047039 0.047074 4.876710e+11 0.891693 0.344698
1 AUS Brisbane 1860000 0.084469 0.085660 0.084832 0.098226 0.094979 0.096808 0.099221 ... 0.103671 0.106120 0.107497 0.115798 0.111103 0.114763 0.112809 4.876710e+11 0.891693 0.344698
2 AUS Perth 1532000 0.064119 0.066091 0.064022 0.071494 0.070122 0.071937 0.072184 ... 0.081374 0.080118 0.081378 0.091827 0.089461 0.091938 0.091964 4.876710e+11 0.891693 0.344698

3 rows × 40 columns
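
For downstream modeling it is often handier to melt the wide satellite-year columns into a long (city, year, light) table; a sketch, assuming the last four characters of each F-prefixed composite name are the year:

In [ ]:
# sketch: reshape the wide satellite-year columns into long format
id_cols = ['WB_A3', 'city', 'pop_max', 'wb_1992', 'beta', 'intercept']
df_long = df.melt(id_vars=id_cols, var_name='composite', value_name='light')
df_long['year'] = df_long['composite'].str[-4:].astype(int)  # 'F101992' -> 1992
df_long.head(3)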


In [181]:
# export the joined dataframe to CSV
df.to_csv('test.csv', encoding='latin-1')
