In [151]:
import geopandas as gpd
import pandas as pd
import pickle
import numpy as np
In [228]:
# load city lights dataframe
with open('data/geo/pickles/zonal_stats_m.pickle') as f:
df_cities_geo = pickle.load(f)
df_cities_geo.rename(columns={'adm0_a3':'ISO_A3'}, inplace=True)
df_cities = pd.DataFrame(df_cities_geo)
# load country lights dataframe
with open('data/geo/pickles/zonal_stats_c.pickle') as f:
df_countries_geo = pickle.load(f)
df_countries_geo.reset_index(inplace=True)
df_countries = pd.DataFrame(df_countries_geo)
# merge dataframes
df_merge = pd.merge(df_cities, df_countries, how='left', on='ISO_A3', suffixes=('m', 'c')); df_merge.head(3)
Out[228]:
In [260]:
# compute fraction of light for each city within its country
df_cities_years = df_merge.ix[:, 5:39]
np_cities_years = df_cities_years.as_matrix()
df_countries_years = df_merge.ix[:, 47:]
np_countries_years = df_countries_years.as_matrix()
np_cities_light_fractions = np_cities_years / np_countries_years
In [275]:
# join with city name and country code
df_cities_years_cols = [x[:7] for x in df_cities_years.columns.values]
df_cities_truncated = df_cities.ix[:, :5]
df_cities_light_fractions = pd.DataFrame(np_cities_light_fractions, columns=df_cities_years_cols)
df_cities_light_fractions = df_cities_truncated.join(df_cities_light_fractions)
df_cities_light_fractions.drop(['cartodb_id', 'geometry', 'pop_max'], inplace=True, axis=1)
df_cities_light_fractions.rename(columns={'nameascii': 'city'}, inplace=True)
df_cities_light_fractions.head(3)
Out[275]:
In [276]:
# pickle dataframe
df_cities_light_fractions.to_pickle('data/geo/pickles/cities_light_frac.pickle')
In [ ]:
# load 1992 dataframe and rename 1992 light column
#with open('data/geo/pickles/zonal_stats_c92.pickle') as f:
#df_1992 = pickle.load(f)
#df_1992.rename(columns={'F101992':'c_F101992'}, inplace=True)
# drop dups and rename cols
#df_input = pd.DataFrame(df_input)
#df_input = df_input.drop_duplicates(subset='nameascii')
#df_input.rename(columns={'adm0_a3':'ISO_A3', 'nameascii':'city'}, inplace=True)
# join dataset on ISO_A3 column
#df_join = pd.merge(df_input, df_1992, on='ISO_A3')
#df_join
In [ ]:
In [ ]:
df_normalizer = np.reshape((df_join.c_F101992).as_matrix(), (df_join.shape[0],1))
df_drop_cols = df_join.drop(['ISO_A3', 'cartodb_id', 'geometry', 'pop_max', 'city', 'WB_A3', 'c_F101992'], axis=1)
light_years = df_drop_cols.columns.values
df_np = df_drop_cols.as_matrix()
df_np_normed = np.divide(df_np, df_normalizer)
df_tojoin = df_join[['WB_A3', 'city', 'pop_max']]
df_normed = pd.DataFrame(df_np_normed, columns=light_years)
df_rejoined = df_tojoin.join(df_normed, how='inner')
df_rejoined.head(3)
In [176]:
# Load, clean, and normalize wb data
wb = pd.read_csv('data/econ/wb.csv')
# wb = wb[wb['Series Name'] == 'GDP at market prices (constant 2005 US$)']
# df = pd.merge(df_tojoin, df_normed, left_index=True, right_index=True); df
label = 'GDP, PPP (constant 2011 international $)'
wb = wb[wb['Series Name'] == label]
wb.drop(['Country Name', 'Series Name', 'Series Code', '2014', '2015'], axis=1, inplace=True)
wb.rename(columns={'Country Code': 'WB_A3'}, inplace=True)
wb.dropna(axis=0, inplace=True)
wb = wb.set_index('WB_A3')
wb_normalizer = (wb['1992']).as_matrix()
wb_normed = wb.divide(wb_normalizer, axis=0)
wb_normed = wb_normed.reset_index()
wb_1992 = wb.reset_index()[['WB_A3', '1992']]
wb_1992 = wb_1992.rename(columns={'1992': 'wb_1992'}); wb_1992.head(3)
Out[176]:
In [277]:
country_stats = pd.read_csv('data/country_stats.csv')
country_stats.rename(columns={'country': 'WB_A3'}, inplace=True)
beta_intercept = country_stats[['WB_A3','beta','intercept']]
beta_intercept.head(100)
Out[277]:
In [179]:
# join lights and country stats dataframes
df_plus_1992 = df_rejoined.merge(wb_1992, on='WB_A3', how='inner')
df = df_plus_1992.merge(beta_intercept, on='WB_A3', how='inner')
# pickle joined dataframe
df.to_pickle('data/cleaned_df_cities.pickle'); df.head(3)
Out[179]:
In [ ]:
In [181]:
df.to_csv('test.csv', encoding='latin-1')
In [ ]: