In [79]:
import pandas as pd
import numpy as np
In [210]:
# Read the frame pickled at data/geo/pickles/cities_light_frac.pickle, rename
# its 'ISO_A3' column to 'code', and use that column as the index.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = (
    pd.read_pickle('data/geo/pickles/cities_light_frac.pickle')
    .rename(columns={'ISO_A3': 'code'})
    .set_index('code')
)
df.head(3)
Out[210]:
In [172]:
# Load, clean, and normalize wb data.
#
# Only the rows whose 'Series Name' equals `label` are kept; the descriptive
# columns and the '2014'/'2015' columns are dropped, 'Country Code' is renamed
# to 'code' and becomes the index, and rows with any missing value are removed.
#
# The steps are chained on new objects instead of mutating a boolean-filtered
# slice with inplace=True, which can raise SettingWithCopyWarning.
label = 'GDP, PPP (constant 2011 international $)'
wb_raw = pd.read_csv('data/econ/wb.csv')
wb = (
    wb_raw[wb_raw['Series Name'] == label]
    .drop(['Country Name', 'Series Name', 'Series Code', '2014', '2015'], axis=1)
    .rename(columns={'Country Code': 'code'})
    .set_index('code')
    .dropna(axis=0)
)
wb.head(3)
Out[172]:
In [264]:
# Join the lights dataframe to the wb dataframe (DataFrame.join matches on the
# index of both frames), drop rows with any missing value, and move the index
# back into a regular column.
df_join = df.join(wb).dropna(axis=0).reset_index()

# Split the joined frame by column position. `.iloc` replaces `.ix`, which was
# deprecated in pandas 0.20 and removed in 1.0; with these integer slices the
# selection is purely positional either way.
# NOTE(review): the offsets 2 and 36 are hard-coded -- presumably columns 0-1
# are code/city, 2-35 the lights columns and 36+ the wb columns; confirm
# against the actual layout of df_join.
df_code_city = df_join.iloc[:, :2]
df_lights = df_join.iloc[:, 2:36].copy()  # copy so relabelling never touches df_join
df_wb = df_join.iloc[:, 36:].sort_index()

# Strip the first three characters from every lights column label, then merge
# columns that end up sharing a label: transposing lets groupby work on the
# labels, and first() keeps the first non-null value of each group.
df_lights.columns = [x[3:] for x in df_lights.columns.values]
df_lights = df_lights.transpose()
df_lights = df_lights.groupby(df_lights.index).first()
df_lights = df_lights.transpose()

# Multiply the two blocks element-wise.
# NOTE(review): this is a positional product of the raw arrays -- it assumes
# df_lights and df_wb have the same shape and the same column order; verify.
df_city_gdps = pd.DataFrame(df_lights.values * df_wb.values, columns=df_lights.columns)

# Re-attach the first two columns of df_join and index the result by 'city'.
df_city_gdps = df_code_city.join(df_city_gdps).set_index('city')
In [146]:
# Extract the underlying NumPy arrays from the pandas objects. `.values` is
# used because `.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# both return the same ndarray.
# NOTE(review): `lights`, `gdp_1992` and `stats` are not defined in any visible
# cell -- this cell relies on kernel state created elsewhere.
lights_np = lights.values
gdp_1992_np = gdp_1992.values
stats_np = stats.values

# Columns 0 and 1 of the stats matrix, each reshaped to an (n_rows, 1) column
# vector so that it broadcasts across the columns of a matrix with n_rows rows.
beta_np = stats_np[:, 0].reshape(-1, 1)
intercept_np = stats_np[:, 1].reshape(-1, 1)
In [149]:
# Bare expression: shows the lights array as this cell's output.
lights_np
Out[149]:
In [155]:
# Scale each row of the lights matrix by its own beta and add its own intercept
# (both are single-column arrays, so they broadcast across the columns), then
# multiply element-wise by gdp_1992_np and wrap the result in a frame whose
# columns are labelled with lights_years.
fitted_np = lights_np * beta_np + intercept_np
df = pd.DataFrame(fitted_np * gdp_1992_np, columns=lights_years)
df.shape
Out[155]:
In [ ]:
# Scratch cell (its prompt is empty, i.e. it was never executed): element-wise
# product of the lights array with the whole stats array.
# NOTE(review): this only works if the two shapes broadcast against each
# other -- confirm the intent or delete the cell.
lights_np * stats_np
In [115]:
# Print the index label and contents of each of the first ten rows of `lights`,
# with a blank line after every row. The loop body is indented (it was flush
# left) and each print call takes a single parenthesised string, which emits
# the same text under Python 2 and Python 3.
for idx, row in lights.head(10).iterrows():
    print('%s %s' % (idx, row))
    print('')
In [ ]: