In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns;
from numpy import nan
from math import sqrt, sin, cos, atan2, log
plt.style.use('ggplot')
from IPython.core.display import HTML
def _read_text(path):
    """Return the whole text content of `path`, closing the file afterwards."""
    with open(path) as handle:
        return handle.read()


# Concatenate the two local stylesheets and inject them into the notebook.
# The HTML(...) call must stay the last expression so the cell renders it.
css = _read_text('style-table.css') + _read_text('style-notebook.css')
HTML('<style>{}</style>'.format(css))
Out[1]:
In [2]:
# Load inner_data.csv (path is relative to the working directory) into a DataFrame.
inner_data = pd.read_csv('inner_data.csv')
In [167]:
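# NOTE(review): `full_data` is used by later cells but is not created in any
# visible cell -- confirm where it is loaded before relying on Run All.
# Disabled filter that would drop the rows whose ENT equals 0:
#full_data = full_data[full_data['ENT']!=0]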
In [4]:
# Column names of the nine section scores: 'seccion_1' ... 'seccion_9'.
secciones = ['seccion_{}'.format(numero) for numero in range(1, 10)]
# Summary columns: grouping key, the section scores, then the final score.
resumen = ['ENT'] + secciones + ['cal_final']
# Columns that get plotted: the section scores plus the final score.
indicadores = secciones + ['cal_final']
# One value per section, in the same order as `secciones`.
max_vals = np.array([11, 13, 13, 11, 14, 17, 2, 11, 8])
In [169]:
# Row-wise total of the section columns, stored back on the frame as 'cal_final'.
calificaciones = full_data.loc[:, secciones].sum(axis='columns')
full_data['cal_final'] = calificaciones
In [149]:
# Select only the `resumen` columns of full_data.
# NOTE(review): despite the name, this statement applies no scaling -- it is a
# plain column selection.
full_norm = full_data[resumen]
In [170]:
#full_norm[secciones] = full_norm[secciones]/max_vals
# Aggregate the summary columns per ENT value: mean and median of every
# remaining column, each result indexed by ENT.
por_entidad = full_data[resumen].groupby('ENT')
full_agg = por_entidad.mean()
full_agg_median = por_entidad.median()
In [171]:
# Disabled column subsetting, kept for reference:
#full_data = full_data[['ENT', 'MUN', 'LOC', 'AGEB', 'MZA', 'ID_INM', 'seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9', 'X', 'Y', 'cal_final']]
# Histogram of every indicator column, restricted to the rows with ENT == 1.
es_entidad_1 = full_data['ENT'] == 1
full_data.loc[es_entidad_1, indicadores].hist()
Out[171]:
In [37]:
# Write full_data to full_data.csv in the working directory. No `index`
# argument is passed, so pandas' default applies and the row index is written
# as an extra leading column.
full_data.to_csv('full_data.csv')
In [5]:
# Rows of inner_data whose ENT code is 25 (the variable name suggests this is
# the code for the state of Sinaloa -- confirm against the data dictionary).
sinaloa = inner_data.loc[inner_data['ENT'] == 25]
In [159]:
# Show full_data as the cell output.
# Leftover note from the original cell: int(sqrt(len(sinaloa['cal_final'])))
# -- presumably a square-root rule for a histogram bin count; confirm or remove.
full_data
Out[159]:
In [153]:
# Disabled alternative (box plot / sqrt-rule bins), kept for reference:
#sinaloa[secciones].boxplot(figsize=(40,100))#hist(bins=int(sqrt(len(sinaloa['cal_final']))))
# Large histogram grid limited to the indicator columns ...
sinaloa.loc[:, indicadores].hist(figsize=(20, 15))
# ... followed by default-sized histograms of the numeric columns of the subset.
sinaloa.hist()
Out[153]:
In [173]:
# Save one box plot of the indicator columns per ENT code 1..32 as
# 'estado_<code>.png' in the working directory.
# NOTE(review): ylim=(0, 1.01) presumes the plotted values lie in [0, 1], yet
# 'cal_final' is computed elsewhere as a raw sum of the section columns --
# confirm the intended scale. Also assumes every code 1..32 occurs in ENT.
for i in range(1, 33):
    ax = full_data[full_data['ENT'] == i][indicadores].plot(
        kind='box',
        figsize=(20, 10),
        ylim=(0, 1.01),
        showmeans=True,
        bootstrap=1000,
    )
    fig = ax.get_figure()
    fig.savefig('estado_' + str(i) + '.png', bbox_inches='tight')
    # Release the figure once it is on disk: 32 simultaneously open figures
    # exceed matplotlib's open-figure warning threshold and hold memory.
    # The PNG files are the product of this cell, so closing here means the
    # figures are no longer rendered inline.
    plt.close(fig)
In [163]:
# Bubble chart of the per-ENT means in `full_agg`: one row per ENT value, one
# column per aggregated indicator (ten in total). Each cell shows a faint
# reference bubble of area `coef` and, on top, a solid bubble whose area is
# coef * mean, so a mean of 1 fills the reference bubble exactly.
coef = 1000
estados = ['Aguascalientes', 'Baja California', 'Baja California Sur']

# One colour per indicator column, in column order.
colors = ['peru', 'hotpink', 'crimson', 'darkslategray', 'lightsalmon',
          'gray', 'firebrick', 'lightgreen', 'olive', 'y']

# Canvas and grid.
fig = plt.figure(figsize=(10, 30))
ax = fig.add_subplot(1, 1, 1)
# set_axis_bgcolor() was removed from matplotlib; set_facecolor() replaces it.
ax.set_facecolor('white')

major_yticks = np.arange(0, 33, 5)
ax.set_yticks(major_yticks)
major_xticks = np.arange(0, 11, 5)
ax.set_xticks(major_xticks)
minor_yticks = np.arange(0, 33, 1)
ax.set_yticks(minor_yticks, minor=True)
minor_xticks = np.arange(0, 11, 1)
ax.set_xticks(minor_xticks, minor=True)
ax.grid(which='minor', color='green', linestyle=':')

# x positions 1..10, one per indicator column; hoisted out of the loop.
x_pos = list(range(1, 11))
for i in full_agg.index.values:
    y_pos = [i] * len(x_pos)
    # Faint full-size bubble as the visual reference.
    ax.scatter(x_pos, y_pos, s=coef, c=colors, alpha=0.1, edgecolor='k', lw=1)
    # Solid bubble scaled by the mean; .loc replaces the removed .ix indexer
    # (label-based lookup on the ENT index), and the scaling is vectorised.
    ax.scatter(x_pos, y_pos, s=coef * full_agg.loc[i], c=colors)

# Save once, after every row has been drawn, then display.
fig.savefig('bubble_plot.png', bbox_inches='tight')
plt.show()