In [1]:
import sys
sys.path.append('..')
In [2]:
from src.carregamento.dados import todas_escolas_pd
/Users/fmmartin/.pyenv/versions/3.4.4/envs/dados_env/lib/python3.4/site-packages/pandas/core/indexing.py:465: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
self.obj[item] = s
In [3]:
import pandas as pd
In [6]:
medias = ['MEDIA_5EF_LP', 'MEDIA_5EF_MT', 'MEDIA_9EF_LP', 'MEDIA_9EF_MT']
todas_escolas_pd[todas_escolas_pd['ID_ESCOLA'] == 26121786][medias]
Out[6]:
MEDIA_5EF_LP
MEDIA_5EF_MT
MEDIA_9EF_LP
MEDIA_9EF_MT
17802
165.17
179.46
210.9
220.18
In [10]:
todas_escolas_pd.groupby('NIVEL_SOCIO_ECONOMICO')[medias].describe()
/Users/fmmartin/.pyenv/versions/3.4.4/envs/dados_env/lib/python3.4/site-packages/numpy/lib/function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
RuntimeWarning)
Out[10]:
MEDIA_5EF_LP
MEDIA_5EF_MT
MEDIA_9EF_LP
MEDIA_9EF_MT
NIVEL_SOCIO_ECONOMICO
Grupo 1
count
1377.000000
1377.000000
2009.000000
2009.000000
mean
153.572353
163.518010
214.262907
218.958006
std
21.301175
26.573684
13.840801
14.419636
min
104.180000
99.890000
155.180000
158.380000
25%
NaN
NaN
207.850000
212.770000
50%
NaN
NaN
217.670000
221.825000
75%
NaN
NaN
217.670000
221.825000
max
295.550000
302.880000
297.840000
362.810000
Grupo 2
count
4086.000000
4086.000000
5809.000000
5809.000000
mean
164.013199
176.383715
219.590728
224.182196
std
22.585799
27.231967
13.625636
14.781723
min
107.990000
104.160000
162.150000
165.150000
25%
NaN
NaN
217.670000
221.825000
50%
NaN
NaN
217.670000
221.825000
75%
NaN
NaN
222.330000
226.550000
max
288.840000
318.200000
303.140000
389.750000
Grupo 3
count
8501.000000
8501.000000
11797.000000
11797.000000
mean
173.029606
186.758512
233.624326
237.583730
std
19.944722
23.487421
13.944117
14.627241
min
115.720000
114.150000
158.840000
166.800000
25%
NaN
NaN
226.590000
229.360000
50%
NaN
NaN
239.210000
243.000000
75%
NaN
NaN
239.210000
243.000000
max
274.980000
315.340000
302.360000
335.950000
Grupo 4
count
11859.000000
11859.000000
17131.000000
17131.000000
mean
190.802356
207.084591
238.825501
243.176357
std
19.434745
22.674312
12.949732
13.895179
min
109.230000
108.720000
167.710000
179.310000
25%
NaN
NaN
235.960000
239.180000
50%
NaN
NaN
239.210000
243.000000
75%
NaN
NaN
241.160000
244.820000
max
275.430000
306.700000
305.030000
318.660000
Grupo 5
count
10941.000000
10941.000000
15607.000000
15607.000000
mean
206.152986
223.283476
243.778632
248.344079
std
17.669872
19.963588
13.402277
14.223368
min
130.960000
111.220000
169.210000
178.110000
25%
NaN
NaN
239.210000
243.000000
50%
NaN
NaN
239.210000
243.000000
75%
NaN
NaN
250.000000
254.645000
max
272.110000
303.470000
321.870000
334.290000
Grupo 6
count
1194.000000
1194.000000
1471.000000
1471.000000
mean
227.063107
244.404966
264.992869
271.089963
std
15.557346
17.431775
12.515898
14.194340
min
169.500000
180.460000
190.450000
200.850000
25%
NaN
NaN
262.685000
268.320000
50%
NaN
NaN
265.370000
270.285000
75%
NaN
NaN
267.740000
273.220000
max
280.910000
307.160000
331.790000
358.110000
Grupo 7
count
4.000000
4.000000
4.000000
4.000000
mean
241.320000
265.507500
282.320000
299.060000
std
5.401870
7.790397
20.224451
34.649122
min
236.090000
259.630000
265.370000
270.285000
25%
238.047500
260.087500
265.370000
270.285000
50%
240.295000
262.975000
279.200000
293.042500
75%
243.567500
268.395000
296.150000
321.817500
max
248.600000
276.450000
305.510000
339.870000
Content source: ffmmjj/desafio-dados-2016
Similar notebooks: