In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('/home/anderson/Desktop/Sao-Paulo-Crime-Study/output.csv', encoding = 'latin1')


/home/anderson/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (22,23,24,28,30) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [4]:
#first checking my df 
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 793050 entries, 0 to 793049
Data columns (total 31 columns):
ID_DELEGACIA               793050 non-null int64
NOME_DEPARTAMENTO          793050 non-null object
NOME_SECCIONAL             793050 non-null object
NOME_DELEGACIA             793050 non-null object
CIDADE                     793050 non-null object
ANO_BO                     793050 non-null int64
NUM_BO                     793050 non-null int64
NOME_DEPARTAMENTO_CIRC     793050 non-null object
NOME_SECCIONAL_CIRC        793050 non-null object
NOME_DELEGACIA_CIRC        793050 non-null object
NOME_MUNICIPIO_CIRC        793050 non-null object
DESCR_TIPO_BO              793050 non-null object
DATA_OCORRENCIA_BO         793050 non-null object
HORA_OCORRENCIA_BO         712129 non-null object
DATAHORA_COMUNICACAO_BO    0 non-null float64
FLAG_STATUS                793050 non-null object
RUBRICA                    793050 non-null object
DESCR_CONDUTA              607623 non-null object
DESDOBRAMENTO              44298 non-null object
DESCR_TIPOLOCAL            793050 non-null object
DESCR_SUBTIPOLOCAL         793050 non-null object
LOGRADOURO                 793050 non-null object
NUMERO_LOGRADOURO          739249 non-null object
LATITUDE                   463935 non-null object
LONGITUDE                  463927 non-null object
DESCR_TIPO_PESSOA          793037 non-null object
FLAG_VITIMA_FATAL          35424 non-null object
SEXO_PESSOA                793031 non-null object
IDADE_PESSOA               782791 non-null object
COR_CUTIS                  791967 non-null object
Unnamed: 30                17 non-null object
dtypes: float64(1), int64(3), object(27)
memory usage: 187.6+ MB

In [5]:
#Now, creating differente dataframes according to the crime. Crime is in RUBRICA
df['RUBRICA'].head()


Out[5]:
0                   Lesão corporal (art. 129)
1    Lesão corporal culposa (art. 129. §6o.)
2                   Lesão corporal (art. 129)
3                             Furto (art. 155)
4          Furto qualificado (art. 155, §4o.)
Name: RUBRICA, dtype: object

In [6]:
#And we have 20 different crimes
df['RUBRICA'].nunique()


Out[6]:
20

In [ ]:
i = {}
for crime in df['RUBRICA']:
    i[crime] = df[df['RUBRICA'] == crime]

In [ ]:
print("oi")

In [ ]: