Para el siguiente ejercicio vamos a utilizar la base de datos de los crímenes en Chicago de 2001 al presente, disponible en el siguiente enlace
In [1]:
# librerias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')
In [3]:
# Load the crime records from a CSV path relative to the notebook's working directory.
# NOTE(review): the file name suggests missing values were already removed, yet
# dropna() is applied again further down — confirm which file is the true raw input.
data = pd.read_csv('../data/CrimesNoNA.csv')
In [4]:
# Preview the first rows of the dataframe
data.head()
Out[4]:
In [5]:
# Dataframe summary: column dtypes, non-null counts and memory usage
data.info()
In [6]:
# Column labels present in the dataframe
data.columns
Out[6]:
In [7]:
# Non-null count per column; columns with a lower count than the rest contain missing values.
data.count()
Out[7]:
In [8]:
# Drop every row that has at least one missing value
data = data.dropna()
In [9]:
# One-off export of the cleaned frame, left disabled.
# NOTE(review): the target is the same path the notebook loads from, so enabling this
# overwrites the input file; without index=False the row index is also written as an
# extra column — confirm intent before re-enabling.
# data.to_csv('../data/CrimesNoNA.csv')
In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()
Out[10]:
In [11]:
# Records per year. count() tallies non-null entries of 'Arrest', so this is the number
# of records in each year (arrest or not), not the number of arrests.
# The column is selected before aggregating so that only one column is counted
# instead of every column in the frame.
data.groupby('Year')['Arrest'].count()
Out[11]:
In [12]:
# Bar chart of records per year. count() tallies non-null 'Arrest' entries, i.e. rows
# per year regardless of arrest outcome.
# The column is selected before aggregating so only one column is counted, and the
# figure is titled so it can be read on its own.
data.groupby('Year')['Arrest'].count().plot.bar(title='Records per year')
Out[12]:
In [13]:
# Preview the first rows again, now that rows with missing values have been dropped
data.head()
Out[13]:
In [15]:
# Number of records whose 'Arrest' flag is True.
# Summing the boolean mask counts the matches directly, without materialising a
# filtered copy of the whole frame just to take its length.
int(data['Arrest'].eq(True).sum())
Out[15]:
In [16]:
# Number of records whose 'Arrest' flag is False.
# Summing the boolean mask counts the matches directly, without materialising a
# filtered copy of the whole frame just to take its length.
int(data['Arrest'].eq(False).sum())
Out[16]:
In [17]:
# Non-null entries in 'Arrest' — a sanity check against the True/False counts above
# (they should add up to this value if the column only holds booleans; verify).
data['Arrest'].count()
Out[17]:
In [18]:
# Bar chart of the number of records for every (Year, Arrest) combination.
# 'ID' is selected before aggregating so only that column is counted rather than
# every column in the frame; the wide figure keeps the many bars legible.
data.groupby(['Year', 'Arrest'])['ID'].count().plot.bar(
    figsize=(20, 5),
    title='Records per year, split by arrest outcome',
)
Out[18]:
In [19]:
# List the available columns before choosing the subset to analyse
data.columns
Out[19]:
In [20]:
# Keep only the columns used in the rest of the analysis.
selected_columns = [
    'Year',
    'Primary Type',
    'Arrest',
    'Location Description',
    'District',
]
data2 = data.filter(items=selected_columns)
In [21]:
# Preview the reduced frame
data2.head()
Out[21]:
In [22]:
# Add one 0/1 indicator column per distinct value of 'Primary Type', named after
# that value. The comparison is vectorised (one pass over the column per crime type)
# instead of a Python-level list comprehension over every row, and the loop body is
# indented so the cell is valid Python. Re-running the cell simply overwrites the
# same columns.
primary_type = data2['Primary Type']
for crime_type in np.unique(primary_type):
    # Boolean mask -> 1 where the record is of this type, 0 otherwise.
    data2[crime_type] = (primary_type == crime_type).astype('int64')
In [23]:
# Preview the frame with the per-crime-type indicator columns added
data2.head()
Out[23]:
In [24]:
# Yearly totals of the PROSTITUTION and BATTERY indicator columns, as grouped bars.
# Only the two plotted columns are aggregated; summing the whole frame first would
# also try to aggregate the remaining columns (including the text ones) for nothing.
data2.groupby('Year')[['PROSTITUTION', 'BATTERY']].sum().plot.bar(
    title='PROSTITUTION vs BATTERY records per year',
)
Out[24]:
In [26]:
# Column labels of the reduced frame, including the indicator columns added per crime type
data2.columns
Out[26]:
In [27]:
# Yearly totals of the PROSTITUTION and CRIM SEXUAL ASSAULT indicator columns, as
# grouped bars. Only the two plotted columns are aggregated; summing the whole frame
# first would also try to aggregate the remaining columns (including the text ones).
data2.groupby('Year')[['PROSTITUTION', 'CRIM SEXUAL ASSAULT']].sum().plot.bar(
    title='PROSTITUTION vs CRIM SEXUAL ASSAULT records per year',
)
Out[27]:
In [ ]: