In [1]:
import datetime
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as pyplot
In [2]:
def parser(x):
    """Parse a ``YYYY-MM-DD HH:MM:SS`` timestamp string.

    Parameters
    ----------
    x : str
        Timestamp text in the exact format ``'%Y-%m-%d %H:%M:%S'``.

    Returns
    -------
    datetime.datetime
        The parsed (naive) timestamp.

    Raises
    ------
    ValueError
        If ``x`` does not match the expected format.
    """
    # `datetime` is imported as the *module*, so strptime has to be reached
    # through the `datetime.datetime` class; `datetime.strptime` would raise
    # AttributeError on the first call.
    return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
In [3]:
# Load the 2013 spreadsheet into `df`. The 'Date of Stop' column is parsed as
# dates and exposed under the name 'datetime'.
# NOTE(review): the workbook path is an absolute path on one machine; consider
# moving it into a configurable constant.
df = pd.read_excel(
    io='/home/copelco/Desktop/MD/2013.xlsx',
    parse_dates={'datetime': ['Date of Stop']},
)
In [4]:
# Column labels of the loaded frame, as a plain Python list.
df.columns.tolist()
Out[4]:
In [65]:
# Total number of rows in the loaded frame.
len(df)
Out[65]:
In [4]:
# Row count for each distinct Agency value.
df['Agency'].value_counts()
Out[4]:
In [25]:
# Mask of rows whose Search column is populated, followed by how many there are.
# NOTE(review): presumably a non-null Search means a search took place — confirm.
is_search = df['Search'].notna()
len(df.loc[is_search])
Out[25]:
In [49]:
# Number of rows with a populated Search value per agency, largest first.
(
    df.loc[is_search]
    .groupby('Agency')['Agency']
    .count()
    .sort_values(ascending=False)
)
Out[49]:
In [50]:
# Fraction of all rows that have a populated Search value.
len(df.loc[is_search]) / len(df)
Out[50]:
In [6]:
# Distinct values present in the Race column.
df['Race'].unique()
Out[6]:
In [7]:
# Distinct values present in the Outcome column.
df['Outcome'].unique()
Out[7]:
In [8]:
# Distinct values present in the Search column.
df['Search'].unique()
Out[8]:
In [13]:
# Distinct values present in the Disposition column.
df['Disposition'].unique()
Out[13]:
In [5]:
# Distinct values present in the Gender column.
df['Gender'].unique()
Out[5]:
In [12]:
# Distinct values present in the 'State of Registration' column.
df.loc[:, 'State of Registration'].unique()
Out[12]:
In [13]:
# Distinct values present in the 'State of Residence' column.
df.loc[:, 'State of Residence'].unique()
Out[13]:
In [14]:
# Distinct values present in the 'County of Residence' column.
df.loc[:, 'County of Residence'].unique()
Out[14]:
In [15]:
# Distinct values present in the 'Stop Reason' column.
df.loc[:, 'Stop Reason'].unique()
Out[15]:
In [16]:
# Distinct values present in the 'Search Reason' column.
df.loc[:, 'Search Reason'].unique()
Out[16]:
In [17]:
# Distinct values present in the 'Search Conducted' column.
df.loc[:, 'Search Conducted'].unique()
Out[17]:
In [45]:
# Every distinct Agency value whose text contains 'Montgomery'.
df.loc[df['Agency'].str.contains('Montgomery'), 'Agency'].unique()
Out[45]:
In [6]:
# Boolean row masks reused by the cells that follow.

# Rows reported by the Montgomery County Police Department.
in_montgomery = df['Agency'].eq('Montgomery County Police Department')

# Rows with no Race value recorded.
missing_race = df['Race'].isna()

# Rows with a populated Search value.
did_search = df['Search'].notna()

# Rows whose time of day is strictly after 00:00.
# NOTE(review): presumably exact midnight stands for "time not recorded" — confirm.
valid_time_value = df['datetime'].dt.time > datetime.time(0, 0)
In [67]:
# Number of rows belonging to the Montgomery County Police Department.
len(df.loc[in_montgomery])
Out[67]:
In [38]:
# Number of Montgomery rows that have no Race value.
len(df.loc[in_montgomery & missing_race])
Out[38]:
In [39]:
# Montgomery rows broken down by Race value.
df.loc[in_montgomery, 'Race'].value_counts()
Out[39]:
In [40]:
# Sum of the per-Race counts for Montgomery rows.
df.loc[in_montgomery, 'Race'].value_counts().sum()
Out[40]:
In [41]:
# Number of Montgomery rows with a populated Search value.
df.loc[in_montgomery, 'Search'].count()
Out[41]:
In [42]:
# Montgomery rows with a populated Search value, as a share of the summed
# per-Race counts for Montgomery.
(
    df.loc[in_montgomery, 'Search'].count()
    / df.loc[in_montgomery, 'Race'].value_counts().sum()
)
Out[42]:
In [43]:
# Montgomery rows broken down by Search value.
df.loc[in_montgomery, 'Search'].value_counts()
Out[43]:
In [44]:
# Montgomery rows with a populated Search value, broken down by Race.
df.loc[in_montgomery & did_search, 'Race'].value_counts()
Out[44]:
In [51]:
# Sum of the per-Race counts for Montgomery rows with a populated Search value.
df.loc[in_montgomery & did_search, 'Race'].value_counts().sum()
Out[51]:
In [52]:
# Bar chart of Montgomery rows per hour of day, restricted to rows that pass
# the valid_time_value mask.
stop_times = df.loc[in_montgomery & valid_time_value, 'datetime']
stops_per_hour = stop_times.groupby(stop_times.dt.hour).count()
p = stops_per_hour.plot(kind='bar', title="Montgomery Hour of Day Stops")
p.set_xlabel("Hour of the Day")
p.set_ylabel("Stops")
Out[52]:
In [53]:
# Bar chart of Montgomery rows per calendar month, restricted to rows that
# pass the valid_time_value mask.
#
# The 'Location' column is selected *before* counting so only that one column
# is aggregated; counting every column and then indexing ['Location'] yields
# the same Series but does the work for all the other columns too.
# Note that .count() tallies non-null Location values, so a row with no
# Location is not included in its month's bar.
# NOTE(review): recent pandas releases deprecate the 'M' frequency alias in
# favour of 'ME' — confirm against the pandas version in use before changing.
montgomery_timed = df.loc[in_montgomery & valid_time_value]
monthly_stops = (
    montgomery_timed
    .groupby(pd.Grouper(key='datetime', freq='1M'))['Location']
    .count()
)
p = monthly_stops.plot(kind='bar', title="Montgomery Stops By Month")
p.set_xlabel("Month")
p.set_ylabel("Stops")
Out[53]:
In [65]:
# Per-month, per-Race counts of Montgomery rows that pass the valid_time_value
# mask. The result is stored in `p` and not displayed or plotted in this cell.
month_and_race = [pd.Grouper(key='datetime', freq='1M'), 'Race']
p = (
    df.loc[in_montgomery & valid_time_value]
    .groupby(month_and_race)['Race']
    .count()
)
In [ ]: