In [43]:
import pandas as pd

# Prefix shared by the incident CSV file names; the year and ".csv" are appended below.
DATA_PATH = "/home/datascience/project/data/sfpd_incident_"  # Make this the /path/to/the/data

# NOTE: the frame is named data_2014 but is read from the 2013 file. The name is
# kept because the following cells refer to it.
# Cells containing only '-' are parsed as missing values.
incidents_csv = DATA_PATH + "2013.csv"
data_2014 = pd.read_csv(incidents_csv, na_values=["-"])
data_2014.head()
Out[43]:
In [9]:
# List the column labels of the loaded frame (DataFrame.keys() is an alias for this).
data_2014.columns
Out[9]:
In [44]:
# Build one timestamp per incident from the separate 'Date' and 'Time' text
# columns. The strings are concatenated column-wise instead of with a
# row-by-row apply: the result is the same for complete rows, it avoids a Python
# call per row, and rows with a missing date or time become NaT instead of
# raising TypeError.
data_2014['DateTime'] = pd.to_datetime(data_2014['Date'] + ' ' + data_2014['Time'])

# Keep only the columns used from here on. 'Date' and 'Time' are dropped by this
# selection, so the cell cannot be re-run without reloading the CSV first.
data_2014 = data_2014[[
    'Category', 'Descript', 'DayOfWeek', 'DateTime',
    'PdDistrict', 'Resolution', 'Location', 'X', 'Y',
]]
data_2014.head()
Out[44]:
In [45]:
# Write the trimmed incident table to a CSV file; the path is relative, so it
# resolves against the notebook's current working directory.
data_2014.to_csv(path_or_buf='2013_data_911.csv')
In [ ]:
# Count incidents per police district per calendar day.
#
# The earlier version of this cell called the non-existent `pd.DateFrame`,
# grouped on a 'Date' column that the column-selection cell has already
# dropped, and threw away each computed count. The counts are now derived from
# 'DateTime' in a single grouped aggregation and stored in `df2`.
byDistrict = data_2014.groupby('PdDistrict')  # kept for any later per-district use

# Midnight-normalised timestamp, i.e. the calendar day of each incident.
incident_day = data_2014['DateTime'].dt.normalize().rename('Date')

# One row per (PdDistrict, Date) pair with the number of incidents in 'count'.
df2 = (
    data_2014
    .groupby(['PdDistrict', incident_day])
    .size()
    .reset_index(name='count')
)
df2.head()
In [13]:
import numpy as np
import matplotlib
import matplotlib.dates  # imported explicitly; previously reachable only as a side effect of importing pyplot
from matplotlib import pyplot as plt
# NOTE(review): star import kept so names that other cells may rely on stay in
# scope; narrow it to the names actually used once that is confirmed.
from datetime import *

# Matplotlib date number (float days) of each incident's calendar day.
# Derived from the parsed 'DateTime' column: the raw 'Date' strings are dropped
# by the column-selection cell, and converting the whole column at once avoids a
# per-row strptime call.
result = pd.Series(
    matplotlib.dates.date2num(data_2014['DateTime'].dt.normalize().to_numpy()),
    index=data_2014.index,
)

# Row position plotted against incident date. Axes.plot with an 'o' marker plus
# xaxis_date() replaces the deprecated plt.plot_date call.
fig, ax = plt.subplots()
ax.xaxis_date()
ax.plot(result, np.arange(result.shape[0]), 'o')
ax.set_xlabel('Incident date')
ax.set_ylabel('Row position in data_2014')
ax.set_title('Incident dates by row position')
plt.show()
In [ ]:
# Monthly incident counts, grouped per police district.
grouped = data_2014.groupby('PdDistrict')

# Map each district name to its rows grouped by month number (1-12).
# The month is taken from 'DateTime' because no 'Month' column exists in
# data_2014 (grouping on the bare name "Month" raised KeyError).
crime_groups = {}
for name, group in grouped:
    crime_groups[name] = group.groupby(group['DateTime'].dt.month.rename('Month'))

# Number of incidents per month for the BAYVIEW district.
x = crime_groups['BAYVIEW'].size()

plt.plot(x)
plt.xlabel('Month')
plt.ylabel('Number of incidents')
plt.title('BAYVIEW incidents per month')
plt.show()
In [ ]:
# Display `x` (the per-month BAYVIEW incident counts assigned in the previous cell) as this cell's output.
x
In [ ]: