In [43]:
import pandas as pd

# Prefix of the incident CSV path; the year and ".csv" extension are appended below.
DATA_PATH = "/home/datascience/project/data/sfpd_incident_" # Make this the /path/to/the/data

# NOTE(review): the frame is called `data_2014`, but the file loaded here is the
# 2013 extract. The name is kept because every later cell refers to it.
data_2014 = pd.read_csv(
    filepath_or_buffer=DATA_PATH + "2013.csv",
    na_values=['-'],  # a lone dash in the file is read as a missing value
)

# Preview the first five rows.
data_2014.head(n=5)


Out[43]:
IncidntNum Category Descript DayOfWeek Date Time PdDistrict Resolution Location X Y
0 121047551 RECOVERED VEHICLE VEHICLE, RECOVERED, AUTO Saturday 01/05/2013 12:18 BAYVIEW ARREST, BOOKED 900.0 Block of CONNECTICUT ST -122.397780 37.753970
1 13670901 LARCENY/THEFT PETTY THEFT OF PROPERTY Monday 04/15/2013 16:15 INGLESIDE NONE SILVER AV / MISSION ST -122.431295 37.728730
2 60847080 WARRANTS WARRANT ARREST Sunday 03/24/2013 17:33 SOUTHERN ARREST, BOOKED 800.0 Block of BRYANT ST -122.403675 37.775178
3 71276391 WARRANTS ENROUTE TO OUTSIDE JURISDICTION Wednesday 02/13/2013 22:41 NORTHERN ARREST, BOOKED CHESTNUT ST / FILLMORE ST -122.436275 37.800813
4 71276391 WARRANTS WARRANT ARREST Wednesday 02/13/2013 22:41 NORTHERN ARREST, BOOKED CHESTNUT ST / FILLMORE ST -122.436275 37.800813

In [9]:
# List the column labels of the loaded frame.
data_2014.columns


Out[9]:
Index([u'IncidntNum', u'Category', u'Descript', u'DayOfWeek', u'Date', u'Time', u'PdDistrict', u'Resolution', u'Location', u'X', u'Y'], dtype='object')

In [44]:
# Merge the separate 'Date' (MM/DD/YYYY) and 'Time' (HH:MM) text columns into a
# single timestamp column named 'DateTime'.
#
# The concatenation is done column-wise instead of with a row-wise apply, and
# the format is given explicitly so pandas does not have to infer it per value.
# The guard makes the cell safe to re-run: after the first run 'Date' and
# 'Time' have already been dropped from `data_2014`.
if 'DateTime' not in data_2014.columns:
    data_2014['DateTime'] = pd.to_datetime(
        data_2014['Date'] + ' ' + data_2014['Time'],
        format='%m/%d/%Y %H:%M',
    )

# Keep only the columns used from here on. 'IncidntNum', 'Date' and 'Time' are
# dropped, so later cells must derive days/months from 'DateTime'.
data_2014 = data_2014[['Category', 'Descript', 'DayOfWeek', 'DateTime', 'PdDistrict', 'Resolution', 'Location', 'X', 'Y']]

data_2014.head()


Out[44]:
Category Descript DayOfWeek DateTime PdDistrict Resolution Location X Y
0 RECOVERED VEHICLE VEHICLE, RECOVERED, AUTO Saturday 2013-01-05 12:18:00 BAYVIEW ARREST, BOOKED 900.0 Block of CONNECTICUT ST -122.397780 37.753970
1 LARCENY/THEFT PETTY THEFT OF PROPERTY Monday 2013-04-15 16:15:00 INGLESIDE NONE SILVER AV / MISSION ST -122.431295 37.728730
2 WARRANTS WARRANT ARREST Sunday 2013-03-24 17:33:00 SOUTHERN ARREST, BOOKED 800.0 Block of BRYANT ST -122.403675 37.775178
3 WARRANTS ENROUTE TO OUTSIDE JURISDICTION Wednesday 2013-02-13 22:41:00 NORTHERN ARREST, BOOKED CHESTNUT ST / FILLMORE ST -122.436275 37.800813
4 WARRANTS WARRANT ARREST Wednesday 2013-02-13 22:41:00 NORTHERN ARREST, BOOKED CHESTNUT ST / FILLMORE ST -122.436275 37.800813

In [45]:
# Write the trimmed frame to a CSV file in the current working directory
# (the row index is written too, as with the default settings).
data_2014.to_csv(path_or_buf='2013_data_911.csv')

In [ ]:
# Count incidents per police district per calendar day.
#
# The result `df2` is a tidy frame with the columns 'PdDistrict', 'Date' and
# 'count' (one row per district/day pair that has at least one incident).
#
# The raw 'Date' text column is no longer on `data_2014` at this point, so the
# day is taken from 'DateTime' with the time-of-day truncated to midnight.
incident_day = data_2014['DateTime'].dt.normalize().rename('Date')

# A single two-key groupby replaces the nested per-district / per-date loops,
# whose computed counts were never stored anywhere.
df2 = (
    data_2014
    .groupby(['PdDistrict', incident_day])
    .size()
    .reset_index(name='count')
)

df2.head()

In [13]:
import numpy as np
import matplotlib
import matplotlib.dates  # imported explicitly; the code below uses matplotlib.dates.date2num
from matplotlib import pyplot as plt
from datetime import datetime  # explicit name instead of the former `from datetime import *`

# Plot each incident's calendar day against its row position in the frame.
#
# The raw 'Date' strings are dropped when 'DateTime' is created, so the day is
# derived from 'DateTime' (time-of-day truncated) instead of re-parsing text
# row by row. `result` stays a Series of matplotlib date numbers aligned with
# the rows of `data_2014`.
incident_days = data_2014['DateTime'].dt.normalize()
result = pd.Series(
    matplotlib.dates.date2num(incident_days.tolist()),
    index=data_2014.index,
)

fig, ax = plt.subplots()
# Marker-only plot; xaxis_date() tells the axis to render the numbers as dates.
ax.plot(result, np.arange(result.shape[0]), 'o')
ax.xaxis_date()
ax.set_xlabel('Incident date')
ax.set_ylabel('Row position')
plt.show()

In [ ]:
# Group the incidents of every police district by calendar month, then plot
# the monthly incident counts for the BAYVIEW district.
grouped = data_2014.groupby('PdDistrict')
crime_groups = {}
for name, group in grouped:
    # There is no 'Month' column on the frame, so the month number (1-12) is
    # derived from 'DateTime' and used as the grouping key.
    crime_groups[name] = group.groupby(group['DateTime'].dt.month.rename('Month'))

# Series of incident counts indexed by month number.
x = crime_groups['BAYVIEW'].size()

fig, ax = plt.subplots()
ax.plot(x.index, x.values, marker='o')
ax.set_xlabel('Month')
ax.set_ylabel('Number of incidents')
ax.set_title('BAYVIEW incidents per month')
plt.show()

In [ ]:
# Display `x`, the per-group sizes for the 'BAYVIEW' district assigned in the cell above.
x

In [ ]: