In [1]:
    
import vincent
import pandas as pd
from vincent import AxisProperties, PropertySet, ValueRef
from vincent import Map
# NOTE(review): presumably prepares the notebook front end so that the
# graph.display() calls in later cells can render inline — confirm against the
# vincent documentation.
vincent.core.initialize_notebook()
    
    
In [2]:
    
# Load the incident records from a CSV file; the path is relative, so the file
# has to be reachable from the notebook's working directory.
incidents = pd.read_csv('sanfrancisco_incidents_summer_2014.csv')
    
Changing the column labels of the data set
In [3]:
    
# Replace the header names read from the CSV with the labels used by the rest
# of the notebook. The assignment is positional: the list must contain exactly
# one label per column, in file order.
incidents.columns = [
    'Id',
    'Category',
    'Description',
    'DayOfWeek',
    'Date',
    'Time',
    'District',
    'Resolution',
    'Address',
    'Longitude',
    'Latitude',
    'Location',
    'PdId',
]
    
The date and time of each incident are stored in two separate columns. Combine them into a single DateTime column.
In [4]:
    
# The date and time of each incident are stored in two separate text columns;
# join them and parse the result into a single DateTime column.
# NOTE: 'Date' is overwritten with timestamps below, so this cell cannot be
# re-run without reloading the CSV first (the string concatenation would fail).
incidents['DateTime'] = pd.to_datetime(incidents['Date'] + ' ' + incidents['Time'])
date_idx = pd.DatetimeIndex(incidents['DateTime'])

# Midnight-normalised timestamps: the calendar day without the time of day.
# normalize() stays vectorised instead of round-tripping through Python
# `date` objects and a unit-less numpy cast.
incidents['Date'] = date_idx.normalize()

# Calendar parts used for grouping in the cells below.
incidents['Hour'] = date_idx.hour
incidents['Year'] = date_idx.year
incidents['Month'] = date_idx.month
incidents['Weekday'] = date_idx.weekday  # Monday=0 ... Sunday=6
    
In [5]:
    
# Number of incidents per category, smallest count first.
# Built as one chain (no inplace mutation) so the cell gives the same result
# every time it is run.
count_by_category = (
    incidents.groupby('Category')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=True)
)

# Bar chart of the counts, keyed on the category label.
graph = vincent.Bar(count_by_category, columns=['count'], key_on='Category')
graph.legend(title='Category')
graph.axis_titles(x='Category', y='Incident Count')

# Rotate the axis labels by 270 degrees and right-align them.
# NOTE(review): axes[0] is presumably the x axis — confirm against vincent.
ax = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=270),
        align=ValueRef(value='right'),
    )
)
graph.axes[0].properties = ax
graph.display()
    
    
In [6]:
    
# Incident counts with one row per weekday and one column per district.
# NOTE(review): the name `by_year` is misleading — the table is indexed by
# 'Weekday', not by year. It is kept because the next cell reads it.
by_year = incidents.pivot_table(
    values='Id',
    index='Weekday',
    columns='District',
    aggfunc='count',
)
    
In [7]:
    
# Line chart built from the weekday-by-district count table.
graph = vincent.Line(by_year)
graph.legend(title='District')
graph.axis_titles(
    y='Incident Count',
    x='Weekday',
)
graph.display()
    
    
In [8]:
    
# Restrict the data to the MISSION district and drop four categories.
excluded_categories = [
    'LARCENY/THEFT',
    'NON-CRIMINAL',
    'OTHER OFFENSES',
    'WARRANTS',
]
in_mission = incidents['District'] == 'MISSION'
is_excluded = incidents['Category'].isin(excluded_categories)
filtered = incidents[in_mission & ~is_excluded]
    
In [9]:
    
# Number of incidents per category within `filtered` (set by the previous
# cell), smallest count first. Built as one chain (no inplace mutation) so the
# cell gives the same result every time it is run.
count_by_category = (
    filtered.groupby('Category')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=True)
)

# Bar chart of the counts, keyed on the category label.
graph = vincent.Bar(count_by_category, columns=['count'], key_on='Category')
graph.legend(title='Category')
graph.axis_titles(x='Category', y='Incident Count')

# Rotate the axis labels by 270 degrees and right-align them.
# NOTE(review): axes[0] is presumably the x axis — confirm against vincent.
ax = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=270),
        align=ValueRef(value='right'),
    )
)
graph.axes[0].properties = ax
graph.display()
    
    
In [10]:
    
# Category whose incidents are examined in the following cells.
filter_by_category = 'MISSING PERSON'
    
In [11]:
    
# Incidents of the selected category, counted per hour of day with one column
# per district.
filtered = incidents[incidents['Category'] == filter_by_category]
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
    # An hour in which a district recorded no incident is a count of 0;
    # without this the cell would hold NaN.
    fill_value=0,
)

# Line chart built from the hour-by-district count table.
graph = vincent.Line(by_hour)
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
    
    
In [12]:
    
# Number of incidents per district within `filtered` (set by the previous
# cell), smallest count first. The variable keeps the name `count_by_category`
# although it is grouped by district here. Built as one chain (no inplace
# mutation) so the cell gives the same result every time it is run.
count_by_category = (
    filtered.groupby('District')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=True)
)

# Bar chart of the counts, keyed on the district label.
graph = vincent.Bar(count_by_category, columns=['count'], key_on='District')
graph.legend(title='District')
graph.axis_titles(x='District', y='Incident Count')

# Rotate the axis labels by 270 degrees and right-align them.
# NOTE(review): axes[0] is presumably the x axis — confirm against vincent.
ax = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=270),
        align=ValueRef(value='right'),
    )
)
graph.axes[0].properties = ax
graph.display()
    
    
In [13]:
    
# Districts to keep when comparing hourly counts in the next cell.
filter_by_districts = ['MISSION','SOUTHERN','PARK','BAYVIEW']
    
In [14]:
    
# Incidents of the selected category, limited to the selected districts and
# counted per hour of day with one column per district.
matches_category = incidents['Category'] == filter_by_category
in_selected_districts = incidents['District'].isin(filter_by_districts)
filtered = incidents[matches_category & in_selected_districts]
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
    # An hour in which a district recorded no incident is a count of 0;
    # without this the cell would hold NaN.
    fill_value=0,
)

# Line chart built from the hour-by-district count table.
graph = vincent.Line(by_hour)
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
    
    
In [15]:
    
# Narrow the data to a single category within a single district.
filter_by_district = 'MISSION'
filter_by_category = 'MISSING PERSON'
matches_category = incidents['Category'] == filter_by_category
matches_district = incidents['District'] == filter_by_district
filtered = incidents[matches_category & matches_district]
    
In [16]:
    
# Count the rows of `filtered` per hour of day, one column per category.
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='Category',
    aggfunc='count',
)

# Line chart built from the hour-by-category count table.
graph = vincent.Line(by_hour)
graph.legend(title='Category')
graph.axis_titles(
    y='Incident Count',
    x='Hour',
)
graph.display()
    
    
In [17]:
    
# Count the rows of `filtered` per hour of day, one column per district.
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
)

# Bar chart built from the hour-by-district count table.
graph = vincent.Bar(by_hour)
graph.legend(title='District')
graph.axis_titles(
    y='Incident Count',
    x='Hour',
)
graph.display()
    
    
In [18]:
    
# Count the rows of `filtered` per weekday, one column per district.
by_weekday = filtered.pivot_table(
    values='Id',
    index='Weekday',
    columns='District',
    aggfunc='count',
)

# Bar chart built from the weekday-by-district count table.
graph = vincent.Bar(by_weekday)
graph.legend(title='District')
graph.axis_titles(
    y='Incident Count',
    x='Weekday',
)
graph.display()
    
    
In [19]:
    
# The 'Weekday' column comes from pandas' DatetimeIndex.weekday, which numbers
# the days Monday=0 ... Sunday=6, so 4 selects Friday.
FRIDAY = 4

# Use a separate name instead of rebinding `filtered`: narrowing `filtered`
# here would make the earlier cells that read it give Friday-only results
# when re-run.
filtered_friday = filtered[filtered['Weekday'] == FRIDAY]

# Count the Friday rows per hour of day, one column per district.
by_hour = filtered_friday.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
)

# Bar chart built from the Friday hour-by-district count table.
graph = vincent.Bar(by_hour)
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
    
    
In [ ]: