Load up packages.


In [8]:
%matplotlib inline
import numpy as np
import pandas as pd
import urllib2

Read in the 2012 violations data (a CSV file hosted by Code for America).


In [11]:
# Location of the 2012 violations CSV.
VIOLATIONS_URL = 'http://forever.codeforamerica.org/fellowship-2015-tech-interview/Violations-2012.csv'

# pandas can read directly from a URL, so there is no need to open the
# connection by hand (the previous urlopen() response was never closed).
data = pd.read_csv(VIOLATIONS_URL)

# Preview the first few rows as a sanity check on the parsed columns.
data.head()


Out[11]:
violation_id inspection_id violation_category violation_date violation_date_closed violation_type
0 204851 261019 Garbage and Refuse 2012-01-03 00:00:00 2012-02-02 00:00:00 Refuse Accumulation
1 204852 261019 Unsanitary Conditions 2012-01-03 00:00:00 2012-02-02 00:00:00 Unsanitary conditions, not specified
2 204853 261023 Garbage and Refuse 2012-01-03 00:00:00 2012-01-17 00:00:00 Refuse Accumulation
3 204854 261023 Garbage and Refuse 2012-01-03 00:00:00 2012-01-17 00:00:00 Refuse Accumulation
4 204858 261029 Garbage and Refuse 2012-01-03 00:00:00 2012-03-12 00:00:00 Refuse Accumulation

In [7]:
# Group data on `violation_category`
data_gb_violation_category = data.groupby('violation_category')

# Compute summary statistics of `violation_date` for each category:
#   num      - number of rows (violations) in the category
#   max_date - largest `violation_date` value in the category
#   min_date - smallest `violation_date` value in the category
# A plain list of aggregations followed by an explicit rename is used here
# instead of a nested {column: {name: func}} dict: the nested-renamer form is
# rejected by current pandas releases, and it left the column order up to
# dict ordering. The list fixes the order to num, max_date, min_date.
violations = (
    data_gb_violation_category['violation_date']
    .agg(['size', 'max', 'min'])
    .rename(columns={'size': 'num', 'max': 'max_date', 'min': 'min_date'})
)

# Write out results to file
violations.to_csv('violations_summary.csv', encoding='utf8')

# Print out summary statistics
violations


Out[7]:
num max_date min_date
violation_category
Air Pollutants and Odors 2 2012-12-19 00:00:00 2012-12-05 00:00:00
Animals and Pests 180 2012-12-28 00:00:00 2012-01-03 00:00:00
Biohazards 7 2012-12-18 00:00:00 2012-04-13 00:00:00
Building Conditions 62 2012-12-26 00:00:00 2012-01-12 00:00:00
Chemical Hazards 17 2012-12-06 00:00:00 2012-02-08 00:00:00
Garbage and Refuse 126 2012-12-21 00:00:00 2012-01-03 00:00:00
Retail Food 1 2012-12-20 00:00:00 2012-12-20 00:00:00
Unsanitary Conditions 83 2012-12-19 00:00:00 2012-01-03 00:00:00
Vegetation 67 2012-12-05 00:00:00 2012-02-01 00:00:00

In [ ]: