Load up packages.
In [8]:
%matplotlib inline
import numpy as np
import pandas as pd
import urllib2
Read in the violations data (a CSV file hosted by Code for America).
In [11]:
# Location of the source CSV (Violations-2012.csv).
VIOLATIONS_URL = 'http://forever.codeforamerica.org/fellowship-2015-tech-interview/Violations-2012.csv'
# pandas can read straight from an http(s) URL and manages the connection
# itself, so there is no manually opened (and never closed) urllib2 handle.
data = pd.read_csv(VIOLATIONS_URL)
# Preview the first rows as a sanity check on the load.
data.head()
Out[11]:
In [7]:
# Group the `violation_date` column on `violation_category`.
# Selecting the single column up front yields flat result columns, so no
# extra header level has to be dropped afterwards.
dates_by_category = data.groupby('violation_category')['violation_date']
# Compute per-category summary statistics:
#   num      - number of rows in the group ('size' counts every row, like len)
#   min_date - smallest violation_date in the group
#   max_date - largest violation_date in the group
# A list of function names plus an explicit rename replaces the nested-dict
# "renamer" form of agg(), which newer pandas rejects with SpecificationError,
# and fixes the column order (a plain dict did not guarantee one on Python 2).
# NOTE(review): violation_date is loaded without date parsing, so min/max
# compare the raw column values -- confirm the stored format sorts
# chronologically, or parse it with pd.to_datetime first.
violations = (dates_by_category
              .agg(['size', 'min', 'max'])
              .rename(columns={'size': 'num',
                               'min': 'min_date',
                               'max': 'max_date'}))
# Write out results to file
violations.to_csv('violations_summary.csv', encoding='utf8')
# Display the summary statistics
violations
Out[7]:
In [ ]: