In [1]:
import pandas as pd
from matplotlib.pyplot import pie, axis, show
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
In [2]:
fine_df_file = '../data/interim/fine_enriched_parking_violations.tsv'
In [3]:
df = pd.read_csv(fine_df_file, sep='\t', parse_dates=['ticket_issue_datetime'])
df['counter'] = 1
df['dmv_area'] = (df.rp_plate_state=='DC') | (df.rp_plate_state=='MD') | (df.rp_plate_state=='VA')
In [4]:
dmv_df = df[(df.rp_plate_state.isin(set(['DC', 'MD', 'VA'])))]
In [5]:
vc_df = dmv_df.groupby(['violation_code']).counter.sum().reset_index('violation_code')
counter_codes_15 = vc_df.sort_values(by='counter', ascending=False)[:15].violation_code
In [6]:
top_codes = dmv_df[dmv_df.violation_code.isin(counter_codes_15)]
In [7]:
top_violation_by_state = top_codes.groupby(['violation_description', 'rp_plate_state']).counter.sum() #.unstack().unstack().unstack()
In [8]:
top_violation_by_state.unstack().plot.barh()
Out[8]:
In [9]:
top_violation_by_state_revenue = top_codes.groupby(['violation_description', 'rp_plate_state']).fine.sum()
In [73]:
ax = top_violation_by_state_revenue.unstack().plot.barh(legend=True)
ax.xaxis.set_major_formatter(plt.FormatStrFormatter('%.0f'))
plt.draw()
In [ ]:
In [ ]: