In [7]:
import pandas as pd
In [8]:
df = pd.read_pickle('parking_eda.pkle')
df['total'] = 1
In [9]:
df.head()
Out[9]:
In [92]:
df.ticket_issue_datetime.sort_values()
Out[92]:
In [ ]:
In [ ]:
In [8]:
violation_state_df = df.groupby(['violation_code', 'rp_plate_state']).holiday.count().reset_index()
In [32]:
# violation_state_df.sort_values('holiday', ascending=False)
In [53]:
# http://opendata.dc.gov/datasets/aa514416aaf74fdc94748f1e56e7cc8a_0.csv
dc_address_df = pd.read_csv('../data/dc_address_id.csv', encoding="utf-8-sig")
dc_address_df.columns = [col.lower() for col in dc_address_df.columns]
In [6]:
dc_address_df
In [59]:
dc_address_df_selected = dc_address_df[['address_id', 'x', 'y', 'fulladdress']]
In [ ]:
In [75]:
fines_df = pd.read_csv('../cleaned_data/fine.csv', encoding="utf-8-sig")
fines_df.columns = [col.lower() for col in fines_df.columns]
fines_df.drop('shortdesc', axis=1, inplace=True)
fines_df.rename(columns={'code': 'violation_code'}, inplace=True)
fines_df.head(1)
Out[75]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [60]:
# pd.groupby(b,by=[b.index.month,b.index.year])
df_address_id = df.groupby(['address_id']).total.count().reset_index()
df_address_id = df_address_id.sort_values('total', ascending=False).reset_index(drop=True)
In [94]:
df_address_id = df_address_id.merge(dc_address_df_selected, on='address_id')
df_address_id.to_csv('../cleaned_data/most_tickets_by_address_id_all_loaded_data.tsv', sep='\t', index=False)
In [95]:
df_address_id
Out[95]:
In [ ]:
In [ ]:
# vehicle versus everything else
In [ ]:
In [78]:
violation_code_by_state_df = df.groupby(['violation_code', 'rp_plate_state']).total.count().reset_index()
violation_code_by_state_df = violation_code_by_state_df.sort_values('total', ascending=False).reset_index(drop=True)
violation_code_by_state_df = violation_code_by_state_df.merge(fines_df, on='violation_code')
violation_code_by_state_df['estimated_fine_total'] = violation_code_by_state_df.total * violation_code_by_state_df.fine
violation_code_by_state_df.to_csv('../cleaned_data/violation_code_by_state_play_with_fine_info.tsv', sep='\t', index=False)
In [ ]:
In [82]:
holiday_df = df[df.holiday==True]
In [86]:
holiday_df_violation_code = holiday_df.groupby(['violation_code', 'rp_plate_state']).total.count().reset_index()
holiday_df_violation_code = holiday_df_violation_code.sort_values('total', ascending=False).reset_index(drop=True)
holiday_df_violation_code = holiday_df_violation_code.merge(fines_df, on='violation_code')
holiday_df_violation_code['estimated_fine_total'] = holiday_df_violation_code.total * holiday_df_violation_code.fine
holiday_df_violation_code.to_csv('../cleaned_data/on_holiday_violation_code_by_state_play_with_fine_info.tsv', sep='\t', index=False)
In [ ]:
In [ ]:
In [88]:
body_style_df_violation_code = df.groupby(['violation_code', 'body_style']).total.count().reset_index()
body_style_df_violation_code = body_style_df_violation_code.sort_values('total', ascending=False).reset_index(drop=True)
body_style_df_violation_code = body_style_df_violation_code.merge(fines_df, on='violation_code')
body_style_df_violation_code['estimated_fine_total'] = body_style_df_violation_code.total * body_style_df_violation_code.fine
body_style_df_violation_code.to_csv('../cleaned_data/body_style_violation_code_with_fine_info.tsv', sep='\t', index=False)
In [ ]:
In [ ]:
In [90]:
violation_code_df = df.groupby(['violation_code']).total.count().reset_index()
violation_code_df = violation_code_df.sort_values('total', ascending=False).reset_index(drop=True)
violation_code_df = violation_code_df.merge(fines_df, on='violation_code')
violation_code_df['estimated_fine_total'] = violation_code_df.total * violation_code_df.fine
# violation_code_df
violation_code_df.to_csv('../cleaned_data/violation_code_with_fine_info.tsv', sep='\t', index=False)
In [ ]: