In [7]:
import pandas as pd

In [8]:
df = pd.read_pickle('parking_eda.pkle')
df['total'] = 1

In [9]:
df.head()


Out[9]:
x y objectid rowid_ holiday violation_code violation_description location rp_plate_state body_style address_id streetsegid xcoord ycoord filename ticket_issue_datetime total
0 -77.028867 38.895245 85866 10692752 False P001 PARK ABREAST OF ANOTHER VEHICLE 1300 BLOCK PENNSYLVANIA AVE NW NOR MD TK 801823 1681.0 397496 136377 Parking_Violations_in_April_2009.csv 2009-04-28T14:16:00.000Z 1
1 -77.043465 38.903141 85918 10692911 False P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 1100 BLOCK 19TH ST NW WEST SIDE VA PU 814327 12343.0 396230 137254 Parking_Violations_in_April_2009.csv 2009-04-23T08:31:00.000Z 1
2 -77.019903 38.910390 85919 10694277 False P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 1600 BLOCK 6TH ST NW WEST SIDE MD 4D 807159 6136.0 398274 138058 Parking_Violations_in_April_2009.csv 2009-04-27T12:50:00.000Z 1
3 -77.042577 38.902529 85937 10692191 False P285 FAIL TO DISPLAY METER RECEIPT IN A LOADING ZONE 1900 BLOCK K ST NW SOUTH SIDE DC PU 805384 4617.0 396307 137186 Parking_Violations_in_April_2009.csv 2009-04-30T15:07:00.000Z 1
4 -77.015955 38.887978 85956 10692235 False P039 PARK AT EXPIRED METER 0400 BLOCK MARYLAND AVE SW SOUTH S DC VN 800131 112.0 398616 135570 Parking_Violations_in_April_2009.csv 2009-04-29T13:58:00.000Z 1

In [92]:
df.ticket_issue_datetime.sort_values()


Out[92]:
4428060    2009-01-01T00:00:00.000Z
4399181    2009-01-01T00:04:00.000Z
4468947    2009-01-01T00:16:00.000Z
4410174    2009-01-01T00:16:00.000Z
4488309    2009-01-01T00:16:00.000Z
4455916    2009-01-01T00:16:00.000Z
4485189    2009-01-01T00:16:00.000Z
4462913    2009-01-01T00:16:00.000Z
4433978    2009-01-01T00:16:00.000Z
4386134    2009-01-01T00:22:00.000Z
4461871    2009-01-01T00:25:00.000Z
4456925    2009-01-01T00:25:00.000Z
4412193    2009-01-01T00:25:00.000Z
4365426    2009-01-01T00:30:00.000Z
4418116    2009-01-01T00:30:00.000Z
4401126    2009-01-01T00:30:00.000Z
4482080    2009-01-01T00:37:00.000Z
4492347    2009-01-01T00:38:00.000Z
4427053    2009-01-01T00:40:00.000Z
4395065    2009-01-01T00:41:00.000Z
4467907    2009-01-01T00:45:00.000Z
4410142    2009-01-01T00:45:00.000Z
4409108    2009-01-01T00:45:00.000Z
4372033    2009-01-01T00:45:00.000Z
4386160    2009-01-01T00:46:00.000Z
4397128    2009-01-01T00:50:00.000Z
4458820    2009-01-01T00:58:00.000Z
4460901    2009-01-01T01:00:00.000Z
4487252    2009-01-01T01:07:00.000Z
4376082    2009-01-01T01:12:00.000Z
                     ...           
3347277    2015-12-31T20:26:00.000Z
3265245    2015-12-31T20:26:00.000Z
3294496    2015-12-31T20:27:00.000Z
3266359    2015-12-31T20:28:00.000Z
3360918    2015-12-31T20:29:00.000Z
3370224    2015-12-31T20:31:00.000Z
3337785    2015-12-31T20:33:00.000Z
3304853    2015-12-31T20:33:00.000Z
3286098    2015-12-31T20:33:00.000Z
3290353    2015-12-31T20:35:00.000Z
3285050    2015-12-31T20:36:00.000Z
3316243    2015-12-31T20:37:00.000Z
3301754    2015-12-31T20:38:00.000Z
3302758    2015-12-31T20:40:00.000Z
3340990    2015-12-31T20:41:00.000Z
3352445    2015-12-31T20:43:00.000Z
3246809    2015-12-31T20:43:00.000Z
3335752    2015-12-31T20:44:00.000Z
3372292    2015-12-31T20:46:00.000Z
3312061    2015-12-31T20:49:00.000Z
3365017    2015-12-31T20:52:00.000Z
3265249    2015-12-31T20:55:00.000Z
3351410    2015-12-31T20:57:00.000Z
3261193    2015-12-31T20:58:00.000Z
3255073    2015-12-31T20:59:00.000Z
3244763    2015-12-31T21:00:00.000Z
3329560    2015-12-31T21:01:00.000Z
3332698    2015-12-31T21:02:00.000Z
3321316    2015-12-31T21:03:00.000Z
3357767    2015-12-31T21:07:00.000Z
Name: ticket_issue_datetime, dtype: object

In [ ]:


In [ ]:


In [8]:
violation_state_df = df.groupby(['violation_code', 'rp_plate_state']).holiday.count().reset_index()

In [32]:
# violation_state_df.sort_values('holiday', ascending=False)

In [53]:
# http://opendata.dc.gov/datasets/aa514416aaf74fdc94748f1e56e7cc8a_0.csv
dc_address_df = pd.read_csv('../data/dc_address_id.csv', encoding="utf-8-sig")
dc_address_df.columns = [col.lower() for col in dc_address_df.columns]


/usr/local/var/pyenv/versions/3.5.2/envs/trafficdata/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (19) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [6]:
dc_address_df


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-b848138b34f9> in <module>()
----> 1 dc_address_df

NameError: name 'dc_address_df' is not defined

In [59]:
dc_address_df_selected = dc_address_df[['address_id', 'x', 'y', 'fulladdress']]

In [ ]:


In [75]:
fines_df = pd.read_csv('../cleaned_data/fine.csv', encoding="utf-8-sig")
fines_df.columns = [col.lower() for col in fines_df.columns]
fines_df.drop('shortdesc', axis=1, inplace=True)
fines_df.rename(columns={'code': 'violation_code'}, inplace=True)

fines_df.head(1)


Out[75]:
violation_code desc fine
0 P001 PARK ABREAST OF ANOTHER VEHICLE 50.0

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [60]:
# pd.groupby(b,by=[b.index.month,b.index.year])
df_address_id = df.groupby(['address_id']).total.count().reset_index()
df_address_id = df_address_id.sort_values('total', ascending=False).reset_index(drop=True)

In [94]:
df_address_id = df_address_id.merge(dc_address_df_selected, on='address_id')
df_address_id.to_csv('../cleaned_data/most_tickets_by_address_id_all_loaded_data.tsv', sep='\t', index=False)

In [95]:
df_address_id


Out[95]:
address_id total x y fulladdress
0 310966 16564 -77.030156 38.897063 1300 F STREET NW
1 243269 14229 -77.024329 38.897074 900 F STREET NW
2 306041 11768 -77.023254 38.896533 500 8TH STREET NW
3 240261 11339 -77.033939 38.902895 1000 VERMONT AVENUE NW
4 243354 11213 -77.022191 38.898635 700 7TH STREET NW
5 238147 10723 -77.023235 38.895038 400 8TH STREET NW
6 278917 10059 -77.028501 38.898052 1200 G STREET NW
7 243255 9907 -77.027322 38.898749 700 11TH STREET NW
8 279976 9820 -77.034829 38.901584 900 15TH STREET NW
9 241302 9716 -77.042240 38.905338 1800 M STREET NW
10 301002 9499 -77.032516 38.898118 1400 G STREET NW
11 238715 9368 -77.020492 38.895787 600 E STREET NW
12 238713 9242 -77.020125 38.897109 600 F STREET NW
13 243367 9056 -77.029947 38.916779 1300 U STREET NW
14 240250 8805 -77.033387 38.903982 1100 VERMONT AVENUE NW
15 241793 8794 -77.038887 38.904056 1100 17TH STREET NW
16 240642 8713 -77.029992 38.903521 1300 L STREET NW
17 310133 8274 -77.024392 38.898149 900 G STREET NW
18 240266 7744 -77.032393 38.903455 1400 L STREET NW
19 310208 7519 -77.032210 38.914257 1800 14TH STREET NW
20 217995 7496 -77.032283 38.901032 1400 I STREET NW
21 279730 7399 -77.022099 38.894960 400 7TH STREET NW
22 271896 7275 -77.063026 38.905022 3200 M STREET NW
23 279125 7259 -77.032432 38.898968 1400 NEW YORK AVENUE NW
24 279837 7095 -77.020171 38.900648 600 I STREET NW
25 279940 7023 -77.022148 38.899967 800 7TH STREET NW
26 302001 6695 -77.032266 38.897161 1400 F STREET NW
27 309437 6663 -77.032331 38.916852 1400 U STREET NW
28 240654 6410 -77.030002 38.898741 700 13TH STREET NW
29 279200 6319 -77.034829 38.916864 1500 U STREET NW
... ... ... ... ... ...
71765 269942 1 -77.074741 38.921306 2417 OBSERVATORY PLACE NW
71766 71390 1 -76.989421 38.945481 1218 ALLISON STREET NE
71767 269937 1 -77.074898 38.921541 3814 BEECHER STREET NW
71768 269989 1 -77.075288 38.921553 3818 BEECHER STREET NW
71769 269993 1 -77.075198 38.921301 2422 OBSERVATORY PLACE NW
71770 270081 1 -77.075791 38.919761 2215 38TH STREET NW
71771 270041 1 -77.076816 38.919762 2215 39TH STREET NW
71772 71323 1 -76.989092 38.941994 1216 UPSHUR STREET NE
71773 270079 1 -77.075805 38.919660 2211 38TH STREET NW
71774 270078 1 -77.075783 38.919610 2209 38TH STREET NW
71775 228495 1 -77.019473 38.936204 3654 PARK PLACE NW
71776 270072 1 -77.076283 38.919860 2218 38TH STREET NW
71777 270069 1 -77.076278 38.920008 2224 38TH STREET NW
71778 228499 1 -77.020230 38.935924 623 PRINCETON PLACE NW
71779 270057 1 -77.076545 38.920478 3812 BENTON STREET NW
71780 228504 1 -77.020576 38.935876 633 PRINCETON PLACE NW
71781 228505 1 -77.020645 38.935866 635 PRINCETON PLACE NW
71782 71332 1 -76.941652 38.906244 1217 42ND PLACE NE
71783 269994 1 -77.075179 38.921258 2420 OBSERVATORY PLACE NW
71784 270035 1 -77.076278 38.921543 3838 BEECHER STREET NW
71785 270034 1 -77.076349 38.921547 3840 BEECHER STREET NW
71786 270032 1 -77.076475 38.921543 3844 BEECHER STREET NW
71787 270030 1 -77.076605 38.921547 3848 BEECHER STREET NW
71788 270020 1 -77.076111 38.921394 2450 HUIDEKOPER PLACE NW
71789 270013 1 -77.076736 38.920890 3821 BENTON STREET NW
71790 270008 1 -77.076420 38.920891 3811 BENTON STREET NW
71791 270007 1 -77.076358 38.920900 3809 BENTON STREET NW
71792 71341 1 -76.989490 38.894601 1217 D STREET NE
71793 269995 1 -77.075151 38.921210 2418 OBSERVATORY PLACE NW
71794 273805 1 -77.058208 38.903236 1022 29TH STREET NW

71795 rows × 5 columns


In [ ]:


In [ ]:
# vehicle versus everything else

In [ ]:


In [78]:
violation_code_by_state_df = df.groupby(['violation_code', 'rp_plate_state']).total.count().reset_index()
violation_code_by_state_df = violation_code_by_state_df.sort_values('total', ascending=False).reset_index(drop=True)
violation_code_by_state_df = violation_code_by_state_df.merge(fines_df, on='violation_code')
violation_code_by_state_df['estimated_fine_total'] = violation_code_by_state_df.total * violation_code_by_state_df.fine
violation_code_by_state_df.to_csv('../cleaned_data/violation_code_by_state_play_with_fine_info.tsv', sep='\t', index=False)

In [ ]:


In [82]:
holiday_df = df[df.holiday==True]

In [86]:
holiday_df_violation_code = holiday_df.groupby(['violation_code', 'rp_plate_state']).total.count().reset_index()

holiday_df_violation_code = holiday_df_violation_code.sort_values('total', ascending=False).reset_index(drop=True)
holiday_df_violation_code = holiday_df_violation_code.merge(fines_df, on='violation_code')
holiday_df_violation_code['estimated_fine_total'] = holiday_df_violation_code.total * holiday_df_violation_code.fine
holiday_df_violation_code.to_csv('../cleaned_data/on_holiday_violation_code_by_state_play_with_fine_info.tsv', sep='\t', index=False)

In [ ]:


In [ ]:


In [88]:
body_style_df_violation_code = df.groupby(['violation_code', 'body_style']).total.count().reset_index()

body_style_df_violation_code = body_style_df_violation_code.sort_values('total', ascending=False).reset_index(drop=True)
body_style_df_violation_code = body_style_df_violation_code.merge(fines_df, on='violation_code')
body_style_df_violation_code['estimated_fine_total'] = body_style_df_violation_code.total * body_style_df_violation_code.fine
body_style_df_violation_code.to_csv('../cleaned_data/body_style_violation_code_with_fine_info.tsv', sep='\t', index=False)

In [ ]:


In [ ]:


In [90]:
violation_code_df = df.groupby(['violation_code']).total.count().reset_index()

violation_code_df = violation_code_df.sort_values('total', ascending=False).reset_index(drop=True)
violation_code_df = violation_code_df.merge(fines_df, on='violation_code')
violation_code_df['estimated_fine_total'] = violation_code_df.total * violation_code_df.fine
# violation_code_df
violation_code_df.to_csv('../cleaned_data/violation_code_with_fine_info.tsv', sep='\t', index=False)

In [ ]: