In [396]:
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline

1. I want to make sure my Plate ID is a string. Can't lose the leading zeroes!

2. I don't think anyone's car was built in 0AD. Discard the '0's as NaN.

3. I want the dates to be dates! Read the read_csv documentation to find out how to make pandas automatically parse dates.


In [397]:
col_types = "str"
df = pd.read_csv("mini_violations.csv", na_values={'Vehicle Year':"0", 'Date First Observed': "0", 'Vehicle Expiration Date': "0"}, low_memory=False, dtype=col_types, parse_dates=["Issue Date"])
#giving parse_time also "Vehicle Expiration Date" as a element, it did not convert it, thus left out and dealt with with .apply later

In [398]:
df.columns


Out[398]:
Index(['Summons Number', 'Plate ID', 'Registration State', 'Plate Type',
       'Issue Date', 'Violation Code', 'Vehicle Body Type', 'Vehicle Make',
       'Issuing Agency', 'Street Code1', 'Street Code2', 'Street Code3',
       'Vehicle Expiration Date', 'Violation Location', 'Violation Precinct',
       'Issuer Precinct', 'Issuer Code', 'Issuer Command', 'Issuer Squad',
       'Violation Time', 'Time First Observed', 'Violation County',
       'Violation In Front Of Or Opposite', 'House Number', 'Street Name',
       'Intersecting Street', 'Date First Observed', 'Law Section',
       'Sub Division', 'Violation Legal Code', 'Days Parking In Effect    ',
       'From Hours In Effect', 'To Hours In Effect', 'Vehicle Color',
       'Unregistered Vehicle?', 'Vehicle Year', 'Meter Number',
       'Feet From Curb', 'Violation Post Code', 'Violation Description',
       'No Standing or Stopping Violation', 'Hydrant Violation',
       'Double Parking Violation'],
      dtype='object')

In [399]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 43 columns):
Summons Number                       999 non-null object
Plate ID                             999 non-null object
Registration State                   999 non-null object
Plate Type                           999 non-null object
Issue Date                           999 non-null datetime64[ns]
Violation Code                       999 non-null object
Vehicle Body Type                    962 non-null object
Vehicle Make                         975 non-null object
Issuing Agency                       999 non-null object
Street Code1                         999 non-null object
Street Code2                         999 non-null object
Street Code3                         999 non-null object
Vehicle Expiration Date              644 non-null object
Violation Location                   984 non-null object
Violation Precinct                   999 non-null object
Issuer Precinct                      999 non-null object
Issuer Code                          999 non-null object
Issuer Command                       999 non-null object
Issuer Squad                         999 non-null object
Violation Time                       995 non-null object
Time First Observed                  115 non-null object
Violation County                     936 non-null object
Violation In Front Of Or Opposite    882 non-null object
House Number                         854 non-null object
Street Name                          993 non-null object
Intersecting Street                  133 non-null object
Date First Observed                  68 non-null object
Law Section                          999 non-null object
Sub Division                         999 non-null object
Violation Legal Code                 4 non-null object
Days Parking In Effect               999 non-null object
From Hours In Effect                 999 non-null object
To Hours In Effect                   999 non-null object
Vehicle Color                        981 non-null object
Unregistered Vehicle?                999 non-null object
Vehicle Year                         571 non-null object
Meter Number                         995 non-null object
Feet From Curb                       999 non-null object
Violation Post Code                  4 non-null object
Violation Description                0 non-null object
No Standing or Stopping Violation    0 non-null object
Hydrant Violation                    0 non-null object
Double Parking Violation             0 non-null object
dtypes: datetime64[ns](1), object(42)
memory usage: 335.7+ KB

In [400]:
print(df['Issue Date'][0])


2013-08-04 00:00:00

In [401]:
df['Violation County'].value_counts()


Out[401]:
K     384
NY    283
Q     177
BX     69
R      23
Name: Violation County, dtype: int64

In [402]:
df.dtypes


Out[402]:
Summons Number                               object
Plate ID                                     object
Registration State                           object
Plate Type                                   object
Issue Date                           datetime64[ns]
Violation Code                               object
Vehicle Body Type                            object
Vehicle Make                                 object
Issuing Agency                               object
Street Code1                                 object
Street Code2                                 object
Street Code3                                 object
Vehicle Expiration Date                      object
Violation Location                           object
Violation Precinct                           object
Issuer Precinct                              object
Issuer Code                                  object
Issuer Command                               object
Issuer Squad                                 object
Violation Time                               object
Time First Observed                          object
Violation County                             object
Violation In Front Of Or Opposite            object
House Number                                 object
Street Name                                  object
Intersecting Street                          object
Date First Observed                          object
Law Section                                  object
Sub Division                                 object
Violation Legal Code                         object
Days Parking In Effect                       object
From Hours In Effect                         object
To Hours In Effect                           object
Vehicle Color                                object
Unregistered Vehicle?                        object
Vehicle Year                                 object
Meter Number                                 object
Feet From Curb                               object
Violation Post Code                          object
Violation Description                        object
No Standing or Stopping Violation            object
Hydrant Violation                            object
Double Parking Violation                     object
dtype: object

4. "Date first observed" is a pretty weird column, but it seems like it has a date hiding inside. Using a function with .apply, transform the string (e.g. "20140324") into a Python date.

--> Already done in the first step: "Make the 0's show up as NaN"

As "Vehicle Expiration Date" has the same format as "Date First observed" will also convert it with the same method


In [403]:
df["Date First Observed"].head(40)
#format "Date First Observed": 20130719
type(df["Date First Observed"])


Out[403]:
pandas.core.series.Series

In [404]:
import dateutil
from dateutil import parser
#https://dateutil.readthedocs.io/en/stable/parser.html
#dateutil.parser.parse(timestr, parserinfo=None)

In [405]:
#WHY IS IT A FLOAT? I DEFINED ALL COLUMNS TO BE STR!?
type(df["Date First Observed"][0])


Out[405]:
float

In [406]:
def date_str_to_date(timestr):
    convert_date_str = str(timestr)
    try:
        return dateutil.parser.parse(convert_date_str)
    except: 
        return convert_date_str

In [407]:
df['date_first_observed'] = df["Date First Observed"].apply(date_str_to_date)

In [408]:
#format "Vehicle Expiration Date": 20140831

df['vehicle_expiration'] = df["Vehicle Expiration Date"].apply(date_str_to_date)

5. "Violation time" is... not a time. Make it a time.


In [409]:
import time

def changing_time(messy_time):
    a = messy_time[0:2]
    b = messy_time[-3:-1]
    c = messy_time[-1:]
    d = a +":"+ b + c + "M"
    #return d
    try: 
        stripped_time = dateutil.parser.parse(d).time()
        return stripped_time
    except:
        return d

df['violation_time'] = df["Violation Time"].astype(str).apply(changing_time)

In [410]:
type(df['violation_time'][0])


Out[410]:
datetime.time

6. There sure are a lot of colors of cars, too bad so many of them are the same. Make "BLK" and "BLACK", "WT" and "WHITE", and any other combinations that you notice.


In [411]:
#df["Vehicle Color"].value_counts() 
df['vehicle_color'] = df["Vehicle Color"].replace(["WH", "WT", "WHT", "WHI"], "WHITE").replace(["BK", "BLK"], "BLACK").replace(["GY", "GRY", "GRAY"], "GREY").replace(["BL", "BLU"], "BLUE").replace("RD", "RED").replace(["OR","ORANG"], "ORANGE").replace(["GRN", "GR", "GN"], "GREEN").replace(["BR", "BRN", "BN", "BRWN"], "BROWN").replace(["SILVE", "SILVR", "SIL", "SL"],"SILVER").replace("GL", "GOLD").replace(["YELLO","YW"],"YELLOW").replace("TN", "TAN")

#TRIED FUNCTION AND LIST COMPREHENSION AND FOR LOOP, ALL THREW ERRORS AT SOME POINT

#def change_color(wrongspell, rightspell):
#    newdf = df[df["Vehicle Color"].replace(wrongspell, rightspell)]
#    return newdf
#change_color("WH", "WHITE")
#change_color("WHT", "WHITE")
#change_color("WT", "WHITE")

#wrong_spellings = ["WH", "WT", "WHT"]
#for spelling in wrong_spellings:
#    newdf["Vehicle Color"] = df["Vehicle Color"].replace(spelling, "WHITE")

#[df["Vehicle Color"].replace(spelling, "WHITE") for spelling in wrong_spellings]

In [412]:
df.columns


Out[412]:
Index(['Summons Number', 'Plate ID', 'Registration State', 'Plate Type',
       'Issue Date', 'Violation Code', 'Vehicle Body Type', 'Vehicle Make',
       'Issuing Agency', 'Street Code1', 'Street Code2', 'Street Code3',
       'Vehicle Expiration Date', 'Violation Location', 'Violation Precinct',
       'Issuer Precinct', 'Issuer Code', 'Issuer Command', 'Issuer Squad',
       'Violation Time', 'Time First Observed', 'Violation County',
       'Violation In Front Of Or Opposite', 'House Number', 'Street Name',
       'Intersecting Street', 'Date First Observed', 'Law Section',
       'Sub Division', 'Violation Legal Code', 'Days Parking In Effect    ',
       'From Hours In Effect', 'To Hours In Effect', 'Vehicle Color',
       'Unregistered Vehicle?', 'Vehicle Year', 'Meter Number',
       'Feet From Curb', 'Violation Post Code', 'Violation Description',
       'No Standing or Stopping Violation', 'Hydrant Violation',
       'Double Parking Violation', 'date_first_observed', 'vehicle_expiration',
       'violation_time', 'vehicle_color'],
      dtype='object')

7. Join the data with the Parking Violations Code dataset from the NYC Open Data site.


In [413]:
# selecting the columns needed to answer the remaining questions 
# above manipulated columns vehicle_color and date_first_observed not included, as not needed
selection= df[['Summons Number', 'Plate ID', 'Registration State', 'Violation County', 'Issue Date', 'violation_time', 'Violation Code']]

In [414]:
# make table column names look neater/having the same structure (no blank spaces within name!!!)
selection.columns = ['summons_number', 'plate_id', 'registration_state', 'violation_county', 'issue_date', 'violation_time', 'violation_code']

In [415]:
selection


Out[415]:
summons_number plate_id registration_state violation_county issue_date violation_time violation_code
0 1283294138 GBB9093 NY NaN 2013-08-04 07:52:00 46
1 1283294151 62416MB NY NY 2013-08-04 12:40:00 46
2 1283294163 78755JZ NY NY 2013-08-05 12:43:00 46
3 1283294175 63009MA NY NY 2013-08-05 14:32:00 46
4 1283294187 91648MC NY NY 2013-08-08 12:39:00 41
5 1283294217 T60DAR NJ NY 2013-08-11 18:17:00 14
6 1283294229 GCR2838 NY NY 2013-08-11 19:41:00 14
7 1283983620 XZ764G NJ K 2013-08-07 04:25:00 24
8 1283983631 GBH9379 NY K 2013-08-07 04:37:00 24
9 1283983667 MCL78B NJ K 2013-07-18 08:39:00 24
10 1283983679 M367CN NY K 2013-07-18 08:45:00 24
11 1283983734 GAR6813 NY K 2013-07-18 09:07:00 24
12 1283983771 GEN8674 NY K 2013-07-31 17:14:00 24
13 1283983825 GAC2703 NY K 2013-08-12 18:56:00 24
14 1286036800 40793JY NY NaN 2013-07-05 23:45:00 14
15 1286123550 GAD1485 NY K 2013-08-12 17:46:00 20
16 1286246398 GFC5338 NY K 2013-07-26 11:42:00 14
17 1286246416 815M342 MD K 2013-07-30 07:24:00 20
18 1286248000 GJA3452 NY K 2013-07-23 07:58:00 14
19 1286282330 YZY6476 NC K 2013-07-29 07:36:00 20
20 1286282342 WBJ819 LA K 2013-08-07 08:47:00 17
21 1286289841 GAV9235 NY K 2013-07-20 11:20:00 50
22 1286654920 ZTR66R NJ NY 2013-07-18 10:20:00 50
23 1286799648 GDE3973 NY NY 2013-07-20 15:24:00 40
24 1286807062 P193871 IL K 2013-08-10 10:18:00 40
25 1286807475 GGL6608 NY K 2013-07-26 07:43:00 40
26 1286807633 84301JU NY K 2013-07-19 08:50:00 14
27 1286808807 HHG3545 PA K 2013-08-06 09:15:00 20
28 1287320491 50097JX NY NY 2013-08-07 10:23:00 40
29 1287320570 GEJ9154 NY K 2013-08-06 11:20:00 51
... ... ... ... ... ... ... ...
969 1334886817 T600324C NY Q 2013-07-12 14:28:00 17
970 1334893391 GEF8039 NY NaN 2013-08-08 11:29:00 14
971 1334899332 GAR5973 NY Q 2013-05-28 10:59:00 20
972 1334899356 76860 NY Q 2013-07-11 10:50:00 20
973 1334900190 G44DBB NJ Q 2013-06-06 02:00:00 20
974 1334960409 GCP7978 NY NaN 2013-07-16 08:55:00 20
975 1334965547 T606011C NY BX 2013-08-18 19:30:00 40
976 1335085889 GEB4077 NY K 2013-08-05 13:21:00 24
977 1335086067 408861 RI NY 2013-08-04 13:15:00 24
978 1335087308 505GSB OK NaN 2013-08-21 21:50:00 20
979 1335087436 GGF7010 NY NaN 2013-08-14 01:53:00 24
980 1335087618 XH208F 99 NaN 2013-07-27 15:50:00 20
981 1335087930 GGB3579 NY K 2013-07-27 21:53:00 41
982 1335089433 49SD67 NY K 2013-08-05 23:30:00 51
983 1335089445 GDM5570 NY K 2013-08-20 20:10:00 20
984 1335089731 55RG48 NY K 2013-08-17 20:40:00 51
985 1335090538 GDF7589 NY K 2013-07-21 20:45:00 24
986 1335091440 PC613253 99 K 2013-07-27 21:25:00 24
987 1335092730 82GN94 NY K 2013-08-17 19:00:00 51
988 1335092742 47RE79 NY K 2013-08-17 19:00:00 51
989 1335092754 78GX62 NY K 2013-08-17 19:00:00 51
990 1335092766 49SD33 NY K 2013-08-20 19:40:00 51
991 1335092778 GDU8731 NY K 2013-08-21 01:20:00 24
992 1335093072 GEN7535 NY K 2013-07-23 00:40:00 24
993 1335093199 T529952C NY K 2013-08-01 00:05:00 50
994 1335093205 GBJ4073 NY K 2013-08-01 21:50:00 14
995 1335095342 JHA1050 PA K 2013-07-21 14:31:00 40
996 1335096139 72416R VA K 2013-08-11 14:13:00 24
997 1335096140 HPJ538 MS K 2013-08-11 14:20:00 24
998 1335096152 HYW9707 PA K 2013-08-19 16:27:00 24

999 rows × 7 columns


In [416]:
# in atom I replaced all the $ by nothing -- seemed more time efficient than doing it in pandas ;) 

vc_set = pd.read_csv("DOF_Parking_Violation_Codes.csv", dtype="str")

vc_set.columns = ["code", "definition", "manhattan_below_96", "all_other_areas"]
vc_set


Out[416]:
code definition manhattan_below_96 all_other_areas
0 10 Stopping, standing or parking where a sign, st... 115 115
1 11 Hotel Loading/Unloading: Standing or parking w... 115 115
2 12 Snow Emergency: Standing or parking where stan... 95 95
3 13 Taxi Stand: Standing or parking where standing... 115 115
4 14 General No Standing: Standing or parking where... 115 115
5 16 Truck Loading/Unloading: Standing or parking w... 95 95
6 17 Authorized Vehicles Only: Standing or parking ... 95 95
7 18 Bus Lane: Standing or parking where standing i... 115 115
8 19 Bus Stop: Standing or parking where standing i... 115 115
9 20 General No Parking: No parking where parking i... 65 60
10 21 Street Cleaning: No parking where parking is n... 65 45
11 22 Hotel Loading/Unloading: No parking where park... 60 60
12 23 Taxi Stand: No parking where parking is not al... 65 60
13 24 Authorized Vehicles Only: No parking where par... 65 60
14 25 Standing at a commuter van stop, other than te... 115 115
15 26 Standing at a for-hire vehicle stop, other tha... 115 115
16 27 No parking in a handicapped zone (off-street o... 180 180
17 28 Overtime standing (diplomat) 95 95
18 31 Standing of a non-commercial vehicle in a comm... 115 115
19 32 Parking at a broken or missing meter for longe... 65 35
20 33 "Feeding Meter" -- parking in a metered space ... 65 35
21 34 Expired Meter -- parking in a metered space wh... 65 35
22 35 Parking in a meter space for the purpose of di... 65 35
23 37-38 Muni Meter --\n(37) Parking in excess of the a... 65 35
24 39 Parking for longer than the maximum time permi... 65 60
25 69 Failing to show a muni-meter receipt, commerci... 65 65
26 40 Stopping, standing or parking closer than 15 f... 115 115
27 42 Parking in a Muni Metered space in a commercia... 65 35
28 43 Parking in a commercial metered zone in which ... 65 35
29 44 Parking in a commercial metered zone for longe... 65 35
... ... ... ... ...
54 71 Standing or parking a vehicle without showing ... 65 65
55 72 Standing or parking a vehicle with NY Plates a... 65 65
56 73 Standing or parking a vehicle showing an expir... 65 65
57 74 Standing or parking a vehicle without properly... 65 65
58 75 Standing or parking a vehicle in which the Lic... 65 65
59 77 Parking a bus, unless allowed by signs. A char... 65 45
60 78 Parking a Commercial Vehicle on a residential ... 65 65
61 79 For a bus without passengers, waiting at a cur... 115 115
62 80 Standing or parking a vehicle without head lam... 60 45
63 81 No standing except diplomat 95 95
64 82 Standing or parking a Commercial Vehicle unles... 115 115
65 83 Standing or parking a vehicle which is not pro... 65 65
66 84 Parking a Commercial Vehicle on any city stree... 65 45
67 85 Parking a Commercial Vehicle more than 3 hours... 65 65
68 86 Standing or parking a vehicle to make pickups,... 115 115
69 89 Standing or parking a vehicle in the Garment D... 115 115
70 91 Parking in order to sell a vehicle by a person... 65 45
71 92 Parking in order to wash, grease, or repair a ... 65 45
72 93 Stopping, standing or parking on paved roadway... 65 65
73 94 Vehicle Release Agreement code associated with... 100\n(Regular Tow, plus violation fine) 200 (Heavy Tow, plus violation fine)
74 96 Standing or parking within 50 feet of the near... 95 95
75 97 Parking in a vacant lot. A vehicle may be park... 65 45
76 98 Standing or parking in front of a public or pr... 95 95
77 99 All other parking, standing or stopping violat... vary vary
78 04 Vehicles parked illegally south of Houston Str... 115 NaN
79 05 Failure to make a right turn from a bus lane. 115 115
80 06 Parking a tractor-trailer on a residential str... 1st Offense - 265\n2nd Offense - 515 2nd Offense - 265\n2nd Offense - 515
81 07 Vehicles photographed going through a red ligh... 50 50
82 08 Vehicle idling in a restricted area. 115 115
83 09 Blocking an Intersection: Obstructing traffic ... 115 115

84 rows × 4 columns


In [417]:
combined_df = selection.merge (vc_set, left_on='violation_code', right_on='code')

In [418]:
combined_df.columns


Out[418]:
Index(['summons_number', 'plate_id', 'registration_state', 'violation_county',
       'issue_date', 'violation_time', 'violation_code', 'code', 'definition',
       'manhattan_below_96', 'all_other_areas'],
      dtype='object')

In [419]:
combined_df


Out[419]:
summons_number plate_id registration_state violation_county issue_date violation_time violation_code code definition manhattan_below_96 all_other_areas
0 1283294138 GBB9093 NY NaN 2013-08-04 07:52:00 46 46 Standing or parking on the roadway side of a v... 115 115
1 1283294151 62416MB NY NY 2013-08-04 12:40:00 46 46 Standing or parking on the roadway side of a v... 115 115
2 1283294163 78755JZ NY NY 2013-08-05 12:43:00 46 46 Standing or parking on the roadway side of a v... 115 115
3 1283294175 63009MA NY NY 2013-08-05 14:32:00 46 46 Standing or parking on the roadway side of a v... 115 115
4 1290834349 429J6JM NY Q 2013-07-23 10:37:00 46 46 Standing or parking on the roadway side of a v... 115 115
5 1290834374 42587JW NY Q 2013-07-30 11:19:00 46 46 Standing or parking on the roadway side of a v... 115 115
6 1291521665 YANKYJAM NY K 2013-07-02 19:42:00 46 46 Standing or parking on the roadway side of a v... 115 115
7 1293030739 GHG2484 NY Q 2013-08-11 01:30:00 46 46 Standing or parking on the roadway side of a v... 115 115
8 1293100924 64195MC NY K 2013-06-20 15:44:00 46 46 Standing or parking on the roadway side of a v... 115 115
9 1293101151 54100JA NY NY 2013-07-05 13:15:00 46 46 Standing or parking on the roadway side of a v... 115 115
10 1293525625 GGP1847 NY K 2013-07-29 21:45:00 46 46 Standing or parking on the roadway side of a v... 115 115
11 1294202492 XD377T NJ NY 2013-07-25 09:53:00 46 46 Standing or parking on the roadway side of a v... 115 115
12 1302446484 452WW4 MA K 2013-06-24 10:25:00 46 46 Standing or parking on the roadway side of a v... 115 115
13 1302446538 4274WY CT K 2013-07-10 10:15:00 46 46 Standing or parking on the roadway side of a v... 115 115
14 1306112679 97732JZ NY Q 2013-08-15 23:50:00 46 46 Standing or parking on the roadway side of a v... 115 115
15 1306114172 91628MC NY Q 2013-08-02 04:48:00 46 46 Standing or parking on the roadway side of a v... 115 115
16 1306114184 84614JR NY Q 2013-08-23 04:42:00 46 46 Standing or parking on the roadway side of a v... 115 115
17 1306114196 62092JL NY Q 2013-08-23 02:46:00 46 46 Standing or parking on the roadway side of a v... 115 115
18 1306116235 GGU4798 NY Q 2013-07-27 16:52:00 46 46 Standing or parking on the roadway side of a v... 115 115
19 1306206844 55197JJ NY NY 2013-08-06 16:10:00 46 46 Standing or parking on the roadway side of a v... 115 115
20 1307452577 55003JG NY NY 2013-08-12 09:40:00 46 46 Standing or parking on the roadway side of a v... 115 115
21 1307452590 40470JU 99 NaN 2013-08-12 09:20:00 46 46 Standing or parking on the roadway side of a v... 115 115
22 1321774722 61944JY NY NaN 2013-07-17 10:05:00 46 46 Standing or parking on the roadway side of a v... 115 115
23 1321774771 35210MB NY NaN 2013-07-17 12:14:00 46 46 Standing or parking on the roadway side of a v... 115 115
24 1321965783 GBN4223 NY K 2013-08-05 07:47:00 46 46 Standing or parking on the roadway side of a v... 115 115
25 1325733878 T635444C NY BX 2013-08-13 19:37:00 46 46 Standing or parking on the roadway side of a v... 115 115
26 1325972393 GBX9511 NY NY 2013-07-14 02:38:00 46 46 Standing or parking on the roadway side of a v... 115 115
27 1326475710 WZF5882 NY K 2013-08-11 19:25:00 46 46 Standing or parking on the roadway side of a v... 115 115
28 1295546516 GCY5976 NY Q 2013-07-25 15:13:00 46 46 Standing or parking on the roadway side of a v... 115 115
29 1295547089 62516JM NY Q 2013-07-19 14:30:00 46 46 Standing or parking on the roadway side of a v... 115 115
... ... ... ... ... ... ... ... ... ... ... ...
959 1306632572 45940MD NY BX 2013-08-01 23:45:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
960 1321688313 5X150411 AL Q 2013-07-15 21:47:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
961 1326367304 JJA6304 PA K 2013-07-30 04:23:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
962 1326367316 XZ758G NJ K 2013-07-30 04:30:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
963 1326367330 XAUM69 NJ K 2013-07-30 04:35:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
964 1326367341 XT254Y NJ K 2013-07-30 04:37:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
965 1326367377 ZBL9225 PA K 2013-08-09 03:35:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
966 1326367389 JJA6304 PA K 2013-08-09 03:36:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
967 1333437845 54059MC NY NY 2013-07-28 02:55:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
968 1311447684 XK134N NJ R 2013-07-21 22:06:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65
969 1307949484 GEZ5475 NY NY 2013-08-11 19:22:00 39 39 Parking for longer than the maximum time permi... 65 60
970 1307953128 N34ASW NJ R 2013-08-11 15:31:00 61 61 Except where angle parking is allowed, stoppin... 65 45
971 1308157344 GER6778 NY Q 2013-07-19 13:14:00 61 61 Except where angle parking is allowed, stoppin... 65 45
972 1334736820 GGN3919 NY K 2013-08-13 08:01:00 61 61 Except where angle parking is allowed, stoppin... 65 45
973 1307953141 85RS47 NY R 2013-08-15 18:41:00 45 45 Stopping, standing or parking in a traffic lan... 115 115
974 1333925347 97431JZ NY NY 2013-08-09 13:31:00 45 45 Stopping, standing or parking in a traffic lan... 115 115
975 1307953852 GFP6251 NY R 2013-08-24 13:28:00 27 27 No parking in a handicapped zone (off-street o... 180 180
976 1334878444 GGB5893 NY K 2013-07-22 11:31:00 27 27 No parking in a handicapped zone (off-street o... 180 180
977 1325855789 GBJ1134 NY Q 2013-07-21 14:00:00 67 67 Parking in front of a pedestrian ramp 165 165
978 1295547478 72044 RI Q 2013-07-17 17:55:00 67 67 Parking in front of a pedestrian ramp 165 165
979 1295547480 9N77D NY Q 2013-07-22 14:15:00 67 67 Parking in front of a pedestrian ramp 165 165
980 1295548021 GCG3830 NY Q 2013-07-24 15:55:00 67 67 Parking in front of a pedestrian ramp 165 165
981 1307964011 78439 MA NY 2013-07-26 20:14:00 67 67 Parking in front of a pedestrian ramp 165 165
982 1307964485 GAA1955 NY NY 2013-07-21 16:45:00 67 67 Parking in front of a pedestrian ramp 165 165
983 1311687713 7501500 NY NY 2013-07-26 06:47:00 67 67 Parking in front of a pedestrian ramp 165 165
984 1333609826 GCL1507 NY K 2013-07-18 06:45:00 75 75 Standing or parking a vehicle in which the Lic... 65 65
985 1308157320 GFS4162 NY Q 2013-07-19 12:29:00 80 80 Standing or parking a vehicle without head lam... 60 45
986 1333716310 N58APT NJ NY 2013-07-07 00:10:00 13 13 Taxi Stand: Standing or parking where standing... 115 115
987 1334012568 YUZ75Z NJ Q 2013-07-29 15:10:00 13 13 Taxi Stand: Standing or parking where standing... 115 115
988 1334032191 85677X IL Q 2013-07-31 11:10:00 99 99 All other parking, standing or stopping violat... vary vary

989 rows × 11 columns

8. How much money did NYC make off of parking violations?


In [420]:
all_fines = combined_df["manhattan_below_96"]
all_list = all_fines.sum().split(" ")

cleaned_fines =[]
for fine in all_list:
    try:
        int_fine = int(fine)
        cleaned_fines.append(int_fine)
    except:
        continue

money_nyc = sum(cleaned_fines)
print("NYC makes at least", money_nyc, " US-Dollar off parking violations.")


NYC makes at least 84270  US-Dollar off parking violations.

9. What's the most lucrative kind of parking violation? The most frequent?


In [421]:
#most lucrative:
combined_df["manhattan_below_96"].value_counts()


Out[421]:
65                                         535
115                                        314
95                                         125
165                                          7
100\n(Regular Tow, plus violation fine)      4
180                                          2
60                                           1
vary                                         1
Name: manhattan_below_96, dtype: int64

In [422]:
combined_df["all_other_areas"].value_counts()


Out[422]:
45                                      391
115                                     314
95                                      125
60                                      117
65                                       28
165                                       7
200 (Heavy Tow, plus violation fine)      4
180                                       2
vary                                      1
Name: all_other_areas, dtype: int64

In [423]:
combined_df[combined_df["all_other_areas"] == "200 (Heavy Tow, plus violation fine)"]


Out[423]:
summons_number plate_id registration_state violation_county issue_date violation_time violation_code code definition manhattan_below_96 all_other_areas
544 6020309034 GEW4836 99 NaN 2013-07-18 na:nanM 94 94 Vehicle Release Agreement code associated with... 100\n(Regular Tow, plus violation fine) 200 (Heavy Tow, plus violation fine)
545 6020310784 447ZRW CT NaN 2013-07-20 na:nanM 94 94 Vehicle Release Agreement code associated with... 100\n(Regular Tow, plus violation fine) 200 (Heavy Tow, plus violation fine)
546 6020314583 87336JV PA NaN 2013-07-26 na:nanM 94 94 Vehicle Release Agreement code associated with... 100\n(Regular Tow, plus violation fine) 200 (Heavy Tow, plus violation fine)
547 6020342748 HWD923 SC NaN 2013-07-22 na:nanM 94 94 Vehicle Release Agreement code associated with... 100\n(Regular Tow, plus violation fine) 200 (Heavy Tow, plus violation fine)

The highest fine of 200 Dollar is given outside of Manhattan and is associated with the "Violation Code" of 46


In [424]:
#to find out row index of Violation code 46:
vc_set['code'].head(40)

#find out the definition of 
vc_set.loc[31]["definition"]


Out[424]:
'Standing or parking on the roadway side of a vehicle stopped, standing or parked at the curb; in other words\xa0also known as "double parking". However,\xa0a person may stand a Commercial Vehicle alongside a vehicle parked at the curb at such locations and during such hours that stopping, standing and parking is allowed when quickly making pickups, deliveries or service calls. This is allowed if there is no parking space or marked loading zone on either side of the street within 100 feet. "Double parking" any type of vehicle is not allowed in Midtown Manhattan (the area from 14th Street to 60th Street, between First Avenue and Eighth Avenue inclusive). Midtown double parking is not allowed between 7:00am\xa0 – 7:00pm daily except Sundays. (See Code 47.)'

In [425]:
#most frequent:
combined_df['violation_code'].describe()


Out[425]:
count     989
unique     29
top        21
freq      387
Name: violation_code, dtype: object

Most freqent is "Violation code" 21, occurring with a frequency of 387


In [426]:
#to find out row index of Violation code 21:
#vc_set['code']

#find out the definition of 
vc_set.loc[10]["definition"]


Out[426]:
'Street Cleaning: No parking where parking is not allowed by sign, street marking or traffic control device.'

10. New Jersey has bad drivers, but does it have bad parkers, too? How much money does NYC make off of all non-New York vehicles?

Thoughts: 1) Group by Registration State 2) sum up fare for all_other_areas


In [427]:
all_non_ny = combined_df[combined_df["registration_state"] != 'NY']
fines_non_ny = all_non_ny['all_other_areas']
fine_list = []
for fine in fines_non_ny:
    clean_fine = fine.replace(' (Heavy Tow, plus violation fine)', '').replace('vary','')
    try:
        fine_list.append(int(clean_fine))
    except:
        continue

sum_fines = sum(fine_list)
print("NYC makes at least", sum_fines, "US-Dollar from vehicles that are not registered in NY")


NYC makes at least 30245 US-Dollar from vehicles that are not registered in NY

11. Make a chart of the top few.


In [428]:
def cleaning(value):
    if "(" in value:
        return 100
    if "vary" in value:
        return 2
    else:
        return int(value)

In [429]:
combined_df['manhattan_cleaned_fines'] = combined_df['manhattan_below_96'].apply(cleaning)

In [430]:
combined_df.groupby(by='registration_state')['manhattan_cleaned_fines'].sum().sort_values().plot(kind='barh', figsize=(20,10))


Out[430]:
<matplotlib.axes._subplots.AxesSubplot at 0x1143fbf28>

12. What time of day do people usually get their tickets? You can break the day up into several blocks - for example 12am-6am, 6am-12pm, 12pm-6pm, 6pm-12am.


In [431]:
type(combined_df['violation_time'][0])


Out[431]:
datetime.time

In [432]:
print(combined_df['violation_time'][0])


07:52:00

In [433]:
combined_df['violation_time'][0].hour


Out[433]:
7

In [434]:
def time_to_hour(value):
    try:
        return value.hour
    except:
        pass

In [435]:
combined_df["violation_hour"] = combined_df['violation_time'].apply(time_to_hour)
combined_df.head(3)


Out[435]:
summons_number plate_id registration_state violation_county issue_date violation_time violation_code code definition manhattan_below_96 all_other_areas manhattan_cleaned_fines violation_hour
0 1283294138 GBB9093 NY NaN 2013-08-04 07:52:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 7.0
1 1283294151 62416MB NY NY 2013-08-04 12:40:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 12.0
2 1283294163 78755JZ NY NY 2013-08-05 12:43:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 12.0

In [448]:
combined_df.groupby('violation_hour').count()['summons_number'].plot(kind="bar", figsize=(20,10))


Out[448]:
<matplotlib.axes._subplots.AxesSubplot at 0x1173cd978>

In [452]:
combined_df['violation_hour'].hist(bins=4)


Out[452]:
<matplotlib.axes._subplots.AxesSubplot at 0x117b6e198>

13. What's the average ticket cost in NYC?


In [303]:
number_of_all_tickets = combined_df['manhattan_below_96'].count()
number_of_all_tickets


Out[303]:
989

In [304]:
# taking the total money NYC makes from question 8  -- stored in variable "money_nyc"
average_ticket_cost = money_nyc/number_of_all_tickets
print("The average ticket cost in NYC is", round(average_ticket_cost), "US-dollar.")


The average ticket cost in NYC is 85.0 US-dollar.

14. Make a graph of the number of tickets per day.


In [305]:
combined_df.groupby('issue_date').count()
combined_df.groupby('issue_date').count()['summons_number']
plt.style.use('ggplot')
combined_df.groupby('issue_date').count()['summons_number'].plot(kind='bar', figsize=(20,10))


Out[305]:
<matplotlib.axes._subplots.AxesSubplot at 0x114288b70>

15. Make a graph of the amount of revenue collected per day.


In [306]:
#groupby date, sum revenue

In [307]:
copy_df = combined_df
copy_df.index = copy_df['issue_date']

copy_df.groupby("issue_date")['manhattan_cleaned_fines'].sum().plot(kind='bar', figsize=(20,10))


Out[307]:
<matplotlib.axes._subplots.AxesSubplot at 0x114a95978>

In [308]:
type(dates_frame['issue_date'][0])


Out[308]:
pandas.tslib.Timestamp

In [309]:
dates_frame


Out[309]:
summons_number plate_id registration_state violation_county issue_date violation_time violation_code code definition manhattan_below_96 all_other_areas manhattan_fines test test2 manhattan_cleaned_fines
0 1283294138 GBB9093 NY NaN 2013-08-04 07:52:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
1 1283294151 62416MB NY NY 2013-08-04 12:40:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
2 1283294163 78755JZ NY NY 2013-08-05 12:43:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
3 1283294175 63009MA NY NY 2013-08-05 14:32:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
4 1290834349 429J6JM NY Q 2013-07-23 10:37:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
5 1290834374 42587JW NY Q 2013-07-30 11:19:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
6 1291521665 YANKYJAM NY K 2013-07-02 19:42:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
7 1293030739 GHG2484 NY Q 2013-08-11 01:30:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
8 1293100924 64195MC NY K 2013-06-20 15:44:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
9 1293101151 54100JA NY NY 2013-07-05 13:15:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
10 1293525625 GGP1847 NY K 2013-07-29 21:45:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
11 1294202492 XD377T NJ NY 2013-07-25 09:53:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
12 1302446484 452WW4 MA K 2013-06-24 10:25:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
13 1302446538 4274WY CT K 2013-07-10 10:15:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
14 1306112679 97732JZ NY Q 2013-08-15 23:50:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
15 1306114172 91628MC NY Q 2013-08-02 04:48:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
16 1306114184 84614JR NY Q 2013-08-23 04:42:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
17 1306114196 62092JL NY Q 2013-08-23 02:46:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
18 1306116235 GGU4798 NY Q 2013-07-27 16:52:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
19 1306206844 55197JJ NY NY 2013-08-06 16:10:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
20 1307452577 55003JG NY NY 2013-08-12 09:40:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
21 1307452590 40470JU 99 NaN 2013-08-12 09:20:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
22 1321774722 61944JY NY NaN 2013-07-17 10:05:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
23 1321774771 35210MB NY NaN 2013-07-17 12:14:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
24 1321965783 GBN4223 NY K 2013-08-05 07:47:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
25 1325733878 T635444C NY BX 2013-08-13 19:37:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
26 1325972393 GBX9511 NY NY 2013-07-14 02:38:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
27 1326475710 WZF5882 NY K 2013-08-11 19:25:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
28 1295546516 GCY5976 NY Q 2013-07-25 15:13:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
29 1295547089 62516JM NY Q 2013-07-19 14:30:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 YAY 115 115
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
959 1306632572 45940MD NY BX 2013-08-01 23:45:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
960 1321688313 5X150411 AL Q 2013-07-15 21:47:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
961 1326367304 JJA6304 PA K 2013-07-30 04:23:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
962 1326367316 XZ758G NJ K 2013-07-30 04:30:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
963 1326367330 XAUM69 NJ K 2013-07-30 04:35:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
964 1326367341 XT254Y NJ K 2013-07-30 04:37:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
965 1326367377 ZBL9225 PA K 2013-08-09 03:35:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
966 1326367389 JJA6304 PA K 2013-08-09 03:36:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
967 1333437845 54059MC NY NY 2013-07-28 02:55:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
968 1311447684 XK134N NJ R 2013-07-21 22:06:00 78 78 Parking a Commercial Vehicle on a residential ... 65 65 65 YAY 65 65
969 1307949484 GEZ5475 NY NY 2013-08-11 19:22:00 39 39 Parking for longer than the maximum time permi... 65 60 65 YAY 65 65
970 1307953128 N34ASW NJ R 2013-08-11 15:31:00 61 61 Except where angle parking is allowed, stoppin... 65 45 65 YAY 65 65
971 1308157344 GER6778 NY Q 2013-07-19 13:14:00 61 61 Except where angle parking is allowed, stoppin... 65 45 65 YAY 65 65
972 1334736820 GGN3919 NY K 2013-08-13 08:01:00 61 61 Except where angle parking is allowed, stoppin... 65 45 65 YAY 65 65
973 1307953141 85RS47 NY R 2013-08-15 18:41:00 45 45 Stopping, standing or parking in a traffic lan... 115 115 115 YAY 115 115
974 1333925347 97431JZ NY NY 2013-08-09 13:31:00 45 45 Stopping, standing or parking in a traffic lan... 115 115 115 YAY 115 115
975 1307953852 GFP6251 NY R 2013-08-24 13:28:00 27 27 No parking in a handicapped zone (off-street o... 180 180 180 YAY 180 180
976 1334878444 GGB5893 NY K 2013-07-22 11:31:00 27 27 No parking in a handicapped zone (off-street o... 180 180 180 YAY 180 180
977 1325855789 GBJ1134 NY Q 2013-07-21 14:00:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
978 1295547478 72044 RI Q 2013-07-17 17:55:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
979 1295547480 9N77D NY Q 2013-07-22 14:15:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
980 1295548021 GCG3830 NY Q 2013-07-24 15:55:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
981 1307964011 78439 MA NY 2013-07-26 20:14:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
982 1307964485 GAA1955 NY NY 2013-07-21 16:45:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
983 1311687713 7501500 NY NY 2013-07-26 06:47:00 67 67 Parking in front of a pedestrian ramp 165 165 165 YAY 165 165
984 1333609826 GCL1507 NY K 2013-07-18 06:45:00 75 75 Standing or parking a vehicle in which the Lic... 65 65 65 YAY 65 65
985 1308157320 GFS4162 NY Q 2013-07-19 12:29:00 80 80 Standing or parking a vehicle without head lam... 60 45 60 YAY 60 60
986 1333716310 N58APT NJ NY 2013-07-07 00:10:00 13 13 Taxi Stand: Standing or parking where standing... 115 115 115 YAY 115 115
987 1334012568 YUZ75Z NJ Q 2013-07-29 15:10:00 13 13 Taxi Stand: Standing or parking where standing... 115 115 115 YAY 115 115
988 1334032191 85677X IL Q 2013-07-31 11:10:00 99 99 All other parking, standing or stopping violat... vary vary 2 2 2

989 rows × 15 columns


In [310]:
dates_frame = combined_df[combined_df['issue_date'] == combined_df['issue_date']]
rashida = dates_frame.groupby('issue_date')
rashida.head()


Out[310]:
summons_number plate_id registration_state violation_county issue_date violation_time violation_code code definition manhattan_below_96 all_other_areas manhattan_cleaned_fines violation_hour
issue_date
2013-08-04 1283294138 GBB9093 NY NaN 2013-08-04 07:52:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 7.0
2013-08-04 1283294151 62416MB NY NY 2013-08-04 12:40:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 12.0
2013-08-05 1283294163 78755JZ NY NY 2013-08-05 12:43:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 12.0
2013-08-05 1283294175 63009MA NY NY 2013-08-05 14:32:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 14.0
2013-07-23 1290834349 429J6JM NY Q 2013-07-23 10:37:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 10.0
2013-07-30 1290834374 42587JW NY Q 2013-07-30 11:19:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 11.0
2013-07-02 1291521665 YANKYJAM NY K 2013-07-02 19:42:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 19.0
2013-08-11 1293030739 GHG2484 NY Q 2013-08-11 01:30:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 1.0
2013-06-20 1293100924 64195MC NY K 2013-06-20 15:44:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 15.0
2013-07-05 1293101151 54100JA NY NY 2013-07-05 13:15:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 13.0
2013-07-29 1293525625 GGP1847 NY K 2013-07-29 21:45:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 21.0
2013-07-25 1294202492 XD377T NJ NY 2013-07-25 09:53:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 9.0
2013-06-24 1302446484 452WW4 MA K 2013-06-24 10:25:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 10.0
2013-07-10 1302446538 4274WY CT K 2013-07-10 10:15:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 10.0
2013-08-15 1306112679 97732JZ NY Q 2013-08-15 23:50:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 23.0
2013-08-02 1306114172 91628MC NY Q 2013-08-02 04:48:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 4.0
2013-08-23 1306114184 84614JR NY Q 2013-08-23 04:42:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 4.0
2013-08-23 1306114196 62092JL NY Q 2013-08-23 02:46:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 2.0
2013-07-27 1306116235 GGU4798 NY Q 2013-07-27 16:52:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 16.0
2013-08-06 1306206844 55197JJ NY NY 2013-08-06 16:10:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 16.0
2013-08-12 1307452577 55003JG NY NY 2013-08-12 09:40:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 9.0
2013-08-12 1307452590 40470JU 99 NaN 2013-08-12 09:20:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 9.0
2013-07-17 1321774722 61944JY NY NaN 2013-07-17 10:05:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 10.0
2013-07-17 1321774771 35210MB NY NaN 2013-07-17 12:14:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 12.0
2013-08-05 1321965783 GBN4223 NY K 2013-08-05 07:47:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 7.0
2013-08-13 1325733878 T635444C NY BX 2013-08-13 19:37:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 19.0
2013-07-14 1325972393 GBX9511 NY NY 2013-07-14 02:38:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 2.0
2013-08-11 1326475710 WZF5882 NY K 2013-08-11 19:25:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 19.0
2013-07-25 1295546516 GCY5976 NY Q 2013-07-25 15:13:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 15.0
2013-07-19 1295547089 62516JM NY Q 2013-07-19 14:30:00 46 46 Standing or parking on the roadway side of a v... 115 115 115 14.0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2013-08-25 1326413156 T605555C NY Q 2013-08-25 02:13:00 40 40 Stopping, standing or parking closer than 15 f... 115 115 115 2.0
2013-08-16 1307964412 GFS8424 NY NY 2013-08-16 16:27:00 40 40 Stopping, standing or parking closer than 15 f... 115 115 115 16.0
2013-07-07 1333717143 951TK9 MA NY 2013-07-07 03:07:00 40 40 Stopping, standing or parking closer than 15 f... 115 115 115 3.0
2013-07-14 1333718809 T93AUT NJ NY 2013-07-14 00:03:00 40 40 Stopping, standing or parking closer than 15 f... 115 115 115 0.0
2013-08-17 1335089731 55RG48 NY K 2013-08-17 20:40:00 51 51 Stopping, standing or parking on a sidewalk. 115 115 115 20.0
2013-08-20 1335092766 49SD33 NY K 2013-08-20 19:40:00 51 51 Stopping, standing or parking on a sidewalk. 115 115 115 19.0
2013-06-20 1293094389 GDS9810 NJ NY 2013-06-20 17:32:00 31 31 Standing of a non-commercial vehicle in a comm... 115 115 115 17.0
2013-03-20 1287603683 P774757 IL K 2013-03-20 03:00:00 85 85 Parking a Commercial Vehicle more than 3 hours... 65 65 65 3.0
2013-07-14 1292755052 GBB7685 NY Q 2013-07-14 18:00:00 19 19 Bus Stop: Standing or parking where standing i... 115 115 115 18.0
2013-07-15 1292756792 GDX8921 99 NaN 2013-07-15 10:53:00 19 19 Bus Stop: Standing or parking where standing i... 115 115 115 10.0
2013-08-21 1311690335 93393JN NY NY 2013-08-21 19:01:00 19 19 Bus Stop: Standing or parking where standing i... 115 115 115 19.0
2013-08-16 1333719103 GHF9136 NY BX 2013-08-16 09:13:00 19 19 Bus Stop: Standing or parking where standing i... 115 115 115 9.0
2013-07-12 1302494910 GGK6345 PA K 2013-07-12 15:19:00 74 74 Standing or parking a vehicle without properly... 65 65 65 15.0
2013-08-03 1326400241 T619288C NY Q 2013-08-03 20:00:00 98 98 Standing or parking in front of a public or pr... 95 95 95 20.0
2013-08-03 1295546875 GDX6432 NY Q 2013-08-03 18:32:00 98 98 Standing or parking in front of a public or pr... 95 95 95 18.0
2013-08-16 1305348461 WVE5014 VA K 2013-08-16 02:35:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 2.0
2013-08-21 1321931773 609ZLS CO K 2013-08-21 11:50:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 11.0
2013-09-12 1321964500 GGF7236 NY K 2013-09-12 02:10:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 2.0
2013-07-11 1325680606 GEL3623 NY NY 2013-07-11 09:45:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 9.0
2013-08-23 1334436733 GBY1367 NY K 2013-08-23 07:47:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 7.0
2013-08-23 1334436745 M49DEE NJ K 2013-08-23 07:49:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 7.0
2013-08-23 1334436757 GCN3350 NY K 2013-08-23 07:50:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 7.0
2013-08-22 1334437440 JBF2138 PA K 2013-08-22 08:08:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 8.0
2013-08-22 1334437452 GAV9209 NY K 2013-08-22 08:09:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 8.0
2013-08-22 1334437464 R141607 IL K 2013-08-22 08:10:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 8.0
2013-08-22 1334437476 54260MB NY K 2013-08-22 08:12:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 8.0
2013-08-22 1334504660 YSY13U NJ NY 2013-08-22 11:45:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 11.0
2031-08-01 1334584655 GDN8900 99 NY 2031-08-01 09:06:00 21 21 Street Cleaning: No parking where parking is n... 65 45 65 9.0
2013-07-07 1333718767 NWW81U NJ NY 2013-07-07 00:33:00 16 16 Truck Loading/Unloading: Standing or parking w... 95 95 95 0.0
2013-08-24 1307953852 GFP6251 NY R 2013-08-24 13:28:00 27 27 No parking in a handicapped zone (off-street o... 180 180 180 13.0

265 rows × 13 columns


In [311]:
dates_frame = combined_df[combined_df['issue_date'] == combined_df['issue_date']]
by_date = dates_frame.groupby('issue_date')
type(by_date)
[date[0] for date in by_date["issue_date"]]
# [date for date in dates_frame["issue_date"]]


Out[311]:
[Timestamp('2013-03-20 00:00:00'),
 Timestamp('2013-04-12 00:00:00'),
 Timestamp('2013-05-24 00:00:00'),
 Timestamp('2013-05-28 00:00:00'),
 Timestamp('2013-06-06 00:00:00'),
 Timestamp('2013-06-07 00:00:00'),
 Timestamp('2013-06-18 00:00:00'),
 Timestamp('2013-06-20 00:00:00'),
 Timestamp('2013-06-24 00:00:00'),
 Timestamp('2013-06-28 00:00:00'),
 Timestamp('2013-07-02 00:00:00'),
 Timestamp('2013-07-05 00:00:00'),
 Timestamp('2013-07-06 00:00:00'),
 Timestamp('2013-07-07 00:00:00'),
 Timestamp('2013-07-09 00:00:00'),
 Timestamp('2013-07-10 00:00:00'),
 Timestamp('2013-07-11 00:00:00'),
 Timestamp('2013-07-12 00:00:00'),
 Timestamp('2013-07-13 00:00:00'),
 Timestamp('2013-07-14 00:00:00'),
 Timestamp('2013-07-15 00:00:00'),
 Timestamp('2013-07-16 00:00:00'),
 Timestamp('2013-07-17 00:00:00'),
 Timestamp('2013-07-18 00:00:00'),
 Timestamp('2013-07-19 00:00:00'),
 Timestamp('2013-07-20 00:00:00'),
 Timestamp('2013-07-21 00:00:00'),
 Timestamp('2013-07-22 00:00:00'),
 Timestamp('2013-07-23 00:00:00'),
 Timestamp('2013-07-24 00:00:00'),
 Timestamp('2013-07-25 00:00:00'),
 Timestamp('2013-07-26 00:00:00'),
 Timestamp('2013-07-27 00:00:00'),
 Timestamp('2013-07-28 00:00:00'),
 Timestamp('2013-07-29 00:00:00'),
 Timestamp('2013-07-30 00:00:00'),
 Timestamp('2013-07-31 00:00:00'),
 Timestamp('2013-08-01 00:00:00'),
 Timestamp('2013-08-02 00:00:00'),
 Timestamp('2013-08-03 00:00:00'),
 Timestamp('2013-08-04 00:00:00'),
 Timestamp('2013-08-05 00:00:00'),
 Timestamp('2013-08-06 00:00:00'),
 Timestamp('2013-08-07 00:00:00'),
 Timestamp('2013-08-08 00:00:00'),
 Timestamp('2013-08-09 00:00:00'),
 Timestamp('2013-08-10 00:00:00'),
 Timestamp('2013-08-11 00:00:00'),
 Timestamp('2013-08-12 00:00:00'),
 Timestamp('2013-08-13 00:00:00'),
 Timestamp('2013-08-14 00:00:00'),
 Timestamp('2013-08-15 00:00:00'),
 Timestamp('2013-08-16 00:00:00'),
 Timestamp('2013-08-17 00:00:00'),
 Timestamp('2013-08-18 00:00:00'),
 Timestamp('2013-08-19 00:00:00'),
 Timestamp('2013-08-20 00:00:00'),
 Timestamp('2013-08-21 00:00:00'),
 Timestamp('2013-08-22 00:00:00'),
 Timestamp('2013-08-23 00:00:00'),
 Timestamp('2013-08-24 00:00:00'),
 Timestamp('2013-08-25 00:00:00'),
 Timestamp('2013-09-12 00:00:00'),
 Timestamp('2015-08-06 00:00:00'),
 Timestamp('2031-08-01 00:00:00')]

In [312]:
# group by date, sum fines per day

dates_frame = combined_df[combined_df['issue_date'] == combined_df['issue_date']]
by_date = dates_frame.groupby('issue_date')

by_dates_list = []
for value in by_date['manhattan_below_96'].sum():
    cleaned_value_list = value.replace("(Regular Tow, plus violation fine)", "").replace("vary", "").split()
    revenue_int_list = [int(value) for value in cleaned_value_list]
    revenue_sum = sum(revenue_int_list)
    individual_date = [date[0] for date in by_date["issue_date"]]
    date_dict={'date': "individual_date", 'revenue': revenue_sum}
    by_dates_list.append(date_dict)

by_dates_list


Out[312]:
[{'date': 'individual_date', 'revenue': 65},
 {'date': 'individual_date', 'revenue': 65},
 {'date': 'individual_date', 'revenue': 95},
 {'date': 'individual_date', 'revenue': 65},
 {'date': 'individual_date', 'revenue': 65},
 {'date': 'individual_date', 'revenue': 115},
 {'date': 'individual_date', 'revenue': 95},
 {'date': 'individual_date', 'revenue': 230},
 {'date': 'individual_date', 'revenue': 230},
 {'date': 'individual_date', 'revenue': 95},
 {'date': 'individual_date', 'revenue': 210},
 {'date': 'individual_date', 'revenue': 460},
 {'date': 'individual_date', 'revenue': 1015},
 {'date': 'individual_date', 'revenue': 670},
 {'date': 'individual_date', 'revenue': 400},
 {'date': 'individual_date', 'revenue': 485},
 {'date': 'individual_date', 'revenue': 320},
 {'date': 'individual_date', 'revenue': 485},
 {'date': 'individual_date', 'revenue': 1060},
 {'date': 'individual_date', 'revenue': 690},
 {'date': 'individual_date', 'revenue': 595},
 {'date': 'individual_date', 'revenue': 845},
 {'date': 'individual_date', 'revenue': 1210},
 {'date': 'individual_date', 'revenue': 2260},
 {'date': 'individual_date', 'revenue': 2110},
 {'date': 'individual_date', 'revenue': 2110},
 {'date': 'individual_date', 'revenue': 2945},
 {'date': 'individual_date', 'revenue': 1715},
 {'date': 'individual_date', 'revenue': 1790},
 {'date': 'individual_date', 'revenue': 1850},
 {'date': 'individual_date', 'revenue': 1540},
 {'date': 'individual_date', 'revenue': 4080},
 {'date': 'individual_date', 'revenue': 2100},
 {'date': 'individual_date', 'revenue': 795},
 {'date': 'individual_date', 'revenue': 3085},
 {'date': 'individual_date', 'revenue': 3925},
 {'date': 'individual_date', 'revenue': 1350},
 {'date': 'individual_date', 'revenue': 3715},
 {'date': 'individual_date', 'revenue': 3245},
 {'date': 'individual_date', 'revenue': 1130},
 {'date': 'individual_date', 'revenue': 2105},
 {'date': 'individual_date', 'revenue': 5200},
 {'date': 'individual_date', 'revenue': 3090},
 {'date': 'individual_date', 'revenue': 1410},
 {'date': 'individual_date', 'revenue': 850},
 {'date': 'individual_date', 'revenue': 1285},
 {'date': 'individual_date', 'revenue': 1905},
 {'date': 'individual_date', 'revenue': 2215},
 {'date': 'individual_date', 'revenue': 3220},
 {'date': 'individual_date', 'revenue': 2135},
 {'date': 'individual_date', 'revenue': 1520},
 {'date': 'individual_date', 'revenue': 830},
 {'date': 'individual_date', 'revenue': 980},
 {'date': 'individual_date', 'revenue': 1095},
 {'date': 'individual_date', 'revenue': 2445},
 {'date': 'individual_date', 'revenue': 1435},
 {'date': 'individual_date', 'revenue': 880},
 {'date': 'individual_date', 'revenue': 570},
 {'date': 'individual_date', 'revenue': 585},
 {'date': 'individual_date', 'revenue': 750},
 {'date': 'individual_date', 'revenue': 370},
 {'date': 'individual_date', 'revenue': 305},
 {'date': 'individual_date', 'revenue': 65},
 {'date': 'individual_date', 'revenue': 115},
 {'date': 'individual_date', 'revenue': 65}]

In [313]:
by_dates_df = pd.DataFrame(by_dates_list)
by_dates_df.plot(kind='bar', figsize=(20,10))


Out[313]:
<matplotlib.axes._subplots.AxesSubplot at 0x110895b38>

16. Manually construct a dataframe out of https://dmv.ny.gov/statistic/2015licinforce-web.pdf (only NYC boroughts - bronx, queens, manhattan, staten island, brooklyn), having columns for borough name, abbreviation, and number of licensed drivers.


In [314]:
licensed_drivers = pd.read_csv("licensed_drivers.csv")
selection_licensed = licensed_drivers.tail(5)[["County", "Total"]]
selection_licensed


Out[314]:
County Total
59 Bronx 456847
60 Brooklyn 983769
61 New York 748583
62 Queens 1134064
63 Staten Island 304250

In [315]:
abbreviation = [{'state': "Staten Island", 'abbreviation': "K"}, {'state': "Brooklyn", 'abbreviation': "R"}, {'state':"Queens", 'abbreviation':"Q"}, {'state':"Bronx", 'abbreviation': "BX"}, {'state':"New York", 'abbreviation':"NY"}]
abbrev_table = pd.DataFrame(abbreviation)
abbrev_table


Out[315]:
abbreviation state
0 K Staten Island
1 R Brooklyn
2 Q Queens
3 BX Bronx
4 NY New York

In [316]:
joined_table = abbrev_table.merge(selection_licensed, left_on="state", right_on="County")
licensed_df = joined_table[['County', 'abbreviation', 'Total']]
licensed_df.columns = ("borough name", "abbreviation", "number of licensed drivers")
licensed_df


Out[316]:
borough name abbreviation number of licensed drivers
0 Staten Island K 304250
1 Brooklyn R 983769
2 Queens Q 1134064
3 Bronx BX 456847
4 New York NY 748583

17. What's the parking-ticket-cost-per-licensed-driver in each borough of NYC? Do this with pandas and the dataframe you just made, not with your head!


In [317]:
#1) Get total violation cost per borough
#2) Divide it by number of licensed drivers

In [318]:
df_17 = combined_df[['summons_number', 'violation_county', 'manhattan_below_96', 'all_other_areas']]
df_17


Out[318]:
summons_number violation_county manhattan_below_96 all_other_areas
issue_date
2013-08-04 1283294138 NaN 115 115
2013-08-04 1283294151 NY 115 115
2013-08-05 1283294163 NY 115 115
2013-08-05 1283294175 NY 115 115
2013-07-23 1290834349 Q 115 115
2013-07-30 1290834374 Q 115 115
2013-07-02 1291521665 K 115 115
2013-08-11 1293030739 Q 115 115
2013-06-20 1293100924 K 115 115
2013-07-05 1293101151 NY 115 115
2013-07-29 1293525625 K 115 115
2013-07-25 1294202492 NY 115 115
2013-06-24 1302446484 K 115 115
2013-07-10 1302446538 K 115 115
2013-08-15 1306112679 Q 115 115
2013-08-02 1306114172 Q 115 115
2013-08-23 1306114184 Q 115 115
2013-08-23 1306114196 Q 115 115
2013-07-27 1306116235 Q 115 115
2013-08-06 1306206844 NY 115 115
2013-08-12 1307452577 NY 115 115
2013-08-12 1307452590 NaN 115 115
2013-07-17 1321774722 NaN 115 115
2013-07-17 1321774771 NaN 115 115
2013-08-05 1321965783 K 115 115
2013-08-13 1325733878 BX 115 115
2013-07-14 1325972393 NY 115 115
2013-08-11 1326475710 K 115 115
2013-07-25 1295546516 Q 115 115
2013-07-19 1295547089 Q 115 115
... ... ... ... ...
2013-08-01 1306632572 BX 65 65
2013-07-15 1321688313 Q 65 65
2013-07-30 1326367304 K 65 65
2013-07-30 1326367316 K 65 65
2013-07-30 1326367330 K 65 65
2013-07-30 1326367341 K 65 65
2013-08-09 1326367377 K 65 65
2013-08-09 1326367389 K 65 65
2013-07-28 1333437845 NY 65 65
2013-07-21 1311447684 R 65 65
2013-08-11 1307949484 NY 65 60
2013-08-11 1307953128 R 65 45
2013-07-19 1308157344 Q 65 45
2013-08-13 1334736820 K 65 45
2013-08-15 1307953141 R 115 115
2013-08-09 1333925347 NY 115 115
2013-08-24 1307953852 R 180 180
2013-07-22 1334878444 K 180 180
2013-07-21 1325855789 Q 165 165
2013-07-17 1295547478 Q 165 165
2013-07-22 1295547480 Q 165 165
2013-07-24 1295548021 Q 165 165
2013-07-26 1307964011 NY 165 165
2013-07-21 1307964485 NY 165 165
2013-07-26 1311687713 NY 165 165
2013-07-18 1333609826 K 65 65
2013-07-19 1308157320 Q 60 45
2013-07-07 1333716310 NY 115 115
2013-07-29 1334012568 Q 115 115
2013-07-31 1334032191 Q vary vary

989 rows × 4 columns


In [319]:
fine_dict_list = []
def get_fines(a,b):
    viol = df_17[df_17['violation_county'] == a]
    fines = viol[b]

    fines_list = []
    for fine in fines:
        try:
            fines_list.append(int(fine))
        except:
            continue
    fine_per_borough = sum(fines_list)
    dict_fines = {'state': a, 'total_fines_in_borough': fine_per_borough}
    fine_dict_list.append(dict_fines)

fines_per_borough = fine_dict_list

get_fines('NY', 'manhattan_below_96')
get_fines('K', 'all_other_areas')
get_fines('BX', 'all_other_areas')
get_fines('R', 'all_other_areas')
get_fines('Q', 'all_other_areas')

In [320]:
fines_per_borough


Out[320]:
[{'state': 'NY', 'total_fines_in_borough': 26000},
 {'state': 'K', 'total_fines_in_borough': 21830},
 {'state': 'BX', 'total_fines_in_borough': 6650},
 {'state': 'R', 'total_fines_in_borough': 1840},
 {'state': 'Q', 'total_fines_in_borough': 16755}]

In [321]:
borough_fines = pd.DataFrame(fines_per_borough)

In [322]:
joined_df = licensed_df.merge(borough_fines, left_on="abbreviation", right_on="state")
joined_df


Out[322]:
borough name abbreviation number of licensed drivers state total_fines_in_borough
0 Staten Island K 304250 K 21830
1 Brooklyn R 983769 R 1840
2 Queens Q 1134064 Q 16755
3 Bronx BX 456847 BX 6650
4 New York NY 748583 NY 26000

In [323]:
joined_df['parking-ticket-cost-per-licensed-driver'] = joined_df['total_fines_in_borough'] / joined_df['number of licensed drivers']

In [324]:
joined_df


Out[324]:
borough name abbreviation number of licensed drivers state total_fines_in_borough parking-ticket-cost-per-licensed-driver
0 Staten Island K 304250 K 21830 0.071750
1 Brooklyn R 983769 R 1840 0.001870
2 Queens Q 1134064 Q 16755 0.014774
3 Bronx BX 456847 BX 6650 0.014556
4 New York NY 748583 NY 26000 0.034732

Either my calculation went wrong somewhere or the fines are collected just by a very few drivers.


In [ ]: