In [1]:
import glob
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.max_columns', 500)

In [3]:
files = glob.glob('../vz-hackathon/**')

In [4]:
moving_violations = [f for f in files if 'Moving_Violations_in' in f]

In [5]:
parking_violations = [f for f in files if 'Parking_Violations_in' in f]

In [6]:
len(moving_violations)


Out[6]:
68

In [7]:
list_ = []
for file_ in moving_violations:
    df = pd.read_csv(file_,index_col=None, header=0)
    
    filename = file_[len('../vz-hackathon/'):]
    
    df['filename'] = filename
    
    if filename ==  'Moving_Violations_in_July_2014.csv':
        
        df['TICKETISSUEDATE'] = df['TICKET_ISSUE_DATE']
        
        df['ROW_'] = df['ROWID_']
        
        df['VIOLATIONCODE'] = df['VIOLATION_CODE']
        
        df['VIOLATIONDESC'] = df['VIOLATION_DESCRIPTION']
            
    list_.append(df)
    
frame = pd.concat(list_)
# TESTs
assert len(frame) == 3993673
assert frame.filename.nunique() == len(moving_violations)

frame.columns = [col.lower() for col in frame.columns]
frame = frame.reset_index(drop=True)

# convert dt string to object
# frame['ticketissuedate'] = pd.to_datetime(frame.ticketissuedate)

In [8]:
# DELETE the Moving_Violations_in_July_2014.csv mess
del frame['ticket_issue_date'] 
del frame['day_of_week'] # only appears in one file

# Possible deletes -- only appears in one file - Moving_Violations_in_July_2014.csv
del frame['body_style'] # only appears in one file - Moving_Violations_in_July_2014.csv
del frame['month_of_year'] # only appears in one file - Moving_Violations_in_July_2014.csv
del frame['holiday'] # only appears in one file - Moving_Violations_in_July_2014.csv
del frame['issue_time'] # only appears in one file - Moving_Violations_in_July_2014.csv
del frame['rp_plate_state']
del frame['week_of_year']
del frame['row_id']
del frame['rowid_']
del frame['violation_code']
del frame['violation_description']

In [9]:
# Feature creation
frame['accidentindicator'] = frame.accidentindicator.map({'No': False, 'Yes':True})

In [10]:
#drop duplications
frame.drop_duplicates(subset='row_', inplace=True)

In [11]:
frame.head(2)


Out[11]:
accidentindicator address_id agencyid fineamt location objectid penalty1 penalty2 row_ streetsegid ticketissuedate tickettype totalpaid violationcode violationdesc xcoord y ycoord filename x
0 False 31943.0 25.0 50.0 3700 BLK SOUTHERN AVENUE SE (SW/B) 119 50.0 NaN 195854 1435.0 2009-04-01T00:00:00.000Z Photo 0.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT 404133.54 38.859665 132428.01 Moving_Violations_in_April_2009.csv -76.952377
1 False 301049.0 25.0 50.0 3400 BLK BENNING ROAD NE W/B 120 NaN NaN 194461 6467.0 2009-04-01T00:00:00.000Z Photo 50.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT 404151.99 38.896986 136570.94 Moving_Violations_in_April_2009.csv -76.952139

In [12]:
# frame.to_json('moving_violations.json')
# frame.to_csv('moving_violations.csv', sep='\t', index=False)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
p

In [ ]:


In [ ]:


In [13]:
frame.sample(100000).to_csv('moving_violations_sample.csv', sep='\t', index=False)

In [14]:
frame.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3801217 entries, 0 to 3993672
Data columns (total 20 columns):
accidentindicator    object
address_id           float64
agencyid             float64
fineamt              float64
location             object
objectid             int64
penalty1             float64
penalty2             float64
row_                 int64
streetsegid          float64
ticketissuedate      object
tickettype           object
totalpaid            float64
violationcode        object
violationdesc        object
xcoord               float64
y                    float64
ycoord               float64
filename             object
x                   float64
dtypes: float64(11), int64(2), object(7)
memory usage: 609.0+ MB

In [ ]:


In [3]:
# cleaning steps from Nicole
df = pd.read_csv('/Users/nicole/Documents/Data Science/code_for_dc/moving_violations.csv')


/Applications/anaconda/envs/codedc/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (1,4,5,17,20,21,25,26) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [4]:
df.shape


Out[4]:
(3993673, 33)

In [5]:
df.isnull().sum()


Out[5]:
Unnamed: 0                     0
accidentindicator         154379
address_id               1115882
agencyid                  154384
body_style               3864793
day_of_week              3839294
fineamt                   308409
holiday                  3839294
issue_time               3839378
location                       0
month_of_year            3839294
objectid                       0
penalty1                 2812133
penalty2                 3993673
rowid_                   3839294
row_                           0
row_id                   3993673
rp_plate_state           3839458
streetsegid               110277
ticketissuedate                0
tickettype                154379
ticket_issue_date        3839294
totalpaid                 154379
violationcode                  0
violationdesc               4327
violation_code           3839294
violation_description    3843512
week_of_year             3839294
xcoord                         0
y                              0
ycoord                         0
filename                       0
x                             0
dtype: int64

In [6]:
# drop two  columns that contain all null values- penalty2, row_id
df.drop(['penalty2', 'row_id'], axis=1, inplace=True)

In [7]:
print df.shape
print df.isnull().sum()


(3993673, 31)
Unnamed: 0                     0
accidentindicator         154379
address_id               1115882
agencyid                  154384
body_style               3864793
day_of_week              3839294
fineamt                   308409
holiday                  3839294
issue_time               3839378
location                       0
month_of_year            3839294
objectid                       0
penalty1                 2812133
rowid_                   3839294
row_                           0
rp_plate_state           3839458
streetsegid               110277
ticketissuedate                0
tickettype                154379
ticket_issue_date        3839294
totalpaid                 154379
violationcode                  0
violationdesc               4327
violation_code           3839294
violation_description    3843512
week_of_year             3839294
xcoord                         0
y                              0
ycoord                         0
filename                       0
x                             0
dtype: int64

In [8]:
df.head(5)


Out[8]:
Unnamed: 0 accidentindicator address_id agencyid body_style day_of_week fineamt holiday issue_time location month_of_year objectid penalty1 rowid_ row_ rp_plate_state streetsegid ticketissuedate tickettype ticket_issue_date totalpaid violationcode violationdesc violation_code violation_description week_of_year xcoord y ycoord filename x
0 0 No 31943.0 25.0 NaN NaN 50.0 NaN NaN 3700 BLK SOUTHERN AVENUE SE (SW/B) NaN 119 50.0 NaN 195854 NaN 1435.0 2009-04-01T00:00:00.000Z Photo NaN 0.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT NaN NaN NaN 404133.5400 38.859665 132428.0100 Moving_Violations_in_April_2009.csv -76.952377
1 1 No 301049.0 25.0 NaN NaN 50.0 NaN NaN 3400 BLK BENNING ROAD NE W/B NaN 120 NaN NaN 194461 NaN 6467.0 2009-04-01T00:00:00.000Z Photo NaN 50.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT NaN NaN NaN 404151.9900 38.896986 136570.9400 Moving_Violations_in_April_2009.csv -76.952139
2 2 No 146162.0 25.0 NaN NaN 50.0 NaN NaN 100 BLK MICHIGAN AVENUE NE E/B NaN 125 NaN NaN 194565 NaN 2623.0 2009-04-01T00:00:00.000Z Photo NaN 50.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT NaN NaN NaN 399463.2700 38.927444 139950.9200 Moving_Violations_in_April_2009.csv -77.006192
3 3 No NaN 25.0 NaN NaN 50.0 NaN NaN SE/SW FRWY @ 9TH ST ENTRANCE SE E/B NaN 133 50.0 NaN 209519 NaN 14746.0 2009-04-08T00:00:00.000Z Photo NaN 100.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT NaN NaN NaN 400436.0807 38.878792 134550.1426 Moving_Violations_in_April_2009.csv -76.994977
4 4 No NaN 25.0 NaN NaN 100.0 NaN NaN SE/SW FRWY @ 9TH ST ENTRANCE SE E/B NaN 135 100.0 NaN 230448 NaN 14746.0 2009-04-18T00:00:00.000Z Photo NaN 200.0 T120 SPEED 16-20 MPH OVER THE SPEED LIMIT NaN NaN NaN 400436.0807 38.878792 134550.1426 Moving_Violations_in_April_2009.csv -76.994977

In [9]:
df.dtypes


Out[9]:
Unnamed: 0                 int64
accidentindicator         object
address_id               float64
agencyid                 float64
body_style                object
day_of_week               object
fineamt                  float64
holiday                  float64
issue_time               float64
location                  object
month_of_year            float64
objectid                   int64
penalty1                 float64
rowid_                   float64
row_                       int64
rp_plate_state            object
streetsegid              float64
ticketissuedate           object
tickettype                object
ticket_issue_date         object
totalpaid                float64
violationcode             object
violationdesc             object
violation_code            object
violation_description     object
week_of_year             float64
xcoord                   float64
y                        float64
ycoord                   float64
filename                  object
x                       float64
dtype: object

In [10]:
df[~df['ticket_issue_date'].isnull()]

# ticketissuedate has no null values. spot check of ticket_issue_date confirms redundant column. drop ticket_issue_date


Out[10]:
Unnamed: 0 accidentindicator address_id agencyid body_style day_of_week fineamt holiday issue_time location month_of_year objectid penalty1 rowid_ row_ rp_plate_state streetsegid ticketissuedate tickettype ticket_issue_date totalpaid violationcode violationdesc violation_code violation_description week_of_year xcoord y ycoord filename x
1784390 1784390 NaN 809744.0 NaN VA THURSDAY NaN 0.0 1619.0 200 BLOCK 3RD ST SW EAST SIDE 7.0 122625 NaN 1398224.0 1398224 MD 8322.0 2014-07-17T00:00:00.000Z NaN 2014-07-17T00:00:00.000Z NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 29.0 398684.0 38.887879 135559.0 Moving_Violations_in_July_2014.csv -77.015171
1784391 1784391 NaN 280105.0 NaN PU FRIDAY NaN 0.0 1024.0 FRONT OF 700 A ST SE NORTH SIDE 7.0 122628 NaN 1401793.0 1401793 DC 12261.0 2014-07-18T00:00:00.000Z NaN 2014-07-18T00:00:00.000Z NaN P055 NO PARKING ANYTIME P055 NO PARKING ANYTIME 29.0 400357.0 38.888826 135664.0 Moving_Violations_in_July_2014.csv -76.995887
1784392 1784392 NaN 811553.0 NaN UT THURSDAY NaN 0.0 1151.0 2000 BLOCK VIRGINIA AVE NW NORTH SI 7.0 122630 NaN 1400359.0 1400359 MD 9916.0 2014-07-24T00:00:00.000Z NaN 2014-07-24T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 30.0 396015.0 38.894933 136343.0 Moving_Violations_in_July_2014.csv -77.045939
1784393 1784393 NaN 809521.0 NaN UT WEDNESDAY NaN 0.0 1813.0 5100 BLOCK MACARTHUR BLVD NW EAST S 7.0 122631 NaN 1398271.0 1398271 MD 8140.0 2014-07-16T00:00:00.000Z NaN 2014-07-16T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 29.0 391257.0 38.924284 139605.0 Moving_Violations_in_July_2014.csv -77.100828
1784394 1784394 NaN 811771.0 NaN 4D MONDAY NaN 0.0 1154.0 1200 BLOCK H ST NE NORTH SIDE 7.0 122634 NaN 1406598.0 1406598 MD 10098.0 2014-07-14T00:00:00.000Z NaN 2014-07-14T00:00:00.000Z NaN P170 FAILURE TO DISPLAY CURRENT TAGS P170 FAILURE TO DISPLAY CURRENT TAGS 29.0 400791.0 38.900212 136928.0 Moving_Violations_in_July_2014.csv -76.990883
1784395 1784395 NaN 804518.0 NaN 4D TUESDAY NaN 0.0 2145.0 1300 BLOCK U ST NW NORTH SIDE 7.0 122635 NaN 1404521.0 1404521 OH 3894.0 2014-07-29T00:00:00.000Z NaN 2014-07-29T00:00:00.000Z NaN P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT 31.0 397330.0 38.917009 138793.0 Moving_Violations_in_July_2014.csv -77.030790
1784396 1784396 NaN 806786.0 NaN 4D WEDNESDAY NaN 0.0 1117.0 1200 BLOCK 5TH ST NE WEST SIDE 7.0 122637 NaN 1397544.0 1397544 MD 5820.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P025 PARK LESS THAN 40 FEET FROM AN INTERSECTION P025 PARK LESS THAN 40 FEET FROM AN INTERSECTION 27.0 400043.0 38.906185 137591.0 Moving_Violations_in_July_2014.csv -76.999507
1784397 1784397 NaN 22742.0 NaN UT WEDNESDAY NaN 0.0 300.0 FRONT OF 4361 F ST SE SOUTH SIDE 7.0 122638 NaN 1397549.0 1397549 DC 2567.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P170 FAILURE TO DISPLAY CURRENT TAGS P170 FAILURE TO DISPLAY CURRENT TAGS 27.0 405091.0 38.881595 134863.0 Moving_Violations_in_July_2014.csv -76.941327
1784398 1784398 NaN 813159.0 NaN UT THURSDAY NaN 0.0 2133.0 1100 BLOCK 17TH ST NW WEST SIDE 7.0 122639 NaN 1400423.0 1400423 DC 11313.0 2014-07-03T00:00:00.000Z NaN 2014-07-03T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 396660.0 38.903143 137254.0 Moving_Violations_in_July_2014.csv -77.038508
1784399 1784399 NaN 800713.0 NaN 4D FRIDAY NaN 0.0 817.0 BLOCK 2000 CONNECTICUT AVE NW WEST 7.0 122640 NaN 1401816.0 1401816 DC 745.0 2014-07-18T00:00:00.000Z NaN 2014-07-18T00:00:00.000Z NaN P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 29.0 395966.0 38.916391 138725.0 Moving_Violations_in_July_2014.csv -77.046518
1784400 1784400 NaN 801812.0 NaN 2D MONDAY NaN 0.0 1613.0 1700 BLOCK NEW YORK AVE NW NORTH SI 7.0 122642 NaN 1399653.0 1399653 MD 1505.0 2014-07-21T00:00:00.000Z NaN 2014-07-21T00:00:00.000Z NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 30.0 396548.0 38.896521 136519.0 Moving_Violations_in_July_2014.csv -77.039796
1784401 1784401 NaN 806390.0 NaN 2D MONDAY NaN 0.0 1414.0 1700 BLOCK E ST NW SOUTH SIDE 7.0 122643 NaN 1399665.0 1399665 VA 5492.0 2014-07-21T00:00:00.000Z NaN 2014-07-21T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 30.0 396692.0 38.895396 136394.0 Moving_Violations_in_July_2014.csv -77.038135
1784402 1784402 NaN 802182.0 NaN VA TUESDAY NaN 0.0 1609.0 4200 BLOCK NEBRASKA AV NW SOUTH SID 7.0 122644 NaN 1396901.0 1396901 OH 1827.0 2014-07-08T00:00:00.000Z NaN 2014-07-08T00:00:00.000Z NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 28.0 392986.0 38.943767 141766.0 Moving_Violations_in_July_2014.csv -77.080911
1784403 1784403 NaN 801613.0 NaN 4D TUESDAY NaN 0.0 1112.0 1700 BLOCK 15TH ST NW EAST SIDE 7.0 122645 NaN 1396910.0 1396910 VA 1371.0 2014-07-08T00:00:00.000Z NaN 2014-07-08T00:00:00.000Z NaN P173 NO PARKING STREET CLEANING P173 NO PARKING STREET CLEANING 28.0 397004.0 38.913359 138388.0 Moving_Violations_in_July_2014.csv -77.034547
1784404 1784404 NaN 803643.0 NaN 4D TUESDAY NaN 0.0 1636.0 3400 BLOCK CONNECTICUT AVE NW EAST 7.0 122646 NaN 1397602.0 1397602 MD 3262.0 2014-07-29T00:00:00.000Z NaN 2014-07-29T00:00:00.000Z NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 31.0 394964.0 38.934835 140773.0 Moving_Violations_in_July_2014.csv -77.058087
1784405 1784405 NaN 800301.0 NaN VA THURSDAY NaN 0.0 1314.0 2000 BLOCK EYE ST NW NORTH SIDE 7.0 122647 NaN 1399000.0 1399000 DC 253.0 2014-07-10T00:00:00.000Z NaN 2014-07-10T00:00:00.000Z NaN P007 PARK IN A BUS ZONE P007 PARK IN A BUS ZONE 28.0 396168.0 38.901348 137055.0 Moving_Violations_in_July_2014.csv -77.044179
1784406 1784406 NaN 801489.0 NaN 4D TUESDAY NaN 0.0 1407.0 3000 BLOCK WARDER ST NW WEST SIDE 7.0 122648 NaN 1400465.0 1400465 DC 1102.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P173 NO PARKING STREET CLEANING P173 NO PARKING STREET CLEANING 27.0 398326.0 38.928326 140049.0 Moving_Violations_in_July_2014.csv -77.019308
1784407 1784407 NaN 804514.0 NaN TK MONDAY NaN 0.0 1035.0 500 BLOCK 11TH ST NW EAST SIDE 7.0 122653 NaN 1398326.0 1398326 MD 3891.0 2014-07-28T00:00:00.000Z NaN 2014-07-28T00:00:00.000Z NaN P385 STOPPING, STANDING OR PARKING VEHICLE IN BIKE ... P385 STOPPING, STANDING OR PARKING VEHICLE IN BIKE ... 31.0 397655.0 38.896750 136544.0 Moving_Violations_in_July_2014.csv -77.027035
1784408 1784408 NaN 812697.0 NaN 4D THURSDAY NaN 0.0 810.0 400 BLOCK N CAPITOL ST NW WEST SIDE 7.0 122611 NaN 1405767.0 1405767 VA 10918.0 2014-07-31T00:00:00.000Z NaN 2014-07-31T00:00:00.000Z NaN P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 31.0 399214.0 38.895618 136418.0 Moving_Violations_in_July_2014.csv -77.009063
1784409 1784409 NaN 804634.0 NaN PU THURSDAY NaN 0.0 1210.0 1200 BLOCK 31ST ST NW WEST SIDE 7.0 122612 NaN 1405840.0 1405840 DC 3995.0 2014-07-10T00:00:00.000Z NaN 2014-07-10T00:00:00.000Z NaN P002 STAND OR PARK IN ALLEY P002 STAND OR PARK IN ALLEY 28.0 394692.0 38.906007 137573.0 Moving_Violations_in_July_2014.csv -77.061199
1784410 1784410 NaN 801277.0 NaN PU WEDNESDAY NaN 0.0 909.0 1200 BLOCK 9TH ST NW EAST SIDE 7.0 122613 NaN 1401026.0 1401026 MD 1255.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 397918.0 38.904912 137450.0 Moving_Violations_in_July_2014.csv -77.024006
1784411 1784411 NaN 800656.0 NaN TK WEDNESDAY NaN 0.0 1224.0 REAR OF 1400 R ST NW NORTH SIDE 7.0 122616 NaN 1400314.0 1400314 MD 697.0 2014-07-09T00:00:00.000Z NaN 2014-07-09T00:00:00.000Z NaN P002 STAND OR PARK IN ALLEY P002 STAND OR PARK IN ALLEY 28.0 397173.0 38.912612 138305.0 Moving_Violations_in_July_2014.csv -77.032598
1784412 1784412 NaN 804439.0 NaN 2D WEDNESDAY NaN 0.0 12.0 BLOCK 1900 1ST ST NW EAST SIDE 7.0 122617 NaN 1402452.0 1402452 OH 3829.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P076 NaN P076 NaN 27.0 398946.0 38.915634 138640.0 Moving_Violations_in_July_2014.csv -77.012156
1784413 1784413 NaN 804354.0 NaN 4D TUESDAY NaN 0.0 2336.0 BLOCK 1300 BRYANT ST NE NORTH SIDE 7.0 122618 NaN 1402462.0 1402462 MD 3758.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P076 NaN P076 NaN 27.0 401165.0 38.921093 139246.0 Moving_Violations_in_July_2014.csv -76.986568
1784414 1784414 NaN 800067.0 NaN 4D WEDNESDAY NaN 0.0 537.0 500 BLOCK 9TH ST NE WEST SIDE 7.0 122619 NaN 1403807.0 1403807 VA 59.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P020 PARK WITHIN 10 FEET OF A FIRE HYDRANT P020 PARK WITHIN 10 FEET OF A FIRE HYDRANT 27.0 400542.0 38.896050 136466.0 Moving_Violations_in_July_2014.csv -76.993754
1784415 1784415 NaN 805318.0 NaN UT TUESDAY NaN 0.0 1618.0 3900 BLOCK CONNECTICUT AVE NW WEST 7.0 122623 NaN 1401078.0 1401078 MD 4561.0 2014-07-22T00:00:00.000Z NaN 2014-07-22T00:00:00.000Z NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 30.0 394764.0 38.938815 141215.0 Moving_Violations_in_July_2014.csv -77.060397
1784416 1784416 NaN 815376.0 NaN 4D FRIDAY NaN 0.0 2225.0 1900 BLOCK BELMONT RD NW SOUTH SIDE 7.0 123148 NaN 1467400.0 1467400 MD 13270.0 2014-07-11T00:00:00.000Z NaN 2014-07-11T00:00:00.000Z NaN P050 PARKED WITHIN 25 FEET OF A STOP SIGN P050 PARKED WITHIN 25 FEET OF A STOP SIGN 28.0 396273.0 38.920581 139190.0 Moving_Violations_in_July_2014.csv -77.042980
1784417 1784417 NaN 811078.0 NaN 4D SATURDAY NaN 0.0 1501.0 3200 BLOCK S ST NW SOUTH SIDE 7.0 123149 NaN 1466720.0 1466720 VA 9498.0 2014-07-05T00:00:00.000Z NaN 2014-07-05T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 394301.0 38.914706 138539.0 Moving_Violations_in_July_2014.csv -77.065715
1784418 1784418 NaN 805282.0 NaN PU TUESDAY NaN 0.0 1138.0 1700 BLOCK 36TH ST NW EAST SIDE 7.0 123153 NaN 1464610.0 1464610 PA 4531.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 393831.0 38.914072 138469.0 Moving_Violations_in_July_2014.csv -77.071134
1784419 1784419 NaN 808009.0 NaN PU TUESDAY NaN 0.0 1141.0 3600 BLOCK MACOMB ST NW NORTH SIDE 7.0 123169 NaN 1469677.0 1469677 MD 6860.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 393952.0 38.933495 140625.0 Moving_Violations_in_July_2014.csv -77.069758
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1938739 1938739 NaN 803810.0 NaN 4D TUESDAY NaN 0.0 1256.0 700 BLOCK NORTH CAPITOL ST NW WEST 7.0 13446849 NaN 1440221.0 1440221 NC 3513.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 399213.0 38.898203 136705.0 Moving_Violations_in_July_2014.csv -77.009075
1938740 1938740 NaN 809878.0 NaN 4D TUESDAY NaN 0.0 1125.0 1100 BLOCK 10TH ST NW WEST SIDE 7.0 13446852 NaN 1432629.0 1432629 MD 8440.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P322 PARK OVERTIME IN TIMED ZONE P322 PARK OVERTIME IN TIMED ZONE 27.0 397746.0 38.904912 137450.0 Moving_Violations_in_July_2014.csv -77.025989
1938741 1938741 NaN 806752.0 NaN 4D WEDNESDAY NaN 0.0 1158.0 4700 BLOCK RESERVOIR RD NW NORTH SI 7.0 13446858 NaN 1434676.0 1434676 VA 5790.0 2014-07-09T00:00:00.000Z NaN 2014-07-09T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 28.0 392022.0 38.914734 138544.0 Moving_Violations_in_July_2014.csv -77.091993
1938742 1938742 NaN 800405.0 NaN 4D WEDNESDAY NaN 0.0 1931.0 1700 BLOCK COLUMBIA RD NW NORTH SID 7.0 13446864 NaN 1433401.0 1433401 IL 339.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT 27.0 396379.0 38.923293 139491.0 Moving_Violations_in_July_2014.csv -77.041760
1938743 1938743 NaN 802404.0 NaN UT THURSDAY NaN 0.0 713.0 1600 BLOCK HARVARD ST NW SOUTH SIDE 7.0 13446865 NaN 1434091.0 1434091 MD 1936.0 2014-07-10T00:00:00.000Z NaN 2014-07-10T00:00:00.000Z NaN P269 NO STANDING ANYTIME P269 NO STANDING ANYTIME 28.0 396879.0 38.926322 139827.0 Moving_Violations_in_July_2014.csv -77.035995
1938744 1938744 NaN 802675.0 NaN TK TUESDAY NaN 0.0 1653.0 2600 BLOCK M L KING JR AV SE WEST S 7.0 13446875 NaN 1432805.0 1432805 MD 2053.0 2014-07-08T00:00:00.000Z NaN 2014-07-08T00:00:00.000Z NaN P057 NO PARKING SPECIFIC HOURS P057 NO PARKING SPECIFIC HOURS 28.0 400366.0 38.857449 132181.0 Moving_Violations_in_July_2014.csv -76.995785
1938745 1938745 NaN 815142.0 NaN 2D WEDNESDAY NaN 0.0 703.0 1400 BLOCK 22ND ST NW EAST SIDE 7.0 13446880 NaN 1434293.0 1434293 MD 13058.0 2014-07-23T00:00:00.000Z NaN 2014-07-23T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 30.0 395768.0 38.908238 137820.0 Moving_Violations_in_July_2014.csv -77.048795
1938746 1938746 NaN 807781.0 NaN PU TUESDAY NaN 0.0 703.0 2000 BLOCK T ST NW SOUTH SIDE 7.0 13446885 NaN 1437759.0 1437759 MD 6674.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 396081.0 38.915851 138665.0 Moving_Violations_in_July_2014.csv -77.045191
1938747 1938747 NaN 815378.0 NaN UT THURSDAY NaN 0.0 845.0 2700 BLOCK 42ND ST NW WEST SIDE 7.0 13446893 NaN 1437863.0 1437863 DC 13272.0 2014-07-10T00:00:00.000Z NaN 2014-07-10T00:00:00.000Z NaN P057 NO PARKING SPECIFIC HOURS P057 NO PARKING SPECIFIC HOURS 28.0 392920.0 38.924182 139592.0 Moving_Violations_in_July_2014.csv -77.081650
1938748 1938748 NaN 804086.0 NaN UT WEDNESDAY NaN 0.0 140.0 900 BLOCK L ST NW SOUTH SIDE 7.0 13446897 NaN 1437231.0 1437231 MD 3521.0 2014-07-09T00:00:00.000Z NaN 2014-07-09T00:00:00.000Z NaN P055 NO PARKING ANYTIME P055 NO PARKING ANYTIME 28.0 397832.0 38.904173 137368.0 Moving_Violations_in_July_2014.csv -77.024997
1938749 1938749 NaN 802306.0 NaN 4D TUESDAY NaN 0.0 1001.0 3500 BLOCK W PL NW NORTH SIDE 7.0 13446903 NaN 1442825.0 1442825 VA 2991.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P012 DISOBEYING OFFICIAL SIGN P012 DISOBEYING OFFICIAL SIGN 27.0 393835.0 38.918973 139013.0 Moving_Violations_in_July_2014.csv -77.071093
1938750 1938750 NaN 800405.0 NaN 4D WEDNESDAY NaN 0.0 2058.0 1800 BLOCK COLUMBIA RD NW SOUTH SID 7.0 13446905 NaN 1444185.0 1444185 NJ 339.0 2014-07-09T00:00:00.000Z NaN 2014-07-09T00:00:00.000Z NaN P280 EXPIRATION TIME ON METER RECEIPT LAPSED P280 EXPIRATION TIME ON METER RECEIPT LAPSED 28.0 396379.0 38.923293 139491.0 Moving_Violations_in_July_2014.csv -77.041760
1938751 1938751 NaN 803006.0 NaN 4D TUESDAY NaN 0.0 846.0 1000 BLOCK WISCONSIN AVE NW WEST SI 7.0 13446907 NaN 1447010.0 1447010 DC 2497.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 27.0 394556.0 38.903015 137241.0 Moving_Violations_in_July_2014.csv -77.062764
1938752 1938752 NaN 804634.0 NaN VA TUESDAY NaN 0.0 1047.0 1300 BLOCK 31ST ST NW EAST SIDE 7.0 13446913 NaN 1442180.0 1442180 MD 3995.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 394692.0 38.906007 137573.0 Moving_Violations_in_July_2014.csv -77.061199
1938753 1938753 NaN 804593.0 NaN UT WEDNESDAY NaN 0.0 1513.0 400 BLOCK F ST NW NORTH SIDE 7.0 13446914 NaN 1442200.0 1442200 DC 3961.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 398477.0 38.897337 136609.0 Moving_Violations_in_July_2014.csv -77.017559
1938754 1938754 NaN 801566.0 NaN VA THURSDAY NaN 0.0 1242.0 200 BLOCK R ST NE NORTH SIDE 7.0 13446915 NaN 1442857.0 1442857 VA 1328.0 2014-07-10T00:00:00.000Z NaN 2014-07-10T00:00:00.000Z NaN P173 NO PARKING STREET CLEANING P173 NO PARKING STREET CLEANING 28.0 399526.0 38.912608 138304.0 Moving_Violations_in_July_2014.csv -77.005468
1938755 1938755 NaN 813496.0 NaN 4D THURSDAY NaN 0.0 1050.0 400 BLOCK ELM ST NW NORTH SIDE 7.0 13446916 NaN 1442865.0 1442865 MD 11615.0 2014-07-10T00:00:00.000Z NaN 2014-07-10T00:00:00.000Z NaN P010 OBSTRUCTING CROSSWALK P010 OBSTRUCTING CROSSWALK 28.0 398547.0 38.917435 138840.0 Moving_Violations_in_July_2014.csv -77.016757
1938756 1938756 NaN 812162.0 NaN PU WEDNESDAY NaN 0.0 746.0 300 BLOCK C ST NW NORTH SIDE 7.0 13446921 NaN 1442279.0 1442279 DC 10438.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P307 PARK IN RESERVED SPACE FOR MOTORCYCLES ONLY P307 PARK IN RESERVED SPACE FOR MOTORCYCLES ONLY 27.0 398475.0 38.893599 136194.0 Moving_Violations_in_July_2014.csv -77.017581
1938757 1938757 NaN 802860.0 NaN 4D TUESDAY NaN 0.0 1311.0 1700A VERMONT AVE NW WEST SIDE 7.0 13446923 NaN 1447165.0 1447165 MD 2347.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P322 PARK OVERTIME IN TIMED ZONE P322 PARK OVERTIME IN TIMED ZONE 27.0 397564.0 38.912271 138267.0 Moving_Violations_in_July_2014.csv -77.028090
1938758 1938758 NaN 814533.0 NaN VA FRIDAY NaN 0.0 844.0 1300 BLOCK SAVANNAH ST SE SOUTH SID 7.0 13446926 NaN 1447123.0 1447123 DC 12528.0 2014-07-11T00:00:00.000Z NaN 2014-07-11T00:00:00.000Z NaN P170 FAILURE TO DISPLAY CURRENT TAGS P170 FAILURE TO DISPLAY CURRENT TAGS 28.0 400954.0 38.843153 130594.0 Moving_Violations_in_July_2014.csv -76.989013
1938759 1938759 NaN 803647.0 NaN TK WEDNESDAY NaN 0.0 1304.0 1500 BLOCK M ST NW NORTH SIDE 7.0 13446927 NaN 1447801.0 1447801 MD 3266.0 2014-07-02T00:00:00.000Z NaN 2014-07-02T00:00:00.000Z NaN P001 PARK ABREAST OF ANOTHER VEHICLE P001 PARK ABREAST OF ANOTHER VEHICLE 27.0 396916.0 38.905657 137533.0 Moving_Violations_in_July_2014.csv -77.035558
1938760 1938760 NaN 806627.0 NaN UT TUESDAY NaN 0.0 931.0 FRONT OF 441 BLOCK 4TH ST NW EAST S 7.0 13446929 NaN 1441658.0 1441658 MD 5688.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P282 PARKED IN A RESERVED CAR SHARING SPACE P282 PARKED IN A RESERVED CAR SHARING SPACE 27.0 398596.0 38.895446 136399.0 Moving_Violations_in_July_2014.csv -77.016187
1938761 1938761 NaN 808411.0 NaN UT TUESDAY NaN 0.0 1217.0 600 BLOCK E ST NW NORTH SIDE 7.0 13446930 NaN 1441635.0 1441635 MD 7200.0 2014-07-01T00:00:00.000Z NaN 2014-07-01T00:00:00.000Z NaN P037 PARK OVERTIME AT A METER P037 PARK OVERTIME AT A METER 27.0 398316.0 38.896139 136476.0 Moving_Violations_in_July_2014.csv -77.019415
1938762 1938762 NaN 803274.0 NaN 4D WEDNESDAY NaN 0.0 1652.0 600 BLOCK P ST NW NORTH SIDE 7.0 13446931 NaN 1444372.0 1444372 NC 2719.0 2014-07-30T00:00:00.000Z NaN 2014-07-30T00:00:00.000Z NaN P012 DISOBEYING OFFICIAL SIGN P012 DISOBEYING OFFICIAL SIGN 31.0 398142.0 38.909651 137976.0 Moving_Violations_in_July_2014.csv -77.021425
1938763 1938763 NaN 802201.0 NaN UT MONDAY NaN 0.0 1238.0 400 BLOCK M ST NW SOUTH SIDE 7.0 13446936 NaN 1447880.0 1447880 DC 1842.0 2014-07-14T00:00:00.000Z NaN 2014-07-14T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 29.0 398637.0 38.905652 137532.0 Moving_Violations_in_July_2014.csv -77.015716
1938764 1938764 NaN 814581.0 NaN 4D TUESDAY NaN 0.0 1101.0 1600 BLOCK R ST SE NORTH SIDE 7.0 13446937 NaN 1447900.0 1447900 VA 12568.0 2014-07-08T00:00:00.000Z NaN 2014-07-08T00:00:00.000Z NaN P112 FAIL TO REPORT FOR INSPECTION. P112 FAIL TO REPORT FOR INSPECTION. 28.0 401606.0 38.869474 133516.0 Moving_Violations_in_July_2014.csv -76.981496
1938765 1938765 NaN 813259.0 NaN UT SATURDAY NaN 0.0 1125.0 1300 BLOCK 14TH ST NW WEST SIDE 7.0 13446945 NaN 1452009.0 1452009 DC 11399.0 2014-07-12T00:00:00.000Z NaN 2014-07-12T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 28.0 397229.0 38.906622 137640.0 Moving_Violations_in_July_2014.csv -77.031950
1938766 1938766 NaN 279379.0 NaN 4D SATURDAY NaN 0.0 1102.0 FRONT OF 1755 MASSACHUSETTS AVE NW 7.0 13446947 NaN 1441799.0 1441799 DC 10208.0 2014-07-05T00:00:00.000Z NaN 2014-07-05T00:00:00.000Z NaN P199 PARK IN A DESIGNATED ENTRANCE P199 PARK IN A DESIGNATED ENTRANCE 27.0 396475.0 38.909051 137910.0 Moving_Violations_in_July_2014.csv -77.040644
1938767 1938767 NaN 805800.0 NaN UT THURSDAY NaN 0.0 1843.0 400 BLOCK SEWARD SQ SE SOUTH SIDE 7.0 13446948 NaN 1443834.0 1443834 OR 4968.0 2014-07-17T00:00:00.000Z NaN 2014-07-17T00:00:00.000Z NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 29.0 399995.0 38.885547 135300.0 Moving_Violations_in_July_2014.csv -77.000060
1938768 1938768 NaN 805361.0 NaN 4D THURSDAY NaN 0.0 735.0 3400 BLOCK YUMA ST NW SOUTH SIDE 7.0 13446955 NaN 1450079.0 1450079 VA 4596.0 2014-07-03T00:00:00.000Z NaN 2014-07-03T00:00:00.000Z NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 394257.0 38.946253 142041.0 Moving_Violations_in_July_2014.csv -77.066252

154379 rows × 31 columns


In [11]:
df.drop(['ticket_issue_date'], axis=1, inplace=True)

In [12]:
# drop day_of_week, month_of_year
df.drop(['day_of_week', 'month_of_year'], axis=1, inplace=True)

In [13]:
print df.shape
print df.isnull().sum()


(3993673, 28)
Unnamed: 0                     0
accidentindicator         154379
address_id               1115882
agencyid                  154384
body_style               3864793
fineamt                   308409
holiday                  3839294
issue_time               3839378
location                       0
objectid                       0
penalty1                 2812133
rowid_                   3839294
row_                           0
rp_plate_state           3839458
streetsegid               110277
ticketissuedate                0
tickettype                154379
totalpaid                 154379
violationcode                  0
violationdesc               4327
violation_code           3839294
violation_description    3843512
week_of_year             3839294
xcoord                         0
y                              0
ycoord                         0
filename                       0
x                             0
dtype: int64

In [14]:
df[~df['issue_time'].isnull()]


Out[14]:
Unnamed: 0 accidentindicator address_id agencyid body_style fineamt holiday issue_time location objectid penalty1 rowid_ row_ rp_plate_state streetsegid ticketissuedate tickettype totalpaid violationcode violationdesc violation_code violation_description week_of_year xcoord y ycoord filename x
1784390 1784390 NaN 809744.0 NaN VA NaN 0.0 1619.0 200 BLOCK 3RD ST SW EAST SIDE 122625 NaN 1398224.0 1398224 MD 8322.0 2014-07-17T00:00:00.000Z NaN NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 29.0 398684.0 38.887879 135559.0 Moving_Violations_in_July_2014.csv -77.015171
1784391 1784391 NaN 280105.0 NaN PU NaN 0.0 1024.0 FRONT OF 700 A ST SE NORTH SIDE 122628 NaN 1401793.0 1401793 DC 12261.0 2014-07-18T00:00:00.000Z NaN NaN P055 NO PARKING ANYTIME P055 NO PARKING ANYTIME 29.0 400357.0 38.888826 135664.0 Moving_Violations_in_July_2014.csv -76.995887
1784392 1784392 NaN 811553.0 NaN UT NaN 0.0 1151.0 2000 BLOCK VIRGINIA AVE NW NORTH SI 122630 NaN 1400359.0 1400359 MD 9916.0 2014-07-24T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 30.0 396015.0 38.894933 136343.0 Moving_Violations_in_July_2014.csv -77.045939
1784393 1784393 NaN 809521.0 NaN UT NaN 0.0 1813.0 5100 BLOCK MACARTHUR BLVD NW EAST S 122631 NaN 1398271.0 1398271 MD 8140.0 2014-07-16T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 29.0 391257.0 38.924284 139605.0 Moving_Violations_in_July_2014.csv -77.100828
1784394 1784394 NaN 811771.0 NaN 4D NaN 0.0 1154.0 1200 BLOCK H ST NE NORTH SIDE 122634 NaN 1406598.0 1406598 MD 10098.0 2014-07-14T00:00:00.000Z NaN NaN P170 FAILURE TO DISPLAY CURRENT TAGS P170 FAILURE TO DISPLAY CURRENT TAGS 29.0 400791.0 38.900212 136928.0 Moving_Violations_in_July_2014.csv -76.990883
1784395 1784395 NaN 804518.0 NaN 4D NaN 0.0 2145.0 1300 BLOCK U ST NW NORTH SIDE 122635 NaN 1404521.0 1404521 OH 3894.0 2014-07-29T00:00:00.000Z NaN NaN P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT 31.0 397330.0 38.917009 138793.0 Moving_Violations_in_July_2014.csv -77.030790
1784396 1784396 NaN 806786.0 NaN 4D NaN 0.0 1117.0 1200 BLOCK 5TH ST NE WEST SIDE 122637 NaN 1397544.0 1397544 MD 5820.0 2014-07-02T00:00:00.000Z NaN NaN P025 PARK LESS THAN 40 FEET FROM AN INTERSECTION P025 PARK LESS THAN 40 FEET FROM AN INTERSECTION 27.0 400043.0 38.906185 137591.0 Moving_Violations_in_July_2014.csv -76.999507
1784397 1784397 NaN 22742.0 NaN UT NaN 0.0 300.0 FRONT OF 4361 F ST SE SOUTH SIDE 122638 NaN 1397549.0 1397549 DC 2567.0 2014-07-02T00:00:00.000Z NaN NaN P170 FAILURE TO DISPLAY CURRENT TAGS P170 FAILURE TO DISPLAY CURRENT TAGS 27.0 405091.0 38.881595 134863.0 Moving_Violations_in_July_2014.csv -76.941327
1784398 1784398 NaN 813159.0 NaN UT NaN 0.0 2133.0 1100 BLOCK 17TH ST NW WEST SIDE 122639 NaN 1400423.0 1400423 DC 11313.0 2014-07-03T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 396660.0 38.903143 137254.0 Moving_Violations_in_July_2014.csv -77.038508
1784399 1784399 NaN 800713.0 NaN 4D NaN 0.0 817.0 BLOCK 2000 CONNECTICUT AVE NW WEST 122640 NaN 1401816.0 1401816 DC 745.0 2014-07-18T00:00:00.000Z NaN NaN P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 29.0 395966.0 38.916391 138725.0 Moving_Violations_in_July_2014.csv -77.046518
1784400 1784400 NaN 801812.0 NaN 2D NaN 0.0 1613.0 1700 BLOCK NEW YORK AVE NW NORTH SI 122642 NaN 1399653.0 1399653 MD 1505.0 2014-07-21T00:00:00.000Z NaN NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 30.0 396548.0 38.896521 136519.0 Moving_Violations_in_July_2014.csv -77.039796
1784401 1784401 NaN 806390.0 NaN 2D NaN 0.0 1414.0 1700 BLOCK E ST NW SOUTH SIDE 122643 NaN 1399665.0 1399665 VA 5492.0 2014-07-21T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 30.0 396692.0 38.895396 136394.0 Moving_Violations_in_July_2014.csv -77.038135
1784402 1784402 NaN 802182.0 NaN VA NaN 0.0 1609.0 4200 BLOCK NEBRASKA AV NW SOUTH SID 122644 NaN 1396901.0 1396901 OH 1827.0 2014-07-08T00:00:00.000Z NaN NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 28.0 392986.0 38.943767 141766.0 Moving_Violations_in_July_2014.csv -77.080911
1784403 1784403 NaN 801613.0 NaN 4D NaN 0.0 1112.0 1700 BLOCK 15TH ST NW EAST SIDE 122645 NaN 1396910.0 1396910 VA 1371.0 2014-07-08T00:00:00.000Z NaN NaN P173 NO PARKING STREET CLEANING P173 NO PARKING STREET CLEANING 28.0 397004.0 38.913359 138388.0 Moving_Violations_in_July_2014.csv -77.034547
1784404 1784404 NaN 803643.0 NaN 4D NaN 0.0 1636.0 3400 BLOCK CONNECTICUT AVE NW EAST 122646 NaN 1397602.0 1397602 MD 3262.0 2014-07-29T00:00:00.000Z NaN NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 31.0 394964.0 38.934835 140773.0 Moving_Violations_in_July_2014.csv -77.058087
1784405 1784405 NaN 800301.0 NaN VA NaN 0.0 1314.0 2000 BLOCK EYE ST NW NORTH SIDE 122647 NaN 1399000.0 1399000 DC 253.0 2014-07-10T00:00:00.000Z NaN NaN P007 PARK IN A BUS ZONE P007 PARK IN A BUS ZONE 28.0 396168.0 38.901348 137055.0 Moving_Violations_in_July_2014.csv -77.044179
1784406 1784406 NaN 801489.0 NaN 4D NaN 0.0 1407.0 3000 BLOCK WARDER ST NW WEST SIDE 122648 NaN 1400465.0 1400465 DC 1102.0 2014-07-01T00:00:00.000Z NaN NaN P173 NO PARKING STREET CLEANING P173 NO PARKING STREET CLEANING 27.0 398326.0 38.928326 140049.0 Moving_Violations_in_July_2014.csv -77.019308
1784407 1784407 NaN 804514.0 NaN TK NaN 0.0 1035.0 500 BLOCK 11TH ST NW EAST SIDE 122653 NaN 1398326.0 1398326 MD 3891.0 2014-07-28T00:00:00.000Z NaN NaN P385 STOPPING, STANDING OR PARKING VEHICLE IN BIKE ... P385 STOPPING, STANDING OR PARKING VEHICLE IN BIKE ... 31.0 397655.0 38.896750 136544.0 Moving_Violations_in_July_2014.csv -77.027035
1784408 1784408 NaN 812697.0 NaN 4D NaN 0.0 810.0 400 BLOCK N CAPITOL ST NW WEST SIDE 122611 NaN 1405767.0 1405767 VA 10918.0 2014-07-31T00:00:00.000Z NaN NaN P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 31.0 399214.0 38.895618 136418.0 Moving_Violations_in_July_2014.csv -77.009063
1784409 1784409 NaN 804634.0 NaN PU NaN 0.0 1210.0 1200 BLOCK 31ST ST NW WEST SIDE 122612 NaN 1405840.0 1405840 DC 3995.0 2014-07-10T00:00:00.000Z NaN NaN P002 STAND OR PARK IN ALLEY P002 STAND OR PARK IN ALLEY 28.0 394692.0 38.906007 137573.0 Moving_Violations_in_July_2014.csv -77.061199
1784410 1784410 NaN 801277.0 NaN PU NaN 0.0 909.0 1200 BLOCK 9TH ST NW EAST SIDE 122613 NaN 1401026.0 1401026 MD 1255.0 2014-07-02T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 397918.0 38.904912 137450.0 Moving_Violations_in_July_2014.csv -77.024006
1784411 1784411 NaN 800656.0 NaN TK NaN 0.0 1224.0 REAR OF 1400 R ST NW NORTH SIDE 122616 NaN 1400314.0 1400314 MD 697.0 2014-07-09T00:00:00.000Z NaN NaN P002 STAND OR PARK IN ALLEY P002 STAND OR PARK IN ALLEY 28.0 397173.0 38.912612 138305.0 Moving_Violations_in_July_2014.csv -77.032598
1784412 1784412 NaN 804439.0 NaN 2D NaN 0.0 12.0 BLOCK 1900 1ST ST NW EAST SIDE 122617 NaN 1402452.0 1402452 OH 3829.0 2014-07-02T00:00:00.000Z NaN NaN P076 NaN P076 NaN 27.0 398946.0 38.915634 138640.0 Moving_Violations_in_July_2014.csv -77.012156
1784413 1784413 NaN 804354.0 NaN 4D NaN 0.0 2336.0 BLOCK 1300 BRYANT ST NE NORTH SIDE 122618 NaN 1402462.0 1402462 MD 3758.0 2014-07-01T00:00:00.000Z NaN NaN P076 NaN P076 NaN 27.0 401165.0 38.921093 139246.0 Moving_Violations_in_July_2014.csv -76.986568
1784414 1784414 NaN 800067.0 NaN 4D NaN 0.0 537.0 500 BLOCK 9TH ST NE WEST SIDE 122619 NaN 1403807.0 1403807 VA 59.0 2014-07-02T00:00:00.000Z NaN NaN P020 PARK WITHIN 10 FEET OF A FIRE HYDRANT P020 PARK WITHIN 10 FEET OF A FIRE HYDRANT 27.0 400542.0 38.896050 136466.0 Moving_Violations_in_July_2014.csv -76.993754
1784415 1784415 NaN 805318.0 NaN UT NaN 0.0 1618.0 3900 BLOCK CONNECTICUT AVE NW WEST 122623 NaN 1401078.0 1401078 MD 4561.0 2014-07-22T00:00:00.000Z NaN NaN P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE P259 NO STOPPING OR STANDING IN PM RUSH HOUR ZONE 30.0 394764.0 38.938815 141215.0 Moving_Violations_in_July_2014.csv -77.060397
1784416 1784416 NaN 815376.0 NaN 4D NaN 0.0 2225.0 1900 BLOCK BELMONT RD NW SOUTH SIDE 123148 NaN 1467400.0 1467400 MD 13270.0 2014-07-11T00:00:00.000Z NaN NaN P050 PARKED WITHIN 25 FEET OF A STOP SIGN P050 PARKED WITHIN 25 FEET OF A STOP SIGN 28.0 396273.0 38.920581 139190.0 Moving_Violations_in_July_2014.csv -77.042980
1784417 1784417 NaN 811078.0 NaN 4D NaN 0.0 1501.0 3200 BLOCK S ST NW SOUTH SIDE 123149 NaN 1466720.0 1466720 VA 9498.0 2014-07-05T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 394301.0 38.914706 138539.0 Moving_Violations_in_July_2014.csv -77.065715
1784418 1784418 NaN 805282.0 NaN PU NaN 0.0 1138.0 1700 BLOCK 36TH ST NW EAST SIDE 123153 NaN 1464610.0 1464610 PA 4531.0 2014-07-01T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 393831.0 38.914072 138469.0 Moving_Violations_in_July_2014.csv -77.071134
1784419 1784419 NaN 808009.0 NaN PU NaN 0.0 1141.0 3600 BLOCK MACOMB ST NW NORTH SIDE 123169 NaN 1469677.0 1469677 MD 6860.0 2014-07-01T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 393952.0 38.933495 140625.0 Moving_Violations_in_July_2014.csv -77.069758
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1938739 1938739 NaN 803810.0 NaN 4D NaN 0.0 1256.0 700 BLOCK NORTH CAPITOL ST NW WEST 13446849 NaN 1440221.0 1440221 NC 3513.0 2014-07-01T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 399213.0 38.898203 136705.0 Moving_Violations_in_July_2014.csv -77.009075
1938740 1938740 NaN 809878.0 NaN 4D NaN 0.0 1125.0 1100 BLOCK 10TH ST NW WEST SIDE 13446852 NaN 1432629.0 1432629 MD 8440.0 2014-07-01T00:00:00.000Z NaN NaN P322 PARK OVERTIME IN TIMED ZONE P322 PARK OVERTIME IN TIMED ZONE 27.0 397746.0 38.904912 137450.0 Moving_Violations_in_July_2014.csv -77.025989
1938741 1938741 NaN 806752.0 NaN 4D NaN 0.0 1158.0 4700 BLOCK RESERVOIR RD NW NORTH SI 13446858 NaN 1434676.0 1434676 VA 5790.0 2014-07-09T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 28.0 392022.0 38.914734 138544.0 Moving_Violations_in_July_2014.csv -77.091993
1938742 1938742 NaN 800405.0 NaN 4D NaN 0.0 1931.0 1700 BLOCK COLUMBIA RD NW NORTH SID 13446864 NaN 1433401.0 1433401 IL 339.0 2014-07-02T00:00:00.000Z NaN NaN P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT P281 FAIL TO DISPLAY A MULTISPACE METER RECEIPT 27.0 396379.0 38.923293 139491.0 Moving_Violations_in_July_2014.csv -77.041760
1938743 1938743 NaN 802404.0 NaN UT NaN 0.0 713.0 1600 BLOCK HARVARD ST NW SOUTH SIDE 13446865 NaN 1434091.0 1434091 MD 1936.0 2014-07-10T00:00:00.000Z NaN NaN P269 NO STANDING ANYTIME P269 NO STANDING ANYTIME 28.0 396879.0 38.926322 139827.0 Moving_Violations_in_July_2014.csv -77.035995
1938744 1938744 NaN 802675.0 NaN TK NaN 0.0 1653.0 2600 BLOCK M L KING JR AV SE WEST S 13446875 NaN 1432805.0 1432805 MD 2053.0 2014-07-08T00:00:00.000Z NaN NaN P057 NO PARKING SPECIFIC HOURS P057 NO PARKING SPECIFIC HOURS 28.0 400366.0 38.857449 132181.0 Moving_Violations_in_July_2014.csv -76.995785
1938745 1938745 NaN 815142.0 NaN 2D NaN 0.0 703.0 1400 BLOCK 22ND ST NW EAST SIDE 13446880 NaN 1434293.0 1434293 MD 13058.0 2014-07-23T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 30.0 395768.0 38.908238 137820.0 Moving_Violations_in_July_2014.csv -77.048795
1938746 1938746 NaN 807781.0 NaN PU NaN 0.0 703.0 2000 BLOCK T ST NW SOUTH SIDE 13446885 NaN 1437759.0 1437759 MD 6674.0 2014-07-01T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 396081.0 38.915851 138665.0 Moving_Violations_in_July_2014.csv -77.045191
1938747 1938747 NaN 815378.0 NaN UT NaN 0.0 845.0 2700 BLOCK 42ND ST NW WEST SIDE 13446893 NaN 1437863.0 1437863 DC 13272.0 2014-07-10T00:00:00.000Z NaN NaN P057 NO PARKING SPECIFIC HOURS P057 NO PARKING SPECIFIC HOURS 28.0 392920.0 38.924182 139592.0 Moving_Violations_in_July_2014.csv -77.081650
1938748 1938748 NaN 804086.0 NaN UT NaN 0.0 140.0 900 BLOCK L ST NW SOUTH SIDE 13446897 NaN 1437231.0 1437231 MD 3521.0 2014-07-09T00:00:00.000Z NaN NaN P055 NO PARKING ANYTIME P055 NO PARKING ANYTIME 28.0 397832.0 38.904173 137368.0 Moving_Violations_in_July_2014.csv -77.024997
1938749 1938749 NaN 802306.0 NaN 4D NaN 0.0 1001.0 3500 BLOCK W PL NW NORTH SIDE 13446903 NaN 1442825.0 1442825 VA 2991.0 2014-07-01T00:00:00.000Z NaN NaN P012 DISOBEYING OFFICIAL SIGN P012 DISOBEYING OFFICIAL SIGN 27.0 393835.0 38.918973 139013.0 Moving_Violations_in_July_2014.csv -77.071093
1938750 1938750 NaN 800405.0 NaN 4D NaN 0.0 2058.0 1800 BLOCK COLUMBIA RD NW SOUTH SID 13446905 NaN 1444185.0 1444185 NJ 339.0 2014-07-09T00:00:00.000Z NaN NaN P280 EXPIRATION TIME ON METER RECEIPT LAPSED P280 EXPIRATION TIME ON METER RECEIPT LAPSED 28.0 396379.0 38.923293 139491.0 Moving_Violations_in_July_2014.csv -77.041760
1938751 1938751 NaN 803006.0 NaN 4D NaN 0.0 846.0 1000 BLOCK WISCONSIN AVE NW WEST SI 13446907 NaN 1447010.0 1447010 DC 2497.0 2014-07-01T00:00:00.000Z NaN NaN P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE P159 NO STOPPING OR STANDING IN AM RUSH HOUR ZONE 27.0 394556.0 38.903015 137241.0 Moving_Violations_in_July_2014.csv -77.062764
1938752 1938752 NaN 804634.0 NaN VA NaN 0.0 1047.0 1300 BLOCK 31ST ST NW EAST SIDE 13446913 NaN 1442180.0 1442180 MD 3995.0 2014-07-01T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 27.0 394692.0 38.906007 137573.0 Moving_Violations_in_July_2014.csv -77.061199
1938753 1938753 NaN 804593.0 NaN UT NaN 0.0 1513.0 400 BLOCK F ST NW NORTH SIDE 13446914 NaN 1442200.0 1442200 DC 3961.0 2014-07-02T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 398477.0 38.897337 136609.0 Moving_Violations_in_July_2014.csv -77.017559
1938754 1938754 NaN 801566.0 NaN VA NaN 0.0 1242.0 200 BLOCK R ST NE NORTH SIDE 13446915 NaN 1442857.0 1442857 VA 1328.0 2014-07-10T00:00:00.000Z NaN NaN P173 NO PARKING STREET CLEANING P173 NO PARKING STREET CLEANING 28.0 399526.0 38.912608 138304.0 Moving_Violations_in_July_2014.csv -77.005468
1938755 1938755 NaN 813496.0 NaN 4D NaN 0.0 1050.0 400 BLOCK ELM ST NW NORTH SIDE 13446916 NaN 1442865.0 1442865 MD 11615.0 2014-07-10T00:00:00.000Z NaN NaN P010 OBSTRUCTING CROSSWALK P010 OBSTRUCTING CROSSWALK 28.0 398547.0 38.917435 138840.0 Moving_Violations_in_July_2014.csv -77.016757
1938756 1938756 NaN 812162.0 NaN PU NaN 0.0 746.0 300 BLOCK C ST NW NORTH SIDE 13446921 NaN 1442279.0 1442279 DC 10438.0 2014-07-02T00:00:00.000Z NaN NaN P307 PARK IN RESERVED SPACE FOR MOTORCYCLES ONLY P307 PARK IN RESERVED SPACE FOR MOTORCYCLES ONLY 27.0 398475.0 38.893599 136194.0 Moving_Violations_in_July_2014.csv -77.017581
1938757 1938757 NaN 802860.0 NaN 4D NaN 0.0 1311.0 1700A VERMONT AVE NW WEST SIDE 13446923 NaN 1447165.0 1447165 MD 2347.0 2014-07-01T00:00:00.000Z NaN NaN P322 PARK OVERTIME IN TIMED ZONE P322 PARK OVERTIME IN TIMED ZONE 27.0 397564.0 38.912271 138267.0 Moving_Violations_in_July_2014.csv -77.028090
1938758 1938758 NaN 814533.0 NaN VA NaN 0.0 844.0 1300 BLOCK SAVANNAH ST SE SOUTH SID 13446926 NaN 1447123.0 1447123 DC 12528.0 2014-07-11T00:00:00.000Z NaN NaN P170 FAILURE TO DISPLAY CURRENT TAGS P170 FAILURE TO DISPLAY CURRENT TAGS 28.0 400954.0 38.843153 130594.0 Moving_Violations_in_July_2014.csv -76.989013
1938759 1938759 NaN 803647.0 NaN TK NaN 0.0 1304.0 1500 BLOCK M ST NW NORTH SIDE 13446927 NaN 1447801.0 1447801 MD 3266.0 2014-07-02T00:00:00.000Z NaN NaN P001 PARK ABREAST OF ANOTHER VEHICLE P001 PARK ABREAST OF ANOTHER VEHICLE 27.0 396916.0 38.905657 137533.0 Moving_Violations_in_July_2014.csv -77.035558
1938760 1938760 NaN 806627.0 NaN UT NaN 0.0 931.0 FRONT OF 441 BLOCK 4TH ST NW EAST S 13446929 NaN 1441658.0 1441658 MD 5688.0 2014-07-01T00:00:00.000Z NaN NaN P282 PARKED IN A RESERVED CAR SHARING SPACE P282 PARKED IN A RESERVED CAR SHARING SPACE 27.0 398596.0 38.895446 136399.0 Moving_Violations_in_July_2014.csv -77.016187
1938761 1938761 NaN 808411.0 NaN UT NaN 0.0 1217.0 600 BLOCK E ST NW NORTH SIDE 13446930 NaN 1441635.0 1441635 MD 7200.0 2014-07-01T00:00:00.000Z NaN NaN P037 PARK OVERTIME AT A METER P037 PARK OVERTIME AT A METER 27.0 398316.0 38.896139 136476.0 Moving_Violations_in_July_2014.csv -77.019415
1938762 1938762 NaN 803274.0 NaN 4D NaN 0.0 1652.0 600 BLOCK P ST NW NORTH SIDE 13446931 NaN 1444372.0 1444372 NC 2719.0 2014-07-30T00:00:00.000Z NaN NaN P012 DISOBEYING OFFICIAL SIGN P012 DISOBEYING OFFICIAL SIGN 31.0 398142.0 38.909651 137976.0 Moving_Violations_in_July_2014.csv -77.021425
1938763 1938763 NaN 802201.0 NaN UT NaN 0.0 1238.0 400 BLOCK M ST NW SOUTH SIDE 13446936 NaN 1447880.0 1447880 DC 1842.0 2014-07-14T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 29.0 398637.0 38.905652 137532.0 Moving_Violations_in_July_2014.csv -77.015716
1938764 1938764 NaN 814581.0 NaN 4D NaN 0.0 1101.0 1600 BLOCK R ST SE NORTH SIDE 13446937 NaN 1447900.0 1447900 VA 12568.0 2014-07-08T00:00:00.000Z NaN NaN P112 FAIL TO REPORT FOR INSPECTION. P112 FAIL TO REPORT FOR INSPECTION. 28.0 401606.0 38.869474 133516.0 Moving_Violations_in_July_2014.csv -76.981496
1938765 1938765 NaN 813259.0 NaN UT NaN 0.0 1125.0 1300 BLOCK 14TH ST NW WEST SIDE 13446945 NaN 1452009.0 1452009 DC 11399.0 2014-07-12T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 28.0 397229.0 38.906622 137640.0 Moving_Violations_in_July_2014.csv -77.031950
1938766 1938766 NaN 279379.0 NaN 4D NaN 0.0 1102.0 FRONT OF 1755 MASSACHUSETTS AVE NW 13446947 NaN 1441799.0 1441799 DC 10208.0 2014-07-05T00:00:00.000Z NaN NaN P199 PARK IN A DESIGNATED ENTRANCE P199 PARK IN A DESIGNATED ENTRANCE 27.0 396475.0 38.909051 137910.0 Moving_Violations_in_July_2014.csv -77.040644
1938767 1938767 NaN 805800.0 NaN UT NaN 0.0 1843.0 400 BLOCK SEWARD SQ SE SOUTH SIDE 13446948 NaN 1443834.0 1443834 OR 4968.0 2014-07-17T00:00:00.000Z NaN NaN P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT P003 RESIDENTIAL PERMIT PKING BEYOND LIMIT W/O PERMIT 29.0 399995.0 38.885547 135300.0 Moving_Violations_in_July_2014.csv -77.000060
1938768 1938768 NaN 805361.0 NaN 4D NaN 0.0 735.0 3400 BLOCK YUMA ST NW SOUTH SIDE 13446955 NaN 1450079.0 1450079 VA 4596.0 2014-07-03T00:00:00.000Z NaN NaN P039 PARK AT EXPIRED METER P039 PARK AT EXPIRED METER 27.0 394257.0 38.946253 142041.0 Moving_Violations_in_July_2014.csv -77.066252

154295 rows × 28 columns


In [15]:
cols = list(df.columns.values)
cols


Out[15]:
['Unnamed: 0',
 'accidentindicator',
 'address_id',
 'agencyid',
 'body_style',
 'fineamt',
 'holiday',
 'issue_time',
 'location',
 'objectid',
 'penalty1',
 'rowid_',
 'row_',
 'rp_plate_state',
 'streetsegid',
 'ticketissuedate',
 'tickettype',
 'totalpaid',
 'violationcode',
 'violationdesc',
 'violation_code',
 'violation_description',
 'week_of_year',
 'xcoord',
 'y',
 'ycoord',
 'filename',
 '\xef\xbb\xbfx']

In [16]:
# move some columns around at the end of the data set

df = df[['Unnamed: 0',
 'accidentindicator',
 'address_id',
 'agencyid',
 'body_style',
 'fineamt',
 'holiday',
 'issue_time',
 'location',
 'objectid',
 'penalty1',
 'rowid_',
 'row_',
 'rp_plate_state',
 'streetsegid',
 'ticketissuedate',
 'tickettype',
 'totalpaid',
 'violationcode',
 'violationdesc',
 'violation_code',
 'violation_description',
 'week_of_year',
 'xcoord', 'ycoord',
 '\xef\xbb\xbfx', 'y',
 'filename'
 ]]

In [17]:
df.head(2)


Out[17]:
Unnamed: 0 accidentindicator address_id agencyid body_style fineamt holiday issue_time location objectid penalty1 rowid_ row_ rp_plate_state streetsegid ticketissuedate tickettype totalpaid violationcode violationdesc violation_code violation_description week_of_year xcoord ycoord x y filename
0 0 No 31943.0 25.0 NaN 50.0 NaN NaN 3700 BLK SOUTHERN AVENUE SE (SW/B) 119 50.0 NaN 195854 NaN 1435.0 2009-04-01T00:00:00.000Z Photo 0.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT NaN NaN NaN 404133.54 132428.01 -76.952377 38.859665 Moving_Violations_in_April_2009.csv
1 1 No 301049.0 25.0 NaN 50.0 NaN NaN 3400 BLK BENNING ROAD NE W/B 120 NaN NaN 194461 NaN 6467.0 2009-04-01T00:00:00.000Z Photo 50.0 T119 SPEED 11-15 MPH OVER THE SPEED LIMIT NaN NaN NaN 404151.99 136570.94 -76.952139 38.896986 Moving_Violations_in_April_2009.csv

In [18]:
# drop week_of_year, rowid_, row_, holiday, issue_time
df.drop(['week_of_year', 'rowid_', 'row_', 'holiday', 'issue_time'], axis=1, inplace=True)

In [19]:
print df.shape
print df.head(2)


(3993673, 23)
   Unnamed: 0 accidentindicator  address_id  agencyid body_style  fineamt  \
0           0                No     31943.0      25.0        NaN     50.0   
1           1                No    301049.0      25.0        NaN     50.0   

                             location  objectid  penalty1 rp_plate_state  \
0  3700 BLK SOUTHERN AVENUE SE (SW/B)       119      50.0            NaN   
1        3400 BLK BENNING ROAD NE W/B       120       NaN            NaN   

   streetsegid           ticketissuedate tickettype  totalpaid violationcode  \
0       1435.0  2009-04-01T00:00:00.000Z      Photo        0.0          T119   
1       6467.0  2009-04-01T00:00:00.000Z      Photo       50.0          T119   

                          violationdesc violation_code violation_description  \
0  SPEED 11-15 MPH OVER THE SPEED LIMIT            NaN                   NaN   
1  SPEED 11-15 MPH OVER THE SPEED LIMIT            NaN                   NaN   

      xcoord     ycoord         x          y  \
0  404133.54  132428.01 -76.952377  38.859665   
1  404151.99  136570.94 -76.952139  38.896986   

                              filename  
0  Moving_Violations_in_April_2009.csv  
1  Moving_Violations_in_April_2009.csv  

In [ ]: