Simple schema:
Date range:
In [1]:
# 2004 --- 2017
! head ../../IL-New-Data/ILtrafficstops-2016-10-03.csv
In [2]:
! ls -l ../../IL-New-Data/IL-DEC182018
total 1528984
-r-xr-xr-x 1 copelco staff 71987504 Aug 2 2010 2004 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 74590654 Aug 2 2010 2005 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 74902678 Aug 2 2010 2006 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 69306736 May 30 2018 2007 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 71554102 Aug 2 2010 2008 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 71634921 Aug 2 2010 2009 ITSS Data.zip
drwxr-xr-x 4 copelco staff 128 Feb 5 10:48 2009 Raw Data
-r-xr-xr-x 1 copelco staff 36139089 Jun 21 2012 2010 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 33394481 Jul 3 2012 2011 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 36092200 Jul 12 2013 2012 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 34271224 Oct 23 14:32 2013 ITSS Data.zip
-r-xr-xr-x 1 copelco staff 33097198 Oct 23 14:35 2014 ITSS Data.zip
drwx------ 5 copelco staff 160 Feb 5 10:14 2015 ITSS Data
-r-xr-xr-x 1 copelco staff 32780490 Oct 23 14:38 2015 ITSS Data.zip
drwxr-xr-x 4 copelco staff 128 Feb 5 11:12 2016 ITSS Data
-r-xr-xr-x 1 copelco staff 35392723 Jul 13 2017 2016 ITSS Data.zip
drwx------ 4 copelco staff 128 Feb 5 12:05 2017 ITSS Data
-r-xr-xr-x 1 copelco staff 35810474 Jun 13 2018 2017 ITSS Data.zip
In [3]:
# very different format
! head "../../IL-New-Data/IL-DEC182018/2015 ITSS Data/2015 ITSS Data.txt"
In [4]:
! wc -l "../../IL-New-Data/IL-DEC182018/2015 ITSS Data/2015 ITSS Data.txt"
2022333 ../../IL-New-Data/IL-DEC182018/2015 ITSS Data/2015 ITSS Data.txt
In [5]:
import pandas as pd
In [6]:
filename = "../../IL-New-Data/IL-DEC182018/2015 ITSS Data/2015 ITSS Data.txt"
reader = pd.read_csv(filename, sep="~", chunksize=10 ** 8, iterator=True)
In [7]:
df = reader.get_chunk(20)
df
Out[7]:
AgencyName
AgencyCode
DateOfStop
TimeOfStop
DurationOfStop
ZIP
VehicleMake
VehicleYear
DriversYearofBirth
DriverSex
...
PoliceDogAlertIfSniffed
PoliceDogVehicleSearched
PoliceDogContrabandFound
PoliceDogDrugsFound
PoliceDogDrugParaphernaliaFound
PoliceDogAlcoholFound
PoliceDogWeaponFound
PoliceDogStolenPropertyFound
PoliceDogOtherContrabandFound
PoliceDogDrugAmount
0
ABINGDON POLICE
13462
10/24/2015
18:03
10
61401
Dodge
2000
1974
2
...
0
0
0
0
0
0
0
0
0
0
1
ABINGDON POLICE
13462
10/23/2015
23:33
10
61410
Chevrolet
2004
1988
1
...
0
0
0
0
0
0
0
0
0
0
2
ABINGDON POLICE
13462
10/24/2015
18:35
10
61410
Chrysler
2008
1998
2
...
0
0
0
0
0
0
0
0
0
0
3
ABINGDON POLICE
13462
10/25/2015
00:36
10
61410
Yukon
2001
1974
2
...
0
0
0
0
0
0
0
0
0
0
4
ABINGDON POLICE
13462
10/23/2015
03:30
10
52722
Chevrolet
2000
1964
1
...
0
0
0
0
0
0
0
0
0
0
5
ABINGDON POLICE
13462
10/23/2015
07:40
10
61410
Ford
1997
1991
1
...
0
0
0
0
0
0
0
0
0
0
6
ABINGDON POLICE
13462
10/23/2015
18:40
10
61469
Honda
2002
1978
1
...
0
0
0
0
0
0
0
0
0
0
7
ABINGDON POLICE
13462
10/22/2015
22:52
20
61428
Chevrolet
2002
1993
1
...
0
0
0
0
0
0
0
0
0
0
8
ABINGDON POLICE
13462
10/25/2015
00:29
14
61410
Jeep
2005
1993
1
...
0
0
0
0
0
0
0
0
0
0
9
ABINGDON POLICE
13462
10/19/2015
22:38
10
61410
Dodge
1996
1993
1
...
0
0
0
0
0
0
0
0
0
0
10
ABINGDON POLICE
13462
10/17/2015
23:47
10
61410
Buick
2011
1996
1
...
0
0
0
0
0
0
0
0
0
0
11
ABINGDON POLICE
13462
10/24/2015
22:16
40
61410
Audi
1999
1975
1
...
0
0
0
0
0
0
0
0
0
0
12
ABINGDON POLICE
13462
10/17/2015
22:03
13
61415
GMC
2015
1967
1
...
0
0
0
0
0
0
0
0
0
0
13
ABINGDON POLICE
13462
10/17/2015
23:15
13
61477
Chevrolet
2007
1955
1
...
0
0
0
0
0
0
0
0
0
0
14
ABINGDON POLICE
13462
10/17/2015
00:45
10
61410
Chevrolet
2004
1996
1
...
0
0
0
0
0
0
0
0
0
0
15
ABINGDON POLICE
13462
10/16/2015
21:05
10
61410
Chrysler
2005
1994
2
...
0
0
0
0
0
0
0
0
0
0
16
ABINGDON POLICE
13462
10/16/2015
19:25
10
77520
Chevrolet
2005
1978
2
...
0
0
0
0
0
0
0
0
0
0
17
ABINGDON POLICE
13462
10/20/2015
21:54
13
61410
Dodge
1981
1953
2
...
0
0
0
0
0
0
0
0
0
0
18
ABINGDON POLICE
13462
03/23/2015
22:52
14
61410
GMC
2008
1954
2
...
0
0
0
0
0
0
0
0
0
0
19
ABINGDON POLICE
13462
03/23/2015
23:43
13
60510
Saturn
2007
1989
2
...
0
0
0
0
0
0
0
0
0
0
20 rows × 54 columns
In [8]:
list(df.columns.values)
Out[8]:
['AgencyName',
'AgencyCode',
'DateOfStop',
'TimeOfStop',
'DurationOfStop',
'ZIP',
'VehicleMake',
'VehicleYear',
'DriversYearofBirth',
'DriverSex',
'DriverRace',
'ReasonForStop',
'TypeOfMovingViolation',
'ResultOfStop',
'BeatLocationOfStop',
'VehicleConsentSearchRequested',
'VehicleConsentGiven',
'VehicleSearchConducted',
'VehicleSearchConductedBy',
'VehicleContrabandFound',
'VehicleDrugsFound',
'VehicleDrugParaphernaliaFound',
'VehicleAlcoholFound',
'VehicleWeaponFound',
'VehicleStolenPropertyFound',
'VehicleOtherContrabandFound',
'VehicleDrugAmount',
'DriverConsentSearchRequested',
'DriverConsentGiven',
'DriverSearchConducted',
'DriverSearchConductedBy',
'PassengerConsentSearchRequested',
'PassengerConsentGiven',
'PassengerSearchConducted',
'PassengerSearchConductedBy',
'DriverPassengerContrabandFound',
'DriverPassengerDrugsFound',
'DriverPassengerDrugParaphernaliaFound',
'DriverPassengerAlcoholFound',
'DriverPassengerWeaponFound',
'DriverPassengerStolenPropertyFound',
'DriverPassengerOtherContrabandFound',
'DriverPassengerDrugAmount',
'PoliceDogPerformSniffOfVehicle',
'PoliceDogAlertIfSniffed',
'PoliceDogVehicleSearched',
'PoliceDogContrabandFound',
'PoliceDogDrugsFound',
'PoliceDogDrugParaphernaliaFound',
'PoliceDogAlcoholFound',
'PoliceDogWeaponFound',
'PoliceDogStolenPropertyFound',
'PoliceDogOtherContrabandFound',
'PoliceDogDrugAmount']
In [9]:
filename = "../../IL-New-Data/IL-DEC182018/2015 ITSS Data/2015 ITSS Data.txt"
df = pd.read_csv(filename, sep="~", encoding="iso-8859-1")
/Users/copelco/.pyenv/versions/3.6.6/envs/open-data-policing/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3020: DtypeWarning: Columns (5,14) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [10]:
len(df.index)
Out[10]:
2022332
In [11]:
df.AgencyName.value_counts()
Out[11]:
ILLINOIS STATE POLICE 294397
CHICAGO POLICE 85965
COOK COUNTY SHERIFF 28439
AURORA POLICE 24976
HOFFMAN ESTATES POLICE 23799
PALATINE POLICE 22661
EVERGREEN PARK POLICE 20955
NORMAL POLICE 19637
SCHAUMBURG POLICE 19120
ELK GROVE VILLAGE POLICE 17770
BOLINGBROOK POLICE 17524
SPRINGFIELD POLICE 15910
BELLEVILLE POLICE 14296
ARLINGTON HEIGHTS POLICE 14234
DOWNERS GROVE POLICE 12659
LOMBARD POLICE 12256
BLOOMINGDALE POLICE 12196
ORLAND PARK POLICE 12062
LAKE COUNTY SHERIFF 12018
EVANSTON POLICE 11915
OAK LAWN POLICE 11748
MOUNT PROSPECT POLICE 11737
NAPERVILLE POLICE 11521
ELMHURST POLICE 11426
PALOS HEIGHTS POLICE 11164
WHEELING POLICE 10976
HANOVER PARK POLICE 10682
BUFFALO GROVE POLICE 10551
CAROL STREAM POLICE 10541
ELGIN POLICE 10275
...
SCOTT COUNTY SHERIFF 8
MORAINE VALLEY COMMUNITY COLLEGE POLICE 7
MCNABB POLICE 7
SOUTHERN ILLINOIS UNIVERSITY SCHOOL OF MEDICINE POLICE 7
RAMSEY POLICE 6
APPLE RIVER POLICE 6
ASTORIA POLICE 5
SUMMERFIELD POLICE 5
KANE COUNTY FOREST PRESERVE POLICE 5
OLMSTED POLICE 5
ELKHART POLICE 5
LIVINGSTON POLICE 5
WITT POLICE 4
MENDON POLICE 4
TAMPICO POLICE 4
TOULON POLICE 4
PATOKA POLICE 4
IRVING POLICE 4
WINCHESTER POLICE 3
CANADIAN NATIONAL RAILWAY POLICE 3
CREAL SPRINGS POLICE 3
ST. JACOB POLICE 3
ASSUMPTION POLICE 2
BEMENT POLICE 2
CAVE-IN-ROCK POLICE 2
WAMAC POLICE 1
STEWARDSON POLICE 1
BROCTON POLICE 1
LIBERTY POLICE 1
WYOMING POLICE 1
Name: AgencyName, dtype: int64
In [12]:
df.VehicleSearchConducted.value_counts()
Out[12]:
2 1941508
1 80824
Name: VehicleSearchConducted, dtype: int64
In [13]:
is_search = df['VehicleSearchConducted'] == 1
len(df[is_search].index)
Out[13]:
80824
In [14]:
df[is_search].groupby(df.AgencyName)['AgencyName'].count().sort_values(ascending=False)
Out[14]:
AgencyName
ILLINOIS STATE POLICE 7471
CHICAGO POLICE 4273
PONTOON BEACH POLICE 2618
AURORA POLICE 2303
GLENCOE POLICE 2214
SPRINGFIELD POLICE 2038
CENTRALIA POLICE 2028
MERCER COUNTY SHERIFF 1789
SALEM POLICE 1412
MARION COUNTY SHERIFF 1171
CLINTON COUNTY SHERIFF 1069
HOMEWOOD POLICE 928
MONMOUTH POLICE 927
HANOVER PARK POLICE 918
STREATOR POLICE 907
FAIRVIEW HEIGHTS POLICE 828
ROCKFORD POLICE 822
TRENTON POLICE 732
BELLEVILLE POLICE 703
BLOOMINGTON POLICE 695
HANCOCK COUNTY SHERIFF 690
WINNEBAGO COUNTY SHERIFF 685
GRUNDY COUNTY SHERIFF 677
EVANSTON POLICE 667
BELVIDERE POLICE 599
NEW BADEN POLICE 531
MUNDELEIN POLICE 524
DECATUR POLICE 513
ELGIN POLICE 491
JOLIET POLICE 490
...
DALZELL POLICE 1
CRYSTAL LAKE PARK DISTRICT POLICE 1
SHANNON POLICE 1
THOMPSONVILLE POLICE 1
OKAWVILLE POLICE 1
BUCKLEY POLICE 1
HARWOOD HEIGHTS POLICE 1
JOHNSBURG POLICE 1
HANOVER POLICE 1
TOLEDO POLICE 1
TREMONT POLICE 1
CASEY POLICE 1
SHERRARD POLICE 1
CATLIN POLICE 1
CHAPIN POLICE 1
NOKOMIS POLICE 1
TRITON COLLEGE POLICE 1
NEWTON POLICE 1
CLIFTON POLICE 1
WAYNE CITY POLICE 1
SMITHTON POLICE 1
COBDEN POLICE 1
GRANT PARK POLICE 1
KINMUNDY POLICE 1
MOUNT STERLING POLICE 1
WAUBONSEE COMMUNITY COLLEGE POLICE 1
MOUNT CARROLL POLICE 1
GOREVILLE POLICE 1
SOUTHWESTERN ILLINOIS COLLEGE POLICE 1
SPRING BAY POLICE 1
Name: AgencyName, dtype: int64
In [15]:
df.DriverRace.unique()
Out[15]:
array([ 1, 4, 2, 3, 5, 6, 999])
In [16]:
df.DriverSex.unique()
Out[16]:
array([ 2., 1., nan])
In [ ]:
Content source: OpenDataPolicingNC/Traffic-Stops
Similar notebooks: