In [ ]:
# http://pbpython.com/excel-file-combine.html

In [119]:
import datetime
import pandas as pd

In [30]:
df = pd.read_excel('/Users/copelco/Desktop/PIALog_16-0806.xlsx', sheetname=[0,1,2])

In [69]:
df[1].columns = df[0].columns
df[2].columns = df[0].columns
stops = df[0].append(df[1], ignore_index=True)
stops = stops.append(df[2], ignore_index=True)

In [71]:
len(stops.index)


Out[71]:
2257860

In [97]:
stops.AGENCY.value_counts()


Out[97]:
MSP                641952
MONTGOMERY         230317
BACOPD             222241
MDTA               180312
AACOPD             126123
PGCOPD             123212
HOCPD               91294
FREDERICK           44122
HARFORD             41243
OCEANCITY           39378
CHARLES             36301
UMCP                24220
FREDERICKPD         23380
CARROLL             20200
GAITHERSBURG        19971
WASHINGTON          18526
CALVERT             17947
GREENBELT           16769
LAUREL              16689
MCPARK              16549
ROCKVILLE           14910
TAKOMA              14318
EASTON              13696
STMARYS             13193
ANNAPOLIS           11987
WICOMICO            11327
WESTMINSTER         10991
CECIL               10447
PGPARK              10443
BOWIE               10229
                    ...  
UNIVERSITYPARK        414
ROCKHALL              407
GREENSBORO            370
COMPTROLLER           360
COPPIN                347
DELTA                 331
SMITHSBURG            315
DGS                   314
BALTIMORESCHOOL       294
SALISBURYUNIV         291
HANCOCK               275
UMES                  271
BRENTWOOD             166
COLMAR                148
MVA                   135
OXFORD                117
AACCPSP               115
AACOSHERIFF           110
PERRYVILLE             84
MANCHESTER             79
FAIRMOUNT              70
BALTSHERIFF            63
OAKLAND                55
PGCOSHERIFF            24
FORTDETRICK            23
TRAPPE                 22
FORTMEADE              13
BOWIEUNIV               7
DHMH                    4
BALTIMORE               2
Name: AGENCY, dtype: int64

In [78]:
# covert datetime column
stops['datetime'] = pd.to_datetime(stops['STOPDATE'])

In [128]:
stops.dtypes


Out[128]:
STOPDATE                      object
TIME_OF_STOP                  object
LOCATION                      object
DURATION                      object
STOP_REASON                   object
SEARCH_CONDUCTED              object
SEARCH_REASON                 object
WHATSEARCHED                  object
SEIZED                        object
STOPOUTCOME                   object
CRIME_CHARGED                 object
REGISTRATION_STATE            object
GENDER                        object
DOB                           object
RESIDENCE_STATE               object
MD_COUNTY                     object
ETHNICITY                     object
OFFICERID                      int64
AGENCY                        object
datetime              datetime64[ns]
dtype: object

In [129]:
years = stops['datetime'].map(lambda x: x.year)
stops.groupby(years)['datetime'].count()


Out[129]:
datetime
2012     30818
2013    673441
2014    747158
2015    806443
Name: datetime, dtype: int64

In [132]:
stops.groupby([years, stops.ETHNICITY])['ETHNICITY'].count()


Out[132]:
datetime  ETHNICITY      
2012      ASIAN                 564
          BLACK               11107
          BLK                     1
          F                       3
          HISPANIC             2214
          M                       8
          NATIVE AMERICAN        37
          OTHER                 515
          UNKNOWN                42
          W                       3
          WHITE               16314
          W`                      1
          f                       1
          hiq                     1
          m                       2
2013      ASIAN               18492
          BLACK              241008
          HISPANIC            47084
          NATIVE AMERICAN      1420
          OTHER               20185
          UNKNOWN               494
          WHITE              344707
2014      ASIAN               21672
          BLACK              268749
          HISPANIC            57812
          NATIVE AMERICAN      1884
          OTHER               24647
          UNKNOWN               338
          WHITE              371963
2015      ASIAN               23871
          BLACK              287542
          HISPANIC            67536
          NATIVE AMERICAN      1992
          OTHER               29206
          UNKNOWN               186
          WHITE              396087
Name: ETHNICITY, dtype: int64

2012 looks messy ^^


In [133]:
stops.groupby([years, stops.SEIZED])['SEIZED'].count()


Out[133]:
datetime  SEIZED                 
2012      28124                          1
          Contraband Only             2959
          Contraband and Property      591
          Nothing                     8554
          Property Only                841
          n/a                           34
          x                              1
2013      0                              2
          18004                          5
          18644                          1
          2812                           1
          Both                          53
          Contraband                   363
          Contraband Only             4659
          Contraband and Property     1337
          NONE                           8
          Nothing                    13590
          Property                      36
          Property Only               1540
          arrest                         1
          impound inventory              1
          n                              1
          n//a                           3
          n/a                          267
          na                             2
          no search                      1
          none                          29
          p                              1
2014      18344                          1
          Both                         167
          Contraband                  1873
          Contraband Only             3782
          Contraband and Property     1248
          No CDs in vehicle              1
          Nothing                    10385
          Property                      84
          Property Only               1507
          n/a                          108
          paraphernalia                  1
2015      Both                         148
          Contraband                  1915
          Contraband Only             4194
          Contraband and Property     1483
          Nothing                     9855
          Property                     100
          Property Only               1382
          n                              1
          non                            1
Name: SEIZED, dtype: int64

In [136]:
stops.groupby([years, stops.STOPOUTCOME])['STOPOUTCOME'].count()


Out[136]:
datetime  STOPOUTCOME        
2012       SERO, Warning              1
          74                          1
          Arrest                   5795
          CDS ARREST                  1
          CDS MARIJUANA               1
          Citation                 9294
          Citation/ARREST             1
          ERO                         2
          NOTHING LOCATED             1
          RECOVERED EVIDENCE          1
          SERO                     1396
          SERO and Warning            1
          SERO and Warning x2         1
          W                           1
          Warning                 12022
          Warning / Citation          1
          Warnnig                     1
          cds located                 1
          dui                         1
          er                          1
          nothing found               2
          nothing located             1
          x                           1
2013      Arrest                  11828
          Citation               229883
          SERO                    75676
          Warning                355993
2014      Arrest                  11935
          Citation               198416
          SERO                    85886
          Warning                419917
2015      Arrest                  10413
          Citation               214943
          SERO                    85527
          Warning                476541
Name: STOPOUTCOME, dtype: int64

In [134]:
BACOPD = stops.AGENCY == 'BACOPD'

In [137]:
stops[BACOPD].groupby(years)['datetime'].count()


Out[137]:
datetime
2012     6074
2013    72442
2014    74776
2015    68949
Name: datetime, dtype: int64

In [ ]: