In [11]:
# This gets the most recent crimes, but we might have to query the API in batches of 1000

In [2]:
from urllib2 import urlopen
import json
from pandas.io.json import json_normalize
import pandas as pd

In [3]:
# Access Chicago Data Portal
# The json request url comes from Chicago Website: https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2
# NOTE(review): the resource id requested below (6zsd-86xi) differs from the id
# in the page URL above (ijzp-q8t2) -- confirm both refer to the same dataset.
#
# The response is closed explicitly once it has been parsed: urllib2 responses
# are not context managers, and `request` would otherwise stay bound to an
# open connection for as long as the kernel lives.
request = urlopen('https://data.cityofchicago.org/resource/6zsd-86xi.json')
try:
    # `output` holds the decoded JSON payload returned by the portal.
    output = json.load(request)
finally:
    request.close()

In [4]:
# Format output into pandas
# json_normalize flattens the parsed JSON records into a DataFrame; nested
# keys become dotted column names (e.g. 'location.coordinates').
data = json_normalize(output)

In [10]:
# Preview the first five rows of the normalized frame.
data.head()


Out[10]:
arrest beat block case_number community_area date description district domestic fbi_code ... location.coordinates location.type location_description longitude primary_type updated_on ward x_coordinate y_coordinate year
0 True 1232 007XX W MADISON ST HY243642 28 2015-05-01T22:55:00.000 CALL OPERATION 012 False 16 ... [-87.64697, 41.881676] Point HOTEL/MOTEL -87.64697006 PROSTITUTION 2015-05-09T12:51:58.000 27 1171158 1900220 2015
1 False 0714 062XX S HONORE ST G186972 NaN 2001-04-01T08:00:00.000 TO PROPERTY NaN False 14 ... [-87.670136, 41.77986] Point RESIDENCE -87.670135735 CRIMINAL DAMAGE 2006-03-31T22:03:38.000 NaN 1165136 1863069 2001
2 False 0513 003XX W 110TH ST HY191041 49 2014-11-02T05:57:00.000 HARASSMENT BY TELEPHONE 005 False 26 ... [-87.631029, 41.694202] Point RESIDENCE -87.631029227 OTHER OFFENSE 2015-03-22T16:07:18.000 34 1176057 1831941 2014
3 True 1023 014XX S TALMAN AVE HY194551 29 2015-03-22T11:00:00.000 TO VEHICLE 010 False 14 ... [-87.691879, 41.861639] Point STREET -87.691879227 CRIMINAL DAMAGE 2015-05-09T12:51:58.000 28 1158983 1892824 2015
4 True 1511 056XX W CHICAGO AVE HY196277 25 2015-03-24T08:11:00.000 ATTEMPT ARSON 015 False 09 ... [-87.765976, 41.894735] Point VACANT LOT/LAND -87.765976491 ARSON 2015-05-11T12:38:40.000 29 1138719 1904742 2015

5 rows × 23 columns


In [6]:
# Preview the last five rows of the normalized frame.
data.tail()


Out[6]:
arrest beat block case_number community_area date description district domestic fbi_code ... location.coordinates location.type location_description longitude primary_type updated_on ward x_coordinate y_coordinate year
995 False 0214 040XX S ELLIS AVE HX220675 36 2014-04-11T20:30:00.000 TO VEHICLE 002 False 14 ... [-87.603363, 41.820965] Point STREET -87.603363192 CRIMINAL DAMAGE 2014-04-15T00:40:31.000 4 1183221 1878195 2014
996 True 0915 021XX W 51ST ST HJ108928 61 2003-01-05T18:39:00.000 POSS: CRACK NaN False 18 ... NaN NaN SIDEWALK NaN NARCOTICS 2006-03-22T21:58:07.000 16 NaN NaN 2003
997 True 1222 012XX W 18TH ST HJ107566 31 2003-01-04T22:55:00.000 SOLICIT ON PUBLIC WAY NaN False 16 ... NaN NaN STREET NaN PROSTITUTION 2006-03-22T21:58:07.000 25 NaN NaN 2003
998 False 0712 060XX S MAY ST HJ124431 68 2003-01-13T18:00:00.000 STRONGARM - NO WEAPON NaN False 03 ... NaN NaN APARTMENT NaN ROBBERY 2006-03-22T21:58:07.000 16 NaN NaN 2003
999 False 0711 004XX W 61ST PL HJ121444 68 2003-01-12T05:50:00.000 TO PROPERTY NaN False 14 ... NaN NaN RESIDENCE NaN CRIMINAL DAMAGE 2006-03-22T21:58:07.000 20 NaN NaN 2003

5 rows × 23 columns


In [17]:
#data.describe()

In [18]:
# Bind a selection of the columns of `data` to standalone names.
# Each name refers to the corresponding column of the frame built above.

# Case identity and timestamps
case_number = data["case_number"]
date = data["date"]
updated_on = data["updated_on"]

# Offense details
primary_type = data["primary_type"]
description = data["description"]
fbi_code = data["fbi_code"]
arrest = data["arrest"]

# Administrative areas
beat = data["beat"]
district = data["district"]
area_num = data["community_area"]

# Location details
block = data["block"]
location_description = data["location_description"]
location_type = data["location.type"]
location_coordinates = data["location.coordinates"]
longitude = data["longitude"]

In [16]:
#print crime

In [ ]: