In [1]:
# Query Chicago Data Portal to acquire employee salaries
# Ask and answer some basic questions about the data

In [20]:
from urllib2 import urlopen
import json
from pandas.io.json import json_normalize
import pandas as pd

In [22]:
# This is the counter we'll use to cycle through the queries
increments = [1000*x for x in range(100)]

In [24]:
# Query the server, script written by Joe Walsh
output = []
for element in increments:
    query = 'https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=%s' % (element,)
    #print query
    request = urlopen(query)
    output += json.load(request)


https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=0
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=1000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=2000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=3000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=4000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=5000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=6000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=7000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=8000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=9000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=10000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=11000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=12000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=13000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=14000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=15000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=16000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=17000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=18000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=19000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=20000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=21000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=22000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=23000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=24000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=25000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=26000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=27000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=28000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=29000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=30000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=31000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=32000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=33000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=34000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=35000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=36000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=37000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=38000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=39000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=40000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=41000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=42000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=43000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=44000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=45000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=46000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=47000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=48000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=49000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=50000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=51000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=52000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=53000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=54000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=55000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=56000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=57000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=58000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=59000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=60000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=61000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=62000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=63000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=64000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=65000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=66000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=67000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=68000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=69000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=70000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=71000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=72000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=73000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=74000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=75000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=76000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=77000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=78000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=79000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=80000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=81000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=82000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=83000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=84000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=85000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=86000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=87000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=88000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=89000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=90000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=91000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=92000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=93000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=94000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=95000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=96000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=97000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=98000
https://data.cityofchicago.org/resource/xzkq-xp2w.json?$limit=1000&$offset=99000

In [82]:
# This takes the json format and puts it into nice columns
data = json_normalize(output)

In [84]:
# This drops the row "index" 32181, because on June 10th that was nan
data = data.drop(data.index[[32181]])

In [85]:
data.head()


Out[85]:
department employee_annual_salary job_titles name
0 WATER MGMNT 88968.00 WATER RATE TAKER AARON, ELVIA J
1 POLICE 80778.00 POLICE OFFICER AARON, JEFFERY M
2 POLICE 80778.00 POLICE OFFICER AARON, KARINA
3 GENERAL SERVICES 84780.00 CHIEF CONTRACT EXPEDITER AARON, KIMBERLEI R
4 WATER MGMNT 104736.00 CIVIL ENGINEER IV ABAD JR, VICENTE M

In [86]:
data.tail()


Out[86]:
department employee_annual_salary job_titles name
32176 GENERAL SERVICES 97448.00 FRM OF MACHINISTS - AUTOMOTIVE ZYGADLO, MICHAEL J
32177 POLICE 86520.00 POLICE OFFICER ZYGOWICZ, PETER J
32178 POLICE 83616.00 POLICE OFFICER ZYMANTAS, MARK E
32179 POLICE 86520.00 POLICE OFFICER ZYRKOWSKI, CARLO E
32180 DoIT 110352.00 CHIEF DATA BASE ANALYST ZYSKOWSKI, DARIUSZ

In [88]:
data.describe()


Out[88]:
department employee_annual_salary job_titles name
count 32181 32181 32181 32181
unique 35 1105 1094 31893
top POLICE 86520.00 POLICE OFFICER PEREZ, JOSE A
freq 13570 2497 9489 4

In [89]:
# Assign variable names to each column indexed within the dataframe
department = data['department']
salary = data['employee_annual_salary']
job_title = data['job_titles']
name = data['name']

In [90]:
# Create list of each type of department
all_offices = []
for office in department:
    if office not in all_offices:
        all_offices.append(office)
    else:
        pass
print len(all_offices)


35

In [91]:
# Create list of job titles
all_jobs = []
for job in job_title:
    if job not in all_jobs:
        all_jobs.append(job)
    else:
        pass
print len(all_jobs)


1094

In [92]:
# Create list of employee names
all_names = []
for each in name:
    if each not in all_names:
        all_names.append(each)
    else:
        pass
print len(all_names)


31893

In [75]:
# Create list of all salaries
all_salary = []
for pay in salary:
    if pay not in all_salary:
        all_salary.append(pay)
    else:
        pass
print len(all_salary)
print type(salary[0])


1106
<type 'unicode'>

In [ ]:
# Which departments have the most employees?
for office in all_offices:
    """
    Every time an office from all_offices apears in departments,
    Create a counter called that office name, and add one    
    """

In [97]:
# How many of the employees are male? Female?

# Splits up the list of names into first and last names
lastname = []
firstname = []
for person in all_names:
    lastname.append(str(person).split(",")[0])
    firstname.append(str(person).split(",")[1])
print len(lastname)
print len(firstname)


31893
31893

In [ ]: