In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import sqlite3
import pandas as pd
import seaborn as sns
sns.set_style("white")
conn = sqlite3.connect('../data/output/database.sqlite')
c = conn.cursor()
def execute(sql):
'''Executes a SQL command on the 'c' cursor and returns the results'''
c.execute(sql)
return c.fetchall()
def printByYear(data):
'''Given a list of tuples with (year, data), prints the data next to corresponding year'''
for datum in data:
print "{0}: {1}".format(datum[0], datum[1])
def adjustForInflation(value, year):
'''Adjust the dollar value based on year
Source (http://www.bls.gov/data/inflation_calculator.htm)
'''
if value == None:
return
return int(value * years[year])
In [3]:
query = """SELECT INSTNM, YEAR,
TUITIONFEE_IN, TUITIONFEE_OUT,
DEBT_MDN,
mn_earn_wne_p10, md_earn_wne_p10, pct10_earn_wne_p10, pct25_earn_wne_p10,
pct75_earn_wne_p10, pct90_earn_wne_p10
FROM Scorecard
WHERE MAIN='Main campus'
AND PREDDEG = 'Predominantly bachelor''s-degree granting'
"""
rawData = execute(query)
query = """SELECT INSTNM
FROM Scorecard
WHERE MAIN='Main campus'
AND PREDDEG = 'Predominantly bachelor''s-degree granting'
and CCBASIC NOT LIKE '%Special%'"""
institutions = execute(query)
Now that I have the raw data I want to structure it in a JSON like fashion. instnm -> year -> data
In [67]:
JSON_data = {} # Inflation adjusted JSON structured tree about different institutions
# Get main campus, predominantly bachelor's, non-medical schools that had data for 2013
for inst in institutions:
JSON_data[inst[0]] = {}
def setValue(data, key, value, year):
if value and value != 'PrivacySuppressed':
data[key] = adjustForInflation(value, year)
for row in rawData:
instnm = row[0]
year = row[1]
if instnm in JSON_data:
financialData = {}
setValue(financialData, 'tuition_in', row[2], year)
setValue(financialData, 'tuition_out', row[3], year)
setValue(financialData, 'debt_mdn', row[4], year)
setValue(financialData, 'mn_earn', row[5], year)
setValue(financialData, 'md_earn', row[6], year)
setValue(financialData, 'md_earn_10', row[7], year)
setValue(financialData, 'md_earn_25', row[8], year)
setValue(financialData, 'md_earn_75', row[9], year)
setValue(financialData, 'md_earn_90', row[10], year)
if len(financialData) != 0:
JSON_data[instnm][year] = financialData
# Create averages
JSON_data['avg'] = {}
counts = {}
i=0
for school in JSON_data:
for year in JSON_data[school]:
if year not in JSON_data['avg']:
JSON_data['avg'][year] = JSON_data[school][year]
counts[year] = {}
for elem in JSON_data[school][year]:
counts[year][elem] = 1
else:
for elem in JSON_data[school][year]:
if elem not in JSON_data['avg'][year]:
JSON_data['avg'][year][elem] = JSON_data[school][year][elem]
counts[year][elem] = 1
else:
JSON_data['avg'][year][elem] += JSON_data[school][year][elem]
counts[year][elem] += 1
for year in JSON_data['avg']:
for elem in JSON_data['avg'][year]:
JSON_data['avg'][year][elem] = JSON_data['avg'][year][elem] / counts[year][elem]
print JSON_data['avg']
In [68]:
JSON_data['Harvard University']
Out[68]:
In [ ]: