In [31]:
from pandas import Series, DataFrame
import pandas as pd
import matplotlib.pyplot as plt
from itertools import cycle, islice

In [32]:
# Load the combined data set and keep only the institution name together with
# the 1-year and 2-year default / repayment columns, under readable labels.
df = pd.read_csv('combined.csv')
newtable = df[['NAME TEXT','NBD 1 DOUBLE', 'NBR 1 DOUBLE', 'DRATE 1 DOUBLE','NBD 2 DOUBLE', 'NBR 2 DOUBLE', 'DRATE 2 DOUBLE']]
newtable.columns = ['Name of Institution', 'Default Number: 1 yr', 'Repayment Number: 1 yr', 'Default Rate: 1 yr', 'Default Number: 2 yr', 'Repayment Number: 2 yr', 'Default Rate: 2 yr']

# Drop the first row by position.
# NOTE(review): presumably the first row of combined.csv is not a real
# institution record -- confirm against the file.
newtable = newtable.iloc[1:]

# Institution name + 2-year default rate (the first and last columns of
# newtable).  Selected by label: a list of integers inside [] is looked up as
# column *labels*, which do not exist here, so positional selection must not
# be spelled newtable[[0, 6]].
ratedf = newtable[['Name of Institution', 'Default Rate: 2 yr']]

# Persist the two-column table (the row index is written as well).
ratedf.to_csv('two-year-default-stats.csv')

In [79]:
# Base font settings for the figure.
font = {'family' : 'normal',
        'weight' : 'normal',
        'size'   : 14}

# Only matplotlib.pyplot is imported in this notebook (as plt), so the rc
# settings are applied through plt.rc rather than the bare matplotlib module.
plt.rc('font', **font)

plt.xkcd()

# Schools of interest: every row of ratedf whose institution name matches one
# of the entries in schools.csv.  Each entry is still treated as a regular
# expression by str.contains; na=False makes rows with a missing name count as
# "no match" instead of producing an invalid boolean mask.
sdf = pd.read_csv('schools.csv')
schools = list(sdf['School Name'].values)
institution_names = ratedf['Name of Institution']
matches = [ratedf[institution_names.str.contains(school, na=False)]
           for school in schools]
# Concatenate once instead of growing a DataFrame inside the loop.
subdf = pd.concat(matches)

# An institution can match several school patterns; keep its first occurrence,
# then order the bars by ascending 2-year default rate.
subdf = subdf.drop_duplicates(subset=['Name of Institution'])
subdf = subdf.sort_values(['Default Rate: 2 yr'], ascending=True)

# One colour per bar, in the sorted order above (18 entries).
my_colors = ['b']*3 + ['g']*4 + ['y'] + ['r'] + ['y'] + ['r'] + ['y'] + ['r']*6
print(my_colors)

subdf = subdf.set_index('Name of Institution')

# Draw on an explicitly created figure/axes so that exactly one figure exists
# and it is the one that gets saved.  The single rate column is plotted as a
# Series so that the colour list is applied bar by bar.
figure, axes = plt.subplots()
fig = subdf['Default Rate: 2 yr'].plot(kind='barh', color=my_colors, ax=axes)
fig.set_xlabel('Default Rate: 2 yr')
fig.tick_params(axis='both', labelsize=10)
figure.savefig('plot.png', bbox_inches='tight')
plt.show()


['b', 'b', 'b', 'g', 'g', 'g', 'g', 'y', 'r', 'y', 'r', 'y', 'r', 'r', 'r', 'r', 'r', 'r']
<matplotlib.figure.Figure at 0x7f789c98f550>

In [3]: