In [58]:
# Libraries
import pandas as pd  # tabular data loading and column selection
import matplotlib.pyplot as plt  # pyplot interface used for the figures below
import matplotlib
# Switch matplotlib to the 'ggplot' stylesheet for figures drawn after this call.
matplotlib.style.use('ggplot')
import random as rng  # standard-library `random` module, aliased as `rng`
import numpy as np
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [59]:
# Read the OCR-corrected table into a single wide DataFrame. The preview
# below shows one (Team, OPR Rank) column pair per season.
data = pd.read_csv(filepath_or_buffer="ocr_data/table_iterations/ocr_corrected.csv")

In [60]:
# Preview the first five rows (the default for head) as the cell's rich output.
data.head(n=5)


Out[60]:
2015:Team OPR Rank 2014:Team OPR Rank.1 2013:Team OPR Rank.2 2012:Team OPR Rank.3 2011:Team OPR Rank.4 2010:Team OPR Rank.5 2009:Team OPR Rank.6 2008:Team OPR Rank.7
0 1 229 1 1207 1 958 1 984 1 330 1 53 1 317 1 161
1 4 1085 4 1404 4 1007 4 2099 4 1338 4 364 4 436 4 1242
2 8 1969 8 1262 8 2001 7 1861 7 1159 7 572 7 328 5 197
3 11 231 11 410 11 54 8 113 8 946 8 234 8 1041 7 1208
4 16 74 16 13 16 261 11 223 11 40 11 302 11 429 8 635

In [73]:
# Make individual dataframes for each year.
# pandas disambiguated the repeated "OPR Rank" header by appending ".1" ... ".7",
# so each season's rank column has to be paired with its team column by name.
df_2015 = data[['2015:Team', 'OPR Rank']]
df_2014 = data[['2014:Team', 'OPR Rank.1']]
df_2013 = data[['2013:Team', 'OPR Rank.2']]
df_2012 = data[['2012:Team', 'OPR Rank.3']]
df_2011 = data[['2011:Team', 'OPR Rank.4']]
df_2010 = data[['2010:Team', 'OPR Rank.5']]
df_2009 = data[['2009:Team', 'OPR Rank.6']]
df_2008 = data[['2008:Team', 'OPR Rank.7']]

# Display head summaries of each individual dataframe, newest season first.
# print(...) with a single argument produces the same text on Python 2 and
# is valid syntax on Python 3, unlike the bare `print x` statement.
for yearly_frame in (df_2015, df_2014, df_2013, df_2012,
                     df_2011, df_2010, df_2009, df_2008):
    print(yearly_frame.head())


   2015:Team  OPR Rank
0          1       229
1          4      1085
2          8      1969
3         11       231
4         16        74
   2014:Team  OPR Rank.1
0          1        1207
1          4        1404
2          8        1262
3         11         410
4         16          13
   2013:Team  OPR Rank.2
0          1         958
1          4        1007
2          8        2001
3         11          54
4         16         261
   2012:Team  OPR Rank.3
0          1         984
1          4        2099
2          7        1861
3          8         113
4         11         223
   2011:Team  OPR Rank.4
0          1         330
1          4        1338
2          7        1159
3          8         946
4         11          40
   2010:Team  OPR Rank.5
0          1          53
1          4         364
2          7         572
3          8         234
4         11         302
   2009:Team  OPR Rank.6
0          1         317
1          4         436
2          7         328
3          8        1041
4         11         429
   2008:Team  OPR Rank.7
0          1         161
1          4        1242
2          5         197
3          7        1208
4          8         635

In [ ]:
# Box plot of one season's OPR rank distribution.
# The loaded frame has no "Unnamed: 3" column (every column in the corrected
# CSV is named), so indexing by that label raises KeyError.
# NOTE(review): "Unnamed: 3" presumably referred to the fourth column of an
# earlier, header-less iteration of the CSV; in the corrected file that
# position is 'OPR Rank.1' (the 2014 ranks) -- confirm this is the intended season.
# The former [1:] slice is dropped as well: row 0 now holds real data rather
# than header text, so skipping it would discard a team.
opr_rank_2014 = data['OPR Rank.1'].dropna()  # dropna guards against blank cells

fig, ax = plt.subplots()
ax.boxplot(opr_rank_2014)
ax.set_title('2014 OPR rank distribution')
ax.set_xticklabels(['2014'])
ax.set_ylabel('OPR Rank')
plt.show()