notebook.community

Edit and run



In [4]:

    
# Libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
import random as rng
import numpy as np
%matplotlib inline



In [5]:

    
data = pd.read_csv("ocr_data/table_iterations/ocr_corrected.csv")



In [8]:

    
print data.head()









    



   2015:Team  OPR Rank  2014:Team  OPR Rank.1  2013:Team  OPR Rank.2  \
0          1       229          1        1207          1         958   
1          4      1085          4        1404          4        1007   
2          8      1969          8        1262          8        2001   
3         11       231         11         410         11          54   
4         16        74         16          13         16         261   

   2012:Team  OPR Rank.3  2011:Team  OPR Rank.4  2010:Team  OPR Rank.5  \
0          1         984          1         330          1          53   
1          4        2099          4        1338          4         364   
2          7        1861          7        1159          7         572   
3          8         113          8         946          8         234   
4         11         223         11          40         11         302   

   2009:Team  OPR Rank.6  2008:Team  OPR Rank.7  
0          1         317          1         161  
1          4         436          4        1242  
2          7         328          5         197  
3          8        1041          7        1208  
4         11         429          8         635



In [7]:

    
# make individual dataframes for each year
df_2015 = data[['2015:Team','OPR Rank']]
df_2014 = data[['2014:Team','OPR Rank.1']]
df_2013 = data[['2013:Team','OPR Rank.2']]
df_2012 = data[['2012:Team','OPR Rank.3']]
df_2011 = data[['2011:Team','OPR Rank.4']]
df_2010 = data[['2010:Team','OPR Rank.5']]
df_2009 = data[['2009:Team','OPR Rank.6']]
df_2008 = data[['2008:Team','OPR Rank.7']]

# display head summaries of each individual dataframe
print df_2015.head()
print df_2014.head()
print df_2013.head()
print df_2012.head()
print df_2011.head()
print df_2010.head()
print df_2009.head()
print df_2008.head()

# extract teams
itterated_df = df_2015.itteritems("2015:Team")









    



   2015:Team  OPR Rank
0          1       229
1          4      1085
2          8      1969
3         11       231
4         16        74
   2014:Team  OPR Rank.1
0          1        1207
1          4        1404
2          8        1262
3         11         410
4         16          13
   2013:Team  OPR Rank.2
0          1         958
1          4        1007
2          8        2001
3         11          54
4         16         261
   2012:Team  OPR Rank.3
0          1         984
1          4        2099
2          7        1861
3          8         113
4         11         223
   2011:Team  OPR Rank.4
0          1         330
1          4        1338
2          7        1159
3          8         946
4         11          40
   2010:Team  OPR Rank.5
0          1          53
1          4         364
2          7         572
3          8         234
4         11         302
   2009:Team  OPR Rank.6
0          1         317
1          4         436
2          7         328
3          8        1041
4         11         429
   2008:Team  OPR Rank.7
0          1         161
1          4        1242
2          5         197
3          7        1208
4          8         635






    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-5ad33f406638> in <module>()
     20 
     21 # extract teams
---> 22 itterated_df = df_2015.itteritems("2015:Team")

/Users/dinbecevic/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   2358                 return self[name]
   2359             raise AttributeError("'%s' object has no attribute '%s'" %
-> 2360                                  (type(self).__name__, name))
   2361 
   2362     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'itteritems'



In [ ]:

    
plt.boxplot(data["Unnamed: 3"][1:])
plt.show()