In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot
import matplotlib
matplotlib.style.use("ggplot")
%matplotlib inline

In [14]:
fingerlakes_data = pd.read_csv("Norm_Trans_OPR_Fingerlakes - Sheet1.csv")

In [15]:
fingerlakes_data


Out[15]:
Fingerlakes 20 73 174 191 250 340 369 378 395 ... 3838 3951 4023 4093 4930 5030 5254 5349 5433 5590
0 OPR 2015 0.782362 0.131340 0.490159 0.765708 0.753217 0.884557 0.242241 0.767222 0.482210 ... 0.819076 0.612793 NaN 0.394777 0.445496 0.88645 1.000000 0.214989 0.045042 0.723316
1 OPR 2014 0.957689 0.663954 0.676159 0.612693 0.598454 0.790073 0.847437 0.870627 0.534581 ... 0.096420 0.511391 0.109032 0.173312 0.770545 0.81367 0.913344 0.786412 NaN NaN
2 OPR 2013 1.000000 0.039917 0.633264 0.843243 0.855717 0.944283 0.940541 0.883992 0.437838 ... 0.702703 0.331393 0.116008 0.232848 NaN NaN NaN NaN NaN NaN
3 OPR 2012 0.625440 0.494278 0.923415 0.863116 0.762764 0.974032 0.889085 0.541813 0.778609 ... 0.740757 0.690581 0.000000 0.273327 NaN NaN NaN NaN NaN NaN
4 OPR 2011 0.916688 0.666494 0.214748 0.035705 0.142303 0.919017 0.138680 0.558603 0.254334 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
5 OPR 2010 1.000000 0.140302 0.909343 0.863398 0.117484 0.799568 NaN 0.000000 0.238051 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
6 OPR 2009 0.945262 0.796784 0.535751 0.619569 0.922682 0.878549 0.063975 0.872733 0.868628 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7 OPR 2008 0.320376 0.000000 0.770049 0.436170 0.540098 0.734452 0.356792 0.044599 0.868249 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

8 rows × 50 columns


In [16]:
def make_data_groups(dataframe, column_names):
    master_group = []
    for i in column_names:
        group = []
        group.append(dataframe[i])
        master_group.append(group)
    return master_group

check_it_out = make_data_groups(fingerlakes_data, team_numbers_list)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-16-2312f6ea0541> in <module>()
      7     return master_group
      8 
----> 9 check_it_out = make_data_groups(fingerlakes_data, team_numbers_list)

<ipython-input-16-2312f6ea0541> in make_data_groups(dataframe, column_names)
      3     for i in column_names:
      4         group = []
----> 5         group.append(dataframe[i])
      6         master_group.append(group)
      7     return master_group

/Users/dinbecevic/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1967             return self._getitem_multilevel(key)
   1968         else:
-> 1969             return self._getitem_column(key)
   1970 
   1971     def _getitem_column(self, key):

/Users/dinbecevic/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   1974         # get column
   1975         if self.columns.is_unique:
-> 1976             return self._get_item_cache(key)
   1977 
   1978         # duplicate columns & possible reduce dimensionality

/Users/dinbecevic/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1089         res = cache.get(item)
   1090         if res is None:
-> 1091             values = self._data.get(item)
   1092             res = self._box_item_values(item, values)
   1093             cache[item] = res

/Users/dinbecevic/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   3209 
   3210             if not isnull(item):
-> 3211                 loc = self.items.get_loc(item)
   3212             else:
   3213                 indexer = np.arange(len(self.items))[isnull(self.items)]

/Users/dinbecevic/anaconda/lib/python2.7/site-packages/pandas/core/index.pyc in get_loc(self, key, method, tolerance)
   1757                                  'backfill or nearest lookups')
   1758             key = _values_from_object(key)
-> 1759             return self._engine.get_loc(key)
   1760 
   1761         indexer = self.get_indexer([key], method=method,

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3979)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3843)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12265)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12216)()

KeyError: 'OPR 2015'

In [ ]:
team_numbers = fingerlakes_data[:0]
team_numbers_list = list(team_numbers)
del team_numbers_list[0]
# team_numbers_list = map(int, team_numbers_list)
print team_numbers_list

medians = fingerlakes_data.median()
sorted_medians = medians.sort_values()
sorted_med_list = list(sorted_medians)
median_keys = list(sorted_medians.keys())
real_keys = median_keys.reverse()

In [ ]:
check_it_out = make_data_groups(fingerlakes_data, median_keys)

labels = median_keys

figure = plot.boxplot(check_it_out)
plot.title('Standardized OPR from 2008-2015 (Fingerlakes)', fontsize=32)
plot.ylabel('Adjusted OPR', fontsize=24)
plot.xlabel('Team Number', fontsize=24)
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(20, 10)
plot.xticks(range(52), labels, rotation='vertical')
plot.savefig('opr_boxplots_fingerlakes.png')