In [1]:
import pandas as pd
import cPickle as pickle
import os
import re


/Users/colingerber/anaconda/lib/python2.7/site-packages/pytz-2013b-py2.7.egg/pytz/__init__.py:35: UserWarning: Module argparse was already imported from /Users/colingerber/anaconda/lib/python2.7/argparse.pyc, but /Users/colingerber/anaconda/lib/python2.7/site-packages is being added to sys.path
  from pkg_resources import resource_stream

In [2]:
# NOTE(review): unpickling can execute arbitrary code -- only load a trusted file.
# Use a `with` block so the file handle is closed as soon as the object is loaded.
with open('real_reactor_comparison_df.pkl', 'rb') as pkl_file:
    df_comp = pickle.load(pkl_file)

In [3]:
#load in data files to run
# os.listdir returns names in arbitrary order; sort them so that positional
# lookups such as data_list[0] select the same file on every run.
data_files = sorted(os.listdir('data/'))
# Keep only the entries whose name contains 'xlsx'.
data_list = [f for f in data_files if 'xlsx' in f]

In [4]:
# Read the 'Sheet2' worksheet of the first listed workbook as a sample to inspect.
df = pd.read_excel('data/'+data_list[0], 'Sheet2')

In [5]:
# Preview the (index label, reactor) pairs held by the sample sheet.
zip(df.index,df.reactor)


Out[5]:
[(u'PC3/PC1', u'BWR'), (u'PC2/PC1', u'RBMK'), (u'PC2/PC3', u'RBMK')]

In [6]:
# Try the sample-number extraction on one file name:
# the first run of digits in the name is taken as the key (still a string here).
m = re.compile('[0-9]+').search(data_list[1])
key = m.group()

In [7]:
# Maps sample number (int parsed from the file name) -> list of
# (index label, reactor) pairs read from that workbook's 'Sheet2'.
result_dict = {}
for f in data_list:
    df = pd.read_excel('data/'+f, 'Sheet2')
    #find the sample number from file name
    #use for key of dict
    m = re.search('[0-9]+', f)
    if m is None:
        # Fail with a readable message rather than an AttributeError on None.
        raise ValueError('no sample number found in file name: %r' % f)
    key = int(m.group(0))
    #create a zip of the principal components and the reactors as the values
    result_dict[key] = zip(df.index,df.reactor)

In [13]:
#the correct reactor list
correct_reactor = list(df_comp.RealReactor)
# results_tallied[true reactor][pc label] -> {'right': n, 'wrong': n}
results_tallied = {}
# wrong_dict[pc label + true reactor] -> {assigned reactor: number of misses}
wrong_dict = {}
for key, value in result_dict.items():
    # NOTE(review): assumes sample numbers are 1-based positions in df_comp -- confirm.
    correct = correct_reactor[key-1]
    tally = results_tallied.setdefault(correct, {})
    # Walk the pairs directly so a sheet with any number of rows (or with labels
    # not seen in an earlier sheet) is counted instead of raising.
    for pc_label, r in value:
        counts = tally.setdefault(pc_label, {'right':0, 'wrong':0})
        if r == correct:
            counts['right'] += 1
        else:
            counts['wrong'] += 1
            wkey = pc_label+correct
            wrong = wrong_dict.setdefault(wkey, {'BWR':0, 'RBMK':0, 'VVER':0})
            # .get keeps a reactor name outside the three seeded ones from raising KeyError.
            wrong[r] = wrong.get(r, 0) + 1

In [14]:
# Show, per PC label and true reactor, which reactors the misses were assigned to.
wrong_dict


Out[14]:
{u'PC2/PC1BWR': {'BWR': 0, 'RBMK': 12, 'VVER': 2},
 u'PC2/PC1RBMK': {'BWR': 7, 'RBMK': 0, 'VVER': 0},
 u'PC2/PC1VVER': {'BWR': 9, 'RBMK': 0, 'VVER': 0},
 u'PC2/PC3BWR': {'BWR': 0, 'RBMK': 17, 'VVER': 1},
 u'PC2/PC3RBMK': {'BWR': 7, 'RBMK': 0, 'VVER': 0},
 u'PC2/PC3VVER': {'BWR': 13, 'RBMK': 0, 'VVER': 0},
 u'PC3/PC1BWR': {'BWR': 0, 'RBMK': 7, 'VVER': 10},
 u'PC3/PC1RBMK': {'BWR': 11, 'RBMK': 0, 'VVER': 4},
 u'PC3/PC1VVER': {'BWR': 3, 'RBMK': 1, 'VVER': 0}}

In [9]:
# Show the right/wrong counts per true reactor and PC label.
results_tallied


Out[9]:
{'BWR': {u'PC2/PC1': {'right': 20, 'wrong': 14},
  u'PC2/PC3': {'right': 16, 'wrong': 18},
  u'PC3/PC1': {'right': 17, 'wrong': 17}},
 'RBMK': {u'PC2/PC1': {'right': 34, 'wrong': 7},
  u'PC2/PC3': {'right': 34, 'wrong': 7},
  u'PC3/PC1': {'right': 26, 'wrong': 15}},
 'VVER': {u'PC2/PC1': {'right': 27, 'wrong': 9},
  u'PC2/PC3': {'right': 23, 'wrong': 13},
  u'PC3/PC1': {'right': 32, 'wrong': 4}}}

In [10]:
#get the percent correct
for r in results_tallied:
    print r
    for pc in results_tallied[r]:
        tot = float(results_tallied[r][pc]['right']) + float(results_tallied[r][pc]['wrong'])
        print pc, (results_tallied[r][pc]['right']/tot)


BWR
PC3/PC1 0.5
PC2/PC1 0.588235294118
PC2/PC3 0.470588235294
RBMK
PC3/PC1 0.634146341463
PC2/PC1 0.829268292683
PC2/PC3 0.829268292683
VVER
PC3/PC1 0.888888888889
PC2/PC1 0.75
PC2/PC3 0.638888888889

In [ ]: