In [1]:
import pandas as pd
import cPickle as pickle
import os
import re
In [2]:
# Load the pickled comparison table; later cells read its `RealReactor`
# column as the ground-truth reactor for each sample.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that come from a trusted source.
# The `with` block guarantees the file handle is closed after loading.
with open('real_reactor_comparison_df.pkl', 'rb') as pkl_file:
    df_comp = pickle.load(pkl_file)
In [3]:
# Collect the Excel result files to process from the data/ directory.
# os.listdir returns entries in arbitrary order, so sort the names to make
# data_list[0] / data_list[1] (used by the cells below) reproducible across
# runs and machines.
data_files = os.listdir('data/')
data_list = sorted(f for f in data_files if 'xlsx' in f)
In [4]:
# Read 'Sheet2' of the first listed workbook so its layout can be inspected.
first_workbook = 'data/' + data_list[0]
df = pd.read_excel(first_workbook, 'Sheet2')
In [5]:
# Preview: pair each index label of `df` with the value in its `reactor` column.
zip(df.index,df.reactor)
Out[5]:
In [6]:
# Scratch check of the file-name regex: find the first run of digits in the
# second file name. `m` is None if that name contains no digits.
m = re.search('[0-9]+', data_list[1])
# group(0) is the matched digit text; it is kept as a string here, whereas the
# loop in the next cell converts it with int().
key = m.group(0)
In [7]:
# Build {sample number: [(index label, reactor value), ...]} from every workbook.
result_dict = {}
for f in data_list:
    # The first run of digits in the file name is the sample number and
    # becomes the dict key.
    m = re.search('[0-9]+', f)
    if m is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('no sample number found in file name: %r' % (f,))
    key = int(m.group(0))
    df = pd.read_excel('data/'+f, 'Sheet2')
    # Pair each index label (the principal component, per the tallying cell)
    # with the reactor value on that row. list(...) materializes the pairs so
    # they can be iterated more than once.
    result_dict[key] = list(zip(df.index, df.reactor))
In [13]:
# Tally, per true reactor and per principal component, how often the reactor
# listed in the results matches the true one, and record which reactor was
# named instead whenever it does not.
#
# results_tallied: {true reactor: {component: {'right': n, 'wrong': n}}}
# wrong_dict:      {component + true reactor: {named reactor: n}}

# The correct reactor list; sample number k maps to position k-1.
correct_reactor = list(df_comp.RealReactor)
results_tallied = {}
wrong_dict = {}
for key, value in result_dict.items():
    # Sample numbers are 1-based; reject anything outside the list so that a
    # key of 0 cannot silently wrap around to the last entry via index -1.
    if not 1 <= key <= len(correct_reactor):
        raise IndexError('sample number %r has no entry in correct_reactor' % (key,))
    correct = correct_reactor[key-1]
    per_component = results_tallied.setdefault(correct, {})
    for pc, r in value:
        # Create counters on first sight of a component, so any number of
        # components per file is handled (not just exactly three).
        counts = per_component.setdefault(pc, {'right': 0, 'wrong': 0})
        if r == correct:
            counts['right'] += 1
        else:
            counts['wrong'] += 1
            # NOTE(review): the misclassification bookkeeping is assumed to
            # apply to wrong answers only -- confirm against the notebook.
            wkey = pc + correct
            confusions = wrong_dict.setdefault(wkey, {'BWR': 0, 'RBMK': 0, 'VVER': 0})
            # .get() tolerates a reactor label outside the three seeded ones.
            confusions[r] = confusions.get(r, 0) + 1
In [14]:
# Display the misclassification counts keyed by component + true reactor.
wrong_dict
Out[14]:
In [9]:
# Display the right/wrong tallies, nested as true reactor -> component -> counts.
results_tallied
Out[9]:
In [10]:
# Report the fraction of correct answers (0-1, not multiplied by 100) for each
# true reactor, one line per component.
for reactor_name in results_tallied:
    print(reactor_name)
    component_counts = results_tallied[reactor_name]
    for component in component_counts:
        n_right = component_counts[component]['right']
        n_wrong = component_counts[component]['wrong']
        # Float total keeps the division below from truncating on Python 2.
        total = float(n_right) + float(n_wrong)
        print('%s %s' % (component, n_right / total))
In [ ]: