In [94]:
from training_set import *
from preds import train_and_predict
from sklearn.preprocessing import scale
import numpy as np
import pandas as pd
import csv
import glob
import os

In [95]:
def label_data(labels, data):
    """Append the four label columns to ``data`` and return it.

    ``labels`` is a mapping with the keys 'ReactorType', 'Enrichment',
    'Burnup' and 'CoolingInts'. 'ReactorType' and 'Enrichment' are inserted
    as given; 'Burnup' and 'CoolingInts' are first expanded by
    ``loop_labels`` into the 'Burnup' and 'CoolingTime' columns.

    ``data`` is modified in place (columns are added at the right-hand end)
    and the same object is returned.
    """
    burnups, coolings = loop_labels(labels['Burnup'], labels['CoolingInts'])
    expanded = {'Burnup': burnups, 'CoolingTime': coolings}
    for name in ('ReactorType', 'Enrichment', 'Burnup', 'CoolingTime'):
        # expanded labels take precedence; the rest come straight from `labels`
        value = expanded[name] if name in expanded else labels[name]
        # always insert after the current last column, i.e. append
        data.insert(loc=data.shape[1], column=name, value=value)
    return data

def loop_labels(burnup, cooling):
    """Expand per-case burnups into per-row burnup and cooling-time labels.

    Parameters
    ----------
    burnup : sequence
        One burnup value per case, indexable by position.
    cooling : sequence or None
        Cooling intervals applied after every case. If ``None``, the
        module-level ``COOLING_INTERVALS`` is used.

    Returns
    -------
    (burnup_lbl, cooling_lbl) : tuple of two lists of equal length
        Both lists start with a single 0 entry. Each case then contributes:
        one row repeating the previous case's burnup with cooling 0 (skipped
        for the first case), one row with this case's burnup and cooling 0,
        and one row per cooling interval with this case's burnup.
    """
    if cooling is None:
        cooling = COOLING_INTERVALS
    burnup_lbl = [0]
    cooling_lbl = [0]
    for case in range(len(burnup)):
        if case > 0:
            # corresponds to previous material logging step
            burnup_lbl.append(burnup[case - 1])
            cooling_lbl.append(0)
        # corresponds to irradiation step
        burnup_lbl.append(burnup[case])
        cooling_lbl.append(0)
        # one row per cooling interval, all at this case's burnup
        for interval in cooling:
            burnup_lbl.append(burnup[case])
            cooling_lbl.append(interval)
    return burnup_lbl, cooling_lbl

def splitXY(dfXY, info):
    """Split a labeled frame into features and the four label columns.

    For ``info == '_gammas'`` the four label columns are removed from the
    features; for any other ``info`` the 'total' column is removed as well.

    Returns ``(dfX, r_dfY, c_dfY, e_dfY, b_dfY)``: the feature frame followed
    by the 'ReactorType', 'CoolingTime', 'Enrichment' and 'Burnup' columns.
    """
    targets = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup']
    non_features = targets if info == '_gammas' else targets + ['total']
    dfX = dfXY.drop(columns=non_features)
    r_dfY, c_dfY, e_dfY, b_dfY = (dfXY[name] for name in targets)
    return dfX, r_dfY, c_dfY, e_dfY, b_dfY

In [96]:
def dataframeXY(all_files, info):
    """Load, label and stack every training file into one DataFrame.

    Parameters
    ----------
    all_files : list
        Paths of the files to load. The position of a path in this list is
        used to look up its labels in ``TRAIN_LABELS``.
    info : str
        '_gammas' selects ``format_gdf`` as the reader; any other value
        selects ``format_df``.

    Returns
    -------
    pandas.DataFrame
        All labeled files concatenated row-wise, with missing values
        replaced by 0.

    The shape of each file's frame is printed before and after duplicate
    rows are dropped.
    """
    all_data = []
    # enumerate gives the position directly; list.index() rescans the list on
    # every iteration and returns the first match when a path is repeated
    for idx, f in enumerate(all_files):
        if info == '_gammas':
            data = format_gdf(f)
        else:
            data = format_df(f)
        labels = {'ReactorType': TRAIN_LABELS['ReactorType'][idx],
                  #'OrigenReactor': TRAIN_LABELS['OrigenReactor'][idx],
                  'Enrichment': TRAIN_LABELS['Enrichment'][idx],
                  'Burnup': TRAIN_LABELS['Burnup'][idx],
                  'CoolingInts': COOLING_INTERVALS
                  }
        labeled = label_data(labels, data)
        print(labeled.shape)
        # fully identical rows collapse to their last occurrence
        labeled = labeled.drop_duplicates(keep='last')
        print(labeled.shape)
        all_data.append(labeled)
    # columns missing from some files come out of concat as NaN; set them to 0
    return pd.concat(all_data).fillna(value=0)

In [97]:
def format_gdf(filename):
    """Parse a gamma-spectrum csv into a DataFrame with one row per time step.

    Blank rows are ignored. Of the remaining rows, the first five are
    skipped, the sixth supplies the first time index (its first field), and
    every later row whose second field is 'days' starts a new spectrum and
    supplies that spectrum's time index. All other rows are
    ``(bin label, value)`` pairs belonging to the current spectrum.

    Column labels are taken from the first spectrum only. A bin label equal
    to the one just before it gets a '.1' suffix. Values are kept exactly as
    read from the csv (strings).

    Parameters
    ----------
    filename : str
        Path of the csv file to read.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by time step, columns labeled by gamma bin.
    """
    header_rows = 5
    time_idx = []
    spectrum = []
    spectra = []
    gamma_bins = []
    in_first_spectrum = True
    # newline='' is what the csv module documents for files passed to reader
    with open(filename, newline='') as f:
        non_empty = (row for row in csv.reader(f, delimiter=',') if len(row) > 0)
        for i, row in enumerate(non_empty, start=1):
            if i <= header_rows:
                continue
            if i == header_rows + 1:
                time_idx.append(row[0])
            elif row[1] == 'days':
                # a new time step begins: close the spectrum collected so far
                spectra.append(spectrum)
                time_idx.append(row[0])
                spectrum = []
                in_first_spectrum = False
            else:
                # bin labels come from the first spectrum, whatever its
                # length, instead of a hard-coded row range
                if in_first_spectrum:
                    label = row[0]
                    if gamma_bins and gamma_bins[-1] == label:
                        label = label + '.1'
                    gamma_bins.append(label)
                spectrum.append(row[1])
    # the last spectrum has no following 'days' row to close it
    spectra.append(spectrum)
    return pd.DataFrame(spectra, index=time_idx, columns=gamma_bins)

In [98]:
def format_df(filename):
    """Read a csv and return it transposed, without the 'subtotal' column.

    The file is read with the header on row index 5 and the first column as
    the index. After transposing, the 'subtotal' column is removed.
    """
    raw = pd.read_csv(filename, header=5, index_col=0)
    return raw.T.drop(columns='subtotal')

In [100]:
# Build the training frame for each information source.
nucs_tracked = '_fissact'
info_src = ['_nucs', '_gammas']
datapath = "../origen-data/8dec2017/"
for src in info_src:
    # One csv per (reactor, enrichment) pair. dataframeXY looks labels up by
    # position in this list, so the order in which paths are appended matters.
    train_files = []
    for i, o_rxtr in enumerate(O_RXTRS):
        rxtrpath = datapath + o_rxtr + "/"
        for enrich in ENRICH[i]:
            csvfile = "".join([o_rxtr, "_enr", str(enrich), nucs_tracked, src, ".csv"])
            trainpath = os.path.join(rxtrpath, csvfile)
            train_files.append(trainpath)
    trainXY = dataframeXY(train_files, src)

    # Training step, currently disabled. The two branches differ only in the
    # scale() call: the second passes with_mean=False.
    #if info_src.index(src) == 0:
        #trainX, rY, cY, eY, bY = splitXY(trainXY, src)
        #trainX = scale(trainX)
        #train_and_predict(trainX, rY, cY, eY, bY, src, nucs_tracked)
    #else:
        #trainX, rY, cY, eY, bY = splitXY(trainXY, src)
        #trainX = scale(trainX, with_mean=False)
        #train_and_predict(trainX, rY, cY, eY, bY, src, nucs_tracked)


(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(147, 45)
(127, 45)
(9779, 51)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(147, 206)
(127, 206)
(9779, 206)

In [ ]:


In [ ]:


In [ ]:


In [ ]: