In [20]:
import pandas as pd
import numpy as np
import random
import json
from openpyxl import load_workbook
from openpyxl import Workbook
import numpy as np

#read excel
df = pd.read_excel("output.xlsx")[0:25]

#read Condition Name into 1D-array
name_arr = df.Name.unique()
name_arr = list(name_arr)


#Choose Params for disease

wb = load_workbook('output.xlsx') #Define workbook
ws = wb.get_sheet_by_name('Sheet1') #Define worksheet

A = np.array([[i.value for i in j] for j in ws['C1':'I1']]).ravel() #Read BiRads into list
B = np.array([[i.value for i in j] for j in ws['C2':'I2']]).ravel() #Read BiRads Probabilities into list
C = np.array([[i.value for i in j] for j in ws['O1':'Q1']]).ravel() #Read Params Probabilities into list

#Convert from np-arr to 1D arr

a = list(A) #BiRads list
b = list(B) #BiRad probs integer values
c = list(C) #Params probs integer values

#Define function to normalize arr values

def normalize(items):
    sum_n = 0
    for x in items:
        sum_n += x
    problist = [x/sum_n for x in items]
    return(problist)




for i in range(10):
    br_p = normalize(b)
    a = list(A)
    br = np.random.choice(a, 1, br_p)
    name = df['Name'].values.tolist()[0:1]
    "create list of values and slice empty entities from list"
    cd = df['Condition description'].values.tolist()
    rm = df['Relevant modalities'].values.tolist()
    r = random.choice(rm)
    #mammo params
    if r == 'Mammography':
        f_list = df['Relevant findings'].values.tolist()[0:9]
        "random finding"
        f = random.choice(f_list)
        if f == 'Mass':
            p = normalize(c)
            s = np.array([[i.value for i in j] for j in ws['O2':'Q2']]).ravel()
            s = list(s)
            shape = np.random.choice(s, 1, p)
            m = np.array([[i.value for i in j] for j in ws['O3':'Q3']]).ravel()
            m = list(m)
            margin = np.random.choice(m, 1, p)
            d = np.array([[i.value for i in j] for j in ws['O4':'Q4']]).ravel()
            d = list(d)
            density = np.random.choice(d, 1, p)
            print(name, br, r, f, shape, margin, density)
        elif f == 'Calcifications':
            p = normalize(c)
            tb = np.array([[i.value for i in j] for j in ws['O5':'Q5']]).ravel()
            tb = list(tb)
            t_benigh = np.random.choice(tb, 1, p)
            sm = np.array([[i.value for i in j] for j in ws['O6':'Q6']]).ravel()
            sm = list(sm)
            s_morph = np.random.choice(sm, 1, p)
            d = np.array([[i.value for i in j] for j in ws['O7':'Q7']]).ravel()
            d = list(d)
            distrib = np.random.choice(d, 1, p)
            print(name, br, r, f, tb, s_morph, distrib)    
        elif f == 'Assymetry':
            p = normalize(c)
            a = np.array([[i.value for i in j] for j in ws['O8':'Q8']]).ravel()
            a = list(a)
            assymetry = np.random.choice(a, 1, p)
            print(name, br, r, f, assymetry)
        else:
            p = normalize(c)
            ln = np.array([[i.value for i in j] for j in ws['O9':'Q9']]).ravel()
            ln = list(ln)
            print(name, br, r, f, ln)   
    #us params
    elif r == 'US':
        f_list = df['Relevant findings'].values.tolist()[10:16]
        f = random.choice(f_list)
        if f == 'Mass':
            p = normalize(c)
            s = np.array([[i.value for i in j] for j in ws['O10':'Q10']]).ravel()
            s = list(s)
            shape = np.random.choice(s, 1, p)
            m = np.array([[i.value for i in j] for j in ws['O11':'Q11']]).ravel()
            m = list(m)
            margin = np.random.choice(m, 1, p)
            e = np.array([[i.value for i in j] for j in ws['O12':'Q12']]).ravel()
            e = list(e)
            echo = np.random.choice(e, 1, p)
            pos = np.array([[i.value for i in j] for j in ws['O13':'Q13']]).ravel()
            pos = list(pos)
            posterior = np.random.choice(pos, 1, p)
            print(name, br, r, f, shape, margin, echo, posterior)
        elif f == 'Calcifications US':
            p = normalize(c)
            calc = np.array([[i.value for i in j] for j in ws['O14':'Q14']]).ravel()
            calc = list(calc)
            calc_us = np.random.choice(calc, 1, p)
            print(name, br, r, f, calc_us)
        elif f == 'Lymph nodes':
            p = normalize(c)
            ln = np.array([[i.value for i in j] for j in ws['O15':'Q15']]).ravel()
            ln = list(ln)
            l_nodes = np.random.choice(ln, 1, p)
            print(name, br, r, f, l_nodes)
        else:
            p = normalize(c)
            sc = np.array([[i.value for i in j] for j in ws['O16':'Q16']]).ravel()
            sc = list(sc)
            sp_cases = np.random.choice(sc, 1, p)
            print(name, br, r, f, sp_cases)
    else:
        f_list = df['Relevant findings'].values.tolist()[17:25]
        f = random.choice(f_list)
        if f == 'Mass':
            p = normalize(c)
            s = np.array([[i.value for i in j] for j in ws['O17':'Q17']]).ravel()
            s = list(s)
            shape = np.random.choice(m, 1, p)
            m = np.array([[i.value for i in j] for j in ws['O18':'Q18']]).ravel()
            m = list(m)
            margin = np.random.choice(m, 1, p)
            ie = np.array([[i.value for i in j] for j in ws['O19':'Q19']]).ravel()
            ie = list(ie)
            int_e = np.random.choice(ie, 1, p)
            print(name, br, r, f, shape, margin, int_e)
        elif f == 'MRI features':
            p = normalize(c)
            m_f = np.array([[i.value for i in j] for j in ws['O20':'Q20']]).ravel()
            m_f = list(m_f)
            mri_f = np.random.choice(m_f, 1, p)
            print(name, br, r, f, mri_f)
        elif f == 'Kinetic curve assessment':
            p = normalize(c)
            kca = np.array([[i.value for i in j] for j in ws['O21':'Q21']]).ravel()
            kca = list(kca)
            kin_ca = np.random.choice(kca, 1, p)
            print(name, br, r, f, kin_ca)
        elif f == 'Non-mass enhancement (NME)':
            p = normalize(c)
            distr = np.array([[i.value for i in j] for j in ws['O22':'Q22']]).ravel()
            distr = list(distr)
            distrib = np.random.choice(distr, 1, p)
            iep = np.array([[i.value for i in j] for j in ws['O23':'Q23']]).ravel()
            print(name, br, r, f, distrib, iep)
        elif f == 'Non-enhancing findings':
            p = normalize(c)
            nef = np.array([[i.value for i in j] for j in ws['O21':'Q22']]).ravel()
            nef = list(nef)
            ne_f = np.random.choice(nef, 1, p)
            print(name, br, r, f, ne_f)
        elif f == 'Lymph nodes':
            p = normalize(c)
            ln = np.array([[i.value for i in j] for j in ws['O22':'Q22']]).ravel()
            ln = list(ln)
            l_nodes = np.random.choice(ln, 1, p)
            print(name, br, r, f, l_nodes)
        else:
            p = normalize(c)
            fcl = np.array([[i.value for i in j] for j in ws['O23':'Q23']]).ravel()
            fcl= list(fcl)
            fat_cl = np.random.choice(fcl, 1, p)
            print(name, br, r, f, fat_cl)
        

un_list = df['Unique findings'].values.tolist()
un = random.choice(un_list)
p_list = df['Parameters'].values.tolist()
p = random.choice(p_list)
g_list = df['General'].values.tolist()
g = random.choice(g_list)
u_list = df['Unrelated'].values.tolist()
u = random.choice(u_list)
i_list = df['Ignore'].values.tolist()
i = random.choice(i_list)
a_list = df['Associated conditions'].values.tolist()
a = random.choice(a_list)
dd_list = df['Differential diagnosis'].values.tolist()
dd = random.choice(dd_list)
nt_list = df['Notes'].values.tolist()
nt = random.choice(nt_list)

params = ["name", "condition description", "relevant finding", "unique finding", "parameters", "general", "unrelated", "ignore", "associated conditions", "notes"]
keys = [name, cd, f, un, p, g, u, i, a, dd, nt]

rep = {}
for i in range(len(params)):
    
    rep[params[i]] = keys[i]
    
rep = json.dumps(rep)
#print(rep)

##### information for the entire sheet ####
num_rows_in_cond = 25 #"height" of each condition
#width could be different for each condition - so not writing down
starting_column_for_typical = 15 # for all conditions, Typical, possible, none columns, start with typical at col 15
# possible at 15+1, and none at 15+2
first_row = 1 #this is the row with the numbers for typical, possible and none

def determine_width(starting_column):
    # traverse first_row to determine width
    width = 0
    item = df.iloc[first_row, starting_column_for_typical] #our first value
    while isinstance(item, float) and (item!=0):
        width = width + 1
        item = df.iloc[first_row, starting_column_for_typical + width]
        # each time incrementing a counter (count_numbers or width)
    #for example, width should equal 3 at this point
    return width;


####print(get_names_and_probs(2,3)) ->> [a,p],[['Oval', 'Round', 'Irregular']


# run this for each ROW in each condition (aka run function 25 times for each condition)
# row =  number of starting row for condition
# width = number of rows with numbers at top
#returns: "a and p arrays" for reach row in condition - aka 25 a arrays and 25 p arrays
def get_names_and_probs(row, width):
  
    
    #debugger
    #from IPython.core.debugger import Tracer; Tracer()()

    
    first_row = row
    first_col = starting_column_for_typical
    
    
    # this array_to_normalize will store all the values to normalize
    array_to_normalize = [] #the "a" array
    p_values = [] #the "p" array
    
    
    #for each colmn from the first row until the last row (see "depth" of disease above
    current_row = first_row
    current_col = first_col #set the current_col to first_col to start
    i = 0
    for i in range(width):
        #determine if cell has text, or is empty
        if isinstance(df.iloc[current_row, current_col + i], float): #if cell is empty, python calls it a float
            i = i+1
            continue
        #if we get to this point, there is text in the cell
        temp_array = df.iloc[current_row, current_col + i].replace(" ","").split(",") #look at a cell
        j = 0
        for j in range(len(temp_array)):
            p_values.append(df.iloc[1,current_col + i])
            j = j+1
        array_to_normalize.append(temp_array)
        i = i+1
        #array_to_normalize looks like [["oval","rectangular], "irregular"]
        #p_values looks like [50, 50 , 1]
        
    #flatten array_to_normalize
    #https://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python    
    flat_list = [item for sublist in array_to_normalize for item in sublist]
    #for sublist in array_to_normalize:
    #    for item in sublist:
    #        flat_list.append(item)
    array_to_normalize = flat_list
    #now array to normalize should look like ['Oval', 'Round', 'Irregular']
    
    #a = ['Oval', 'Round', 'Irregular'] #for each row
    #p = [50, 50, 1] #one for the entire sheeet
       
    print("p_values:")
    print(p_values)
    #convert p_values from array of strings to array of ints
    #https://stackoverflow.com/questions/5306079/python-how-do-i-convert-an-array-of-strings-to-an-array-of-numbers
    #p_values = map(int, p_values)
    #p_values = [int(numeric_string) for numeric_string in p_values]

    #normalize p_values / "p array"
    normalize(p_values)
    return array_to_normalize, p_values
# ['Oval', 'Round', 'Irregular'] , [50, 50, 1]
    
    
#row = starting row of condition
#return array of arrays for condition
# pri[[a,p],[['Oval', 'Round', 'Irregular'],[50, 50, 1]],...25 times]
def cond_calculate(row):
    cond_big_array[num_rows_in_cond]
    return cond_big_array
my_width = determine_width(starting_column_for_typical) #my_width should equal 3
get_names_and_probs(2, my_width) #2 represents the 2nd row, aka, first row of Fibroadenoma





#wd


['Fibroadenoma'] ['birad[5]'] Mammography Mass ['Oval, Round'] [None] ['Equal density, Low density']
['Fibroadenoma'] ['birad[0]'] Mammography Mass ['Oval, Round'] [None] ['High density']
['Fibroadenoma'] ['birad[2]'] MRI Fat containing lesions [None]
['Fibroadenoma'] ['birad[5]'] Mammography Mass ['Irregular'] [None] ['High density']
['Fibroadenoma'] ['birad[0]'] US Lymph nodes [None]
['Fibroadenoma'] ['birad[0]'] MRI Lymph nodes [None]
['Fibroadenoma'] ['birad[3]'] MRI MRI features [None]
['Fibroadenoma'] ['birad[1]'] US Lymph nodes [None]
['Fibroadenoma'] ['birad[4]'] MRI Mass [None] ['Circumscribed'] ['Dark internal septations']
['Fibroadenoma'] ['birad[4]'] US Lymph nodes [None]
p_values:
['Equal density, Low density', 'Equal density, Low density']
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-20-3071e1c35b0a> in <module>()
    304     return cond_big_array
    305 my_width = determine_width(starting_column_for_typical) #my_width should equal 3
--> 306 get_names_and_probs(2, my_width) #2 represents the 2nd row, aka, first row of Fibroadenoma
    307 
    308 

<ipython-input-20-3071e1c35b0a> in get_names_and_probs(row, width)
    292 
    293     #normalize p_values / "p array"
--> 294     normalize(p_values)
    295     return array_to_normalize, p_values
    296 # ['Oval', 'Round', 'Irregular'] , [50, 50, 1]

<ipython-input-20-3071e1c35b0a> in normalize(items)
     35     sum_n = 0
     36     for x in items:
---> 37         sum_n += x
     38     problist = [x/sum_n for x in items]
     39     return(problist)

TypeError: unsupported operand type(s) for +=: 'int' and 'str'