import numpy as np
import pandas as pd
import scipy.stats as sps
import matplotlib.pyplot as plt
%matplotlib inline

# Path to the in-vivo relative-position CSV. The same file name is assigned to
# `invivo_file` further down, which is the variable actually passed to read_csv.
filepath = 'yw_all_RelativePosition.csv'

def TestPasses(pval, cutoff):
    """Label a p-value relative to a significance cutoff.

    Returns 'same' when ``pval`` is strictly greater than ``cutoff`` and
    'different' when ``pval`` is less than or equal to it. If neither
    comparison holds (e.g. ``pval`` is NaN) the result is ``None``.
    """
    if pval > cutoff:
        return 'same'
    if pval <= cutoff:
        return 'different'
    # Neither comparison was true (NaN p-value): no label.
    return None

def IndivStatTest(simdata, filename_out, reference=None, label=None):
    """Run a two-sample KS test on one simulated sample and append the result to a CSV.

    Parameters
    ----------
    simdata : array-like
        Simulated sample to compare against the reference sample.
    filename_out : str
        CSV file the result row is appended to (opened in append mode).
    reference : array-like, optional
        Sample to test against. Defaults to the notebook-level global
        ``invivo_d``, which is what the original two-argument call relies on.
    label : optional
        Identifier written in the first CSV column. Defaults to the
        notebook-level global ``column`` (the loop variable of the caller).

    Returns
    -------
    tuple
        ``(p_value, verdict)`` where ``verdict`` is ``TestPasses(p_value, 0.05)``.

    The appended row is ``[label, ks_statistic, p_value, verdict]``.
    """
    # Local import: the file's import block does not bring in `csv`, so the
    # original body raised NameError on its first call.
    import csv

    if reference is None:
        reference = invivo_d
    if label is None:
        label = column

    # ks_2samp outputs [ks-statistic, p-value]
    test_ks = sps.ks_2samp(reference, simdata)
    statistic, pval = test_ks[0], test_ks[1]
    verdict = TestPasses(pval, 0.05)

    # newline='' lets the csv module control line endings (avoids blank rows
    # on platforms that translate '\n').
    with open(filename_out, 'a', newline='') as f:
        csv.writer(f).writerow([label, statistic, pval, verdict])
    return pval, verdict

Generate uniformly distributed random positions for each (dent, cells) pair: `iterations` replicates of `cells * dent` values each, saved to one CSV file per dent number.

# (dent, cells) pairs to simulate. Other pairs that were previously listed but
# are disabled: (2, 1140), (3, 476), (4, 130).
cellstosim = [(2, 12)]
# Number of replicate columns generated per pair.
iterations = 10

for dent, cells in cellstosim:
    fname = '{}_montecarlo_positions_replicates.csv'.format(dent)

    # One column per replicate, cells*dent uniform draws from [0, 1) per column.
    positions = np.zeros((cells * dent, iterations))
    for it in range(iterations):
        positions[:, it] = np.random.rand(cells, dent).ravel()

    np.savetxt(fname, positions, delimiter=',')

Calculate the KS test of every Monte Carlo replicate against the in-vivo data, and count how many replicates test as 'same' for each dent-in-cell number (the counts are appended to the summarydata.csv file).

# Maps a dent-in-cell number (used as the list index) to the in-vivo CSV column
# that holds it; index 0 is a placeholder so numbers 1-4 index directly.
dicmap = ['null', 'A', 'B', 'C', 'D']

invivo_file = 'yw_all_RelativePosition.csv'
dentnumbers = [1, 2, 3, 4]
sfname = 'summarydata.csv'

invivo_data = pd.read_csv(invivo_file)

for dentincell in dentnumbers:
    # Clear out missing data: zeros are treated as missing, then every NaN
    # entry of the selected column is dropped.
    invivo = invivo_data[dicmap[dentincell]]
    invivo = invivo.replace(0, np.nan).dropna()

    # IndivStatTest reads `invivo_d` (and the loop variable `column` below) as
    # globals, so both names must stay as they are.
    # NOTE(review): the /100 presumably converts percentages to the 0-1 range
    # of the simulated data -- confirm against the input file.
    invivo_d = invivo / 100

    mcname = str(dentincell) + '_montecarlo_positions_replicates.csv'
    ksname = 'montecarlo_kstests_' + str(dentincell) + 'dent.csv'

    montecarlo = pd.read_csv(mcname, header=None)
    pf = []

    for column in montecarlo:
        pval, dif = IndivStatTest(montecarlo[column], ksname)
        # Record the verdict; without this the summary count below was always 0.
        pf.append(dif)

    # Number of replicates whose KS test came out as 'same'.
    pfr = pd.Series(pf)
    with open(sfname, 'a') as f:
        f.write(str(dentincell) + ',' + str(pfr[pfr == 'same'].count()) + ',\n')

Make basic plots

```python
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)
hist, bins = np.histogram(x, bins=50)
width = 0.7 * (bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2
plt.bar(center, hist, align='center', width=width)
```

# Histogram of every value in `positions` (all replicates pooled), drawn as a
# bar chart with bars at 70% of the bin width.
hist, bins = np.histogram(positions, bins=50)
width = 0.7 * (bins[1] - bins[0])
# Bin centres; the original line had lost the `plt.bar(center` call and was a
# syntax error.
center = (bins[:-1] + bins[1:]) / 2
plt.bar(center, hist, align='center', width=width)

Pick out the first replicates for plotting (note: the slice below keeps columns 0–49, i.e. up to 50 replicates, not 25)

# Load the saved Monte Carlo replicates for one dent-in-cell number and keep
# only the leading columns for plotting.
dentincell = 1
mcname = '{}_montecarlo_positions_replicates.csv'.format(dentincell)

# .loc slicing is label-based and inclusive, so 0:49 keeps columns 0 through 49
# (at most 50 replicates).
mc = pd.read_csv(mcname, header=None).loc[:, 0:49]


