In [ ]:
import csv

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sps
%matplotlib inline
# Path to the relative-position CSV.
# NOTE(review): `filepath` is not referenced by any visible cell; the KS
# analysis cell defines its own `invivo_file` with the same value.
filepath = 'yw_all_RelativePosition.csv'
In [ ]:
def TestPasses(pval, cutoff):
    """Classify a p-value against a significance cutoff.

    Returns 'different' when ``pval <= cutoff`` and 'same' when
    ``pval > cutoff``. When neither comparison holds (for example when
    ``pval`` is NaN) the result is None.
    """
    if pval > cutoff:
        return 'same'
    if pval <= cutoff:
        return 'different'
    # Neither comparison was true (e.g. NaN): no verdict.
    return None
def IndivStatTest(simdata, filename_out, cutoff=0.05):
    """Run a two-sample KS test of ``simdata`` against ``invivo_d`` and log it.

    NOTE: this function reads two module-level names that the caller must
    set before each call: ``invivo_d`` (the reference sample handed to
    ``scipy.stats.ks_2samp``) and ``column`` (the label written as the first
    field of the CSV row).

    Parameters
    ----------
    simdata
        One simulated sample to compare against ``invivo_d``.
    filename_out : str
        CSV file to which a single row is appended:
        ``column, KS statistic, p-value, verdict``.
    cutoff : float, optional
        Significance level passed to ``TestPasses`` (default 0.05).

    Returns
    -------
    tuple
        ``(p-value, verdict)``, where verdict is the ``TestPasses`` result.
    """
    test_ks = sps.ks_2samp(invivo_d, simdata)
    # ks_2samp yields (KS statistic, p-value).
    statistic, pvalue = test_ks[0], test_ks[1]
    # Classify once and reuse the verdict for both the CSV row and the return.
    verdict = TestPasses(pvalue, cutoff)
    # newline='' keeps the csv module's own line terminator from being
    # translated a second time (which produces blank rows on Windows).
    with open(filename_out, 'a', newline='') as f:
        csv.writer(f).writerow([column, statistic, pvalue, verdict])
    return pvalue, verdict
Generate uniform random position distributions, sized by the number of cells given in `cellstosim`.
In [ ]:
# Monte Carlo set-up: each tuple is (dent, cells); one CSV is written per tuple.
cellstosim = [(2, 12)]  # disabled alternatives: ,(2,1140),(3,476),(4,130)]
iterations = 10

for dent, cells in cellstosim:
    fname = str(dent) + '_montecarlo_positions_replicates.csv'
    # One column per replicate; each column holds cells * dent uniform draws
    # from [0, 1), flattened row by row.
    positions = np.zeros((cells * dent, iterations))
    for rep in range(iterations):
        positions[:, rep] = np.random.rand(cells, dent).ravel()
    np.savetxt(fname, positions, delimiter=',')
In [ ]:
# Show the (rows, columns) shape of the most recently simulated `positions`
# array: rows = cells * dent, columns = iterations.
positions.shape
Run a two-sample KS test of each simulated replicate against the in vivo data, and count how many replicates are classified 'same' for each `dentincell` value (the counts are appended to `summarydata.csv`).
In [ ]:
def TestPasses(pval, cutoff):
    """Classify a p-value against a significance cutoff.

    Returns 'different' when ``pval <= cutoff`` and 'same' when
    ``pval > cutoff``. When neither comparison holds (for example when
    ``pval`` is NaN) the result is None.
    """
    if pval > cutoff:
        return 'same'
    if pval <= cutoff:
        return 'different'
    # Neither comparison was true (e.g. NaN): no verdict.
    return None
def IndivStatTest(simdata, filename_out, cutoff=0.05):
    """Run a two-sample KS test of ``simdata`` against ``invivo_d`` and log it.

    NOTE: this function reads two module-level names that the caller must
    set before each call: ``invivo_d`` (the reference sample handed to
    ``scipy.stats.ks_2samp``) and ``column`` (the label written as the first
    field of the CSV row).

    Parameters
    ----------
    simdata
        One simulated sample to compare against ``invivo_d``.
    filename_out : str
        CSV file to which a single row is appended:
        ``column, KS statistic, p-value, verdict``.
    cutoff : float, optional
        Significance level passed to ``TestPasses`` (default 0.05).

    Returns
    -------
    tuple
        ``(p-value, verdict)``, where verdict is the ``TestPasses`` result.
    """
    test_ks = sps.ks_2samp(invivo_d, simdata)
    # ks_2samp yields (KS statistic, p-value).
    statistic, pvalue = test_ks[0], test_ks[1]
    # Classify once and reuse the verdict for both the CSV row and the return.
    verdict = TestPasses(pvalue, cutoff)
    # newline='' keeps the csv module's own line terminator from being
    # translated a second time (which produces blank rows on Windows).
    with open(filename_out, 'a', newline='') as f:
        csv.writer(f).writerow([column, statistic, pvalue, verdict])
    return pvalue, verdict
# Column label in the in vivo CSV for each dentincell value; index 0 is a
# placeholder so that dicmap[1] is the first real column.
dicmap = ['null', 'A', 'B', 'C', 'D']
invivo_file = 'yw_all_RelativePosition.csv'
dentnumbers = [1, 2, 3, 4]
sfname = 'summarydata.csv'

invivo_data = pd.read_csv(invivo_file)

for dentincell in dentnumbers:
    # Reference sample: zeros are treated as missing and dropped, then the
    # remaining values are divided by 100.
    invivo = invivo_data[dicmap[dentincell]].replace(0, np.nan).dropna(how='all')
    invivo_d = invivo / 100  # read as a global by IndivStatTest

    mcname = str(dentincell) + '_montecarlo_positions_replicates.csv'
    montecarlo = pd.read_csv(mcname, header=None)
    ksname = 'montecarlo_kstests_' + str(dentincell) + 'dent.csv'

    # Keep this a plain for-loop: IndivStatTest looks up `column` as a global,
    # which a comprehension's private scope would hide.
    pf = []
    for column in montecarlo:
        pf.append(IndivStatTest(montecarlo[column], ksname)[1])

    # Append "<dentincell>,<number of replicates classified 'same'>," to the summary.
    pfr = pd.Series(pf)
    same_count = pfr[pfr == 'same'].count()
    with open(sfname, 'a') as f:
        f.write(str(dentincell) + ',' + str(same_count) + ',\n')
In [ ]:
# Re-tally the verdicts currently held in `pf` and append one more summary row.
# Relies on `pf`, `sfname` and `dentincell` left behind by the analysis cell.
pfr = pd.Series(pf)
same_count = pfr[pfr == 'same'].count()
with open(sfname, 'a') as f:
    f.write(str(dentincell) + ',' + str(same_count) + ',\n')
Make a basic histogram of the simulated positions.
In [ ]:
In [ ]:
# Histogram of every value in `positions` (all replicates pooled), 50 bins.
hist, bins = np.histogram(positions, bins=50)
# Bars sit on the bin midpoints and are drawn at 70% of the bin width so
# neighbouring bars stay visually separated.
center = 0.5 * (bins[1:] + bins[:-1])
width = (bins[1] - bins[0]) * 0.7
plt.bar(center, hist, width=width, align='center')
Pick out the first 25 replicates (columns) for plotting.
In [ ]:
# Save a 25-replicate subset of the dentincell = 1 Monte Carlo positions.
dentincell = 1
mcname = str(dentincell) + '_montecarlo_positions_replicates.csv'
mc = pd.read_csv(mcname, header=None)
# .loc label slices include both endpoints, so the former `0:49` kept 50
# columns. Take the first 25 columns positionally so the content matches the
# '25reps_' output name.
mc = mc.iloc[:, :25]
mc.to_csv('25reps_' + mcname)
In [ ]:
# Display the replicate subset held in `mc`.
mc
In [ ]: