In [1]:
from __future__ import division
from sys import argv
import glob
import os
import itertools
import time
import string
import csv
import numpy as np
import pandas as pd
import scipy.stats as sps
import seaborn
import matplotlib.pyplot as plt
from num2words import num2words

In [3]:
os.getcwd()
os.chdir('MonteCarlo_uniformdist_Output_SummedDVlength_20160220_152731/')

In [6]:
terms = {'same':1,'different':0}

In [8]:
stats = pd.read_csv('MonteCarlo_absolute_distances_statistics.csv',header=None)

In [15]:
stats.columns = ['a','ksstat','pval','same','b','c','d']

In [ ]:


In [16]:
st = stats.same.map(terms)

In [18]:
st.sum()


Out[18]:
0

In [9]:
stats


Out[9]:
0 1 2 3 4 5 6
0 AbsoluteDistances 0.347318 5.587245e-145 different 2659667 1.669206e-78 different
1 AbsoluteDistances 0.326158 6.673415e-128 different 2805840 1.646795e-59 different
2 AbsoluteDistances 0.349507 8.305173e-147 different 2669428 3.757050e-77 different
3 AbsoluteDistances 0.346954 1.123887e-144 different 2719550 2.136449e-70 different
4 AbsoluteDistances 0.351332 2.439881e-148 different 2677316 4.559406e-76 different
5 AbsoluteDistances 0.340022 5.715956e-139 different 2761041 4.804658e-65 different
6 AbsoluteDistances 0.345859 9.107179e-144 different 2684770 4.743636e-75 different
7 AbsoluteDistances 0.355710 4.765179e-152 different 2696436 1.794871e-73 different
8 AbsoluteDistances 0.346589 2.259065e-144 different 2668512 2.808347e-77 different
9 AbsoluteDistances 0.332725 4.358946e-133 different 2787047 8.424498e-62 different
10 AbsoluteDistances 0.346589 2.259065e-144 different 2767452 3.085056e-64 different
11 AbsoluteDistances 0.344400 1.467121e-142 different 2687518 1.120331e-74 different
12 AbsoluteDistances 0.344400 1.467121e-142 different 2700727 6.762506e-73 different
13 AbsoluteDistances 0.345494 1.826556e-143 different 2682205 2.122619e-75 different
14 AbsoluteDistances 0.340387 2.881541e-139 different 2752051 3.470995e-66 different
15 AbsoluteDistances 0.330901 1.230318e-131 different 2778264 6.910730e-63 different
16 AbsoluteDistances 0.334549 1.516250e-134 different 2742256 1.929555e-67 different
17 AbsoluteDistances 0.346954 1.123887e-144 different 2738225 5.827284e-68 different
18 AbsoluteDistances 0.363736 5.733377e-159 different 2640412 3.308147e-81 different
19 AbsoluteDistances 0.347683 2.775580e-145 different 2724166 8.626761e-70 different
20 AbsoluteDistances 0.336009 1.018920e-135 different 2780735 1.399738e-62 different
21 AbsoluteDistances 0.342941 2.335846e-141 different 2734007 1.656376e-68 different
22 AbsoluteDistances 0.352061 5.920301e-149 different 2663820 6.300225e-78 different
23 AbsoluteDistances 0.347318 5.587245e-145 different 2744674 3.948126e-67 different
24 AbsoluteDistances 0.347683 2.775580e-145 different 2677445 4.748667e-76 different
25 AbsoluteDistances 0.355710 4.765179e-152 different 2673985 1.592505e-76 different
26 AbsoluteDistances 0.352426 2.913083e-149 different 2753412 5.174598e-66 different
27 AbsoluteDistances 0.335279 3.936348e-135 different 2772586 1.356008e-63 different
28 AbsoluteDistances 0.348778 3.387721e-146 different 2680263 1.153203e-75 different
29 AbsoluteDistances 0.357534 1.315178e-153 different 2635833 7.411770e-82 different
... ... ... ... ... ... ... ...
9970 AbsoluteDistances 0.348413 6.834531e-146 different 2721387 3.725994e-70 different
9971 AbsoluteDistances 0.353885 1.695114e-150 different 2719380 2.029165e-70 different
9972 AbsoluteDistances 0.343670 5.862639e-142 different 2645810 1.914471e-80 different
9973 AbsoluteDistances 0.344035 2.933858e-142 different 2686627 8.480756e-75 different
9974 AbsoluteDistances 0.327618 4.798055e-129 different 2818020 4.761622e-58 different
9975 AbsoluteDistances 0.361182 9.481359e-157 different 2598309 2.795063e-87 different
9976 AbsoluteDistances 0.340022 5.715956e-139 different 2649057 5.481693e-80 different
9977 AbsoluteDistances 0.358628 1.512516e-154 different 2685573 6.099213e-75 different
9978 AbsoluteDistances 0.338198 1.736299e-137 different 2857888 2.135894e-53 different
9979 AbsoluteDistances 0.352426 2.913083e-149 different 2640906 3.886107e-81 different
9980 AbsoluteDistances 0.345859 9.107179e-144 different 2687856 1.245053e-74 different
9981 AbsoluteDistances 0.331996 1.661854e-132 different 2758201 2.100140e-65 different
9982 AbsoluteDistances 0.360088 8.372482e-156 different 2654436 3.110532e-79 different
9983 AbsoluteDistances 0.358993 7.344579e-155 different 2630952 1.494510e-82 different
9984 AbsoluteDistances 0.341116 7.307009e-140 different 2805676 1.573398e-59 different
9985 AbsoluteDistances 0.353885 1.695114e-150 different 2695186 1.218377e-73 different
9986 AbsoluteDistances 0.335279 3.936348e-135 different 2794880 7.689407e-61 different
9987 AbsoluteDistances 0.348048 1.377814e-145 different 2672038 8.598300e-77 different
9988 AbsoluteDistances 0.339657 1.133011e-138 different 2783678 3.236801e-62 different
9989 AbsoluteDistances 0.333455 1.139972e-133 different 2764809 1.435322e-64 different
9990 AbsoluteDistances 0.346589 2.259065e-144 different 2693583 7.408449e-74 different
9991 AbsoluteDistances 0.342576 4.657376e-141 different 2743959 3.195398e-67 different
9992 AbsoluteDistances 0.349507 8.305173e-147 different 2719293 1.976356e-70 different
9993 AbsoluteDistances 0.335644 2.003440e-135 different 2751528 2.976800e-66 different
9994 AbsoluteDistances 0.367019 7.640766e-162 different 2636183 8.311375e-82 different
9995 AbsoluteDistances 0.355710 4.765179e-152 different 2708781 8.037139e-72 different
9996 AbsoluteDistances 0.339292 2.244192e-138 different 2775193 2.867474e-63 different
9997 AbsoluteDistances 0.350602 1.002577e-147 different 2656836 6.730396e-79 different
9998 AbsoluteDistances 0.331631 3.241306e-132 different 2759122 2.747364e-65 different
9999 AbsoluteDistances 0.364830 6.351217e-160 different 2635523 6.696439e-82 different

10000 rows × 7 columns


In [ ]:


In [ ]:


In [5]:
# Column-wise sum over every row except the first one.
# The original line was missing the closing bracket of the slice. `stats` is
# read with a default integer index, so a positional slice selects the same
# rows as the label slice that was started here.
# NOTE(review): the intent of skipping row 0 is not visible in this notebook - confirm.
stats.iloc[1:].sum()


Out[5]:
AbsoluteDistances     AbsoluteDistancesAbsoluteDistancesAbsoluteDist...
0.347318496899                                                  3455.21
5.58724541987e-145                                         1.33779e-119
different             differentdifferentdifferentdifferentdifferentd...
2659667.0                                                   2.70824e+10
1.66920580963e-78                                           9.96905e-49
different.1           differentdifferentdifferentdifferentdifferentd...
dtype: object

In [ ]:


In [ ]:


In [ ]:


In [ ]:
# for jupyter notebook: the input file and iteration count are hard-coded here
input_file, cells_to_simulate = 'yw_CellbyCell.csv', 1
data = pd.read_csv(input_file)


# get genotype from filename
# split the string at the underscore, returns a list of the parts (no
# underscores), take the first/before part
genotype = input_file.split('_')[0]


# clear out missing data
data = data.replace(0, np.nan)       # turn zeros into NaNs
# drop every ROW in which ALL values are NaN (dropna works on rows by default)
data = data.dropna(how='all')
# turn NaNs back into zeros, so arithmetic can be normal for 'true-zero' values
data = data.replace(np.nan, 0)
# data = data[data.dentincell != 1]   # drop rows where the cell has
# only 1 denticle

# save the invivo denticle separation distances (every column from the 11th
# onward) as one flat array and keep only the non-zero entries.
# `.values` replaces DataFrame.as_matrix, which newer pandas no longer provides.
invivo_d = data[data.columns[10:]].values.flatten()
invivo_d = invivo_d[np.nonzero(invivo_d)]


# using the 'absolute' DV length (dist between edge markers):
# dvlen_type = 'AbsoluteDVlength'
# invivo_ln = pd.DataFrame(data,columns=['dentincell','Dvlen'])
# invivo_ln.columns = ['dentincell','dvlength']

# using the summed DVlength (sum of dent-edge, dent-dent ... dent-dent,
# dent-edge); sum to get the additive, rather than absolute, DV length:
dvlen_type = 'SummedDVlength'
invivo_ln = pd.DataFrame(data, columns=['dentincell'])
invivo_ln['dvlength'] = data['dentEdgeL'] + data['dentEdgeR'] + (data[data.columns[10:]]).sum(axis=1, skipna=True, numeric_only=True)

# Transpose so that 'dentincell' and 'dvlength' become the two rows and each
# cell becomes a column; later cells transpose back with `invivo_ln.T`.
invivo_ln = invivo_ln.T

In [369]:
def IndivStatTest(simdata, model, filename_out):
    """Compare simulated distances with the in-vivo sample and log one CSV row.

    The non-zero entries of ``simdata`` are tested against the module-level
    ``invivo_d`` sample with a two-sample Kolmogorov-Smirnov test and with a
    Mann-Whitney U test. A single row is appended to ``filename_out``:
    model, KS statistic, KS p-value, KS verdict, MWU statistic, MWU p-value,
    MWU verdict. Each verdict is ``TestPasses(p_value, 0.05)``.

    simdata      -- numpy array of simulated distances; zero entries are dropped
    model        -- label written into the first column of the row
    filename_out -- path of the CSV file, opened in append mode
    """
    nonzero_sim = simdata[np.nonzero(simdata)]

    # Both results are indexed as [statistic, p-value].
    # A small KS statistic or a high p-value means the hypothesis that the two
    # samples share a distribution cannot be rejected.
    ks_result = sps.ks_2samp(invivo_d, nonzero_sim)
    # NOTE(review): the original author's notes describe this p-value as
    # one-sided (to be doubled for a two-sided test) and the test as suitable
    # only for samples of more than 20 observations; whether the p-value is
    # one- or two-sided depends on the installed SciPy - confirm.
    mwu_result = sps.mannwhitneyu(invivo_d[np.nonzero(invivo_d)], nonzero_sim)

    with open(filename_out, 'a') as out_file:
        ks_stat, ks_pval = ks_result[0], ks_result[1]
        mwu_stat, mwu_pval = mwu_result[0], mwu_result[1]
        record = [model,
                  ks_stat, ks_pval, TestPasses(ks_pval, 0.05),
                  mwu_stat, mwu_pval, TestPasses(mwu_pval, 0.05)]
        csv.writer(out_file).writerow(record)
                      

def TestPasses(pval, cutoff):
    """Label a p-value relative to a significance cutoff.

    Returns 'different' when ``pval`` is below ``cutoff`` and 'same' when it
    is at or above it. When neither comparison holds (for example a NaN
    p-value) the result is None.
    """
    if pval >= cutoff:
        return 'same'
    if pval < cutoff:
        return 'different'
    return None

In [458]:
# maxDent = int(max(invivo_ln.ix['dentincell']))
# Width of the per-cell position array, i.e. the largest denticle count a cell
# may have (hard-coded; the commented-out line above derives it from the data).
maxDent = 6
# Number of Monte Carlo iterations; every iteration re-simulates each in-vivo cell once.
cells_to_simulate = 4
iteration = 0

# time.clock does not exist on Python >= 3.8, so fall back to perf_counter there.
timer = getattr(time, 'clock', None) or time.perf_counter

# Preallocated (cell, iteration, denticle) arrays; the loop below does not fill them.
randompositions = np.zeros((len(invivo_ln.T), (cells_to_simulate), maxDent))
randomdistances_rel = np.zeros((len(invivo_ln.T), (cells_to_simulate), (maxDent-1)))
randomdistances_abs = np.zeros((len(invivo_ln.T), (cells_to_simulate), (maxDent-1)))

# randompositions[:,0:2,0] = invivo_ln.T

# one row per cell, columns 'dentincell' and 'dvlength'
info = invivo_ln.T


# generate random positions
for iteration in range(0, int(cells_to_simulate)):

    ittime = timer()

    positions = np.zeros((len(invivo_ln.T), maxDent))

    for dex, cellinfo in enumerate(invivo_ln.T.values):
        denticlenumber, celllength = cellinfo
        denticlenumber = int(denticlenumber)

        # uniform random positions in [0, 1), sorted smallest to largest;
        # the unused slots to the right stay at 0
        positions[dex, :denticlenumber] = np.sort(np.random.rand(denticlenumber), axis=0)

    # gaps between neighbouring positions within each cell
    relativedistances = positions[:, 1:] - positions[:, 0:-1]
    # the step from the last real position into the zero padding is negative: blank it
    relativedistances[relativedistances < 0] = 0
    # scale every cell's relative gaps by that cell's DV length
    absolutedistances = (info.dvlength.values * relativedistances.T).T

    IndivStatTest(absolutedistances.reshape(absolutedistances.size, 1), 'AbsoluteDistances', 'MonteCarlo_absdist.csv')

    # Previously `elapsedtime` was never assigned in this cell, so the file
    # names below only worked with a value left over from another cell.
    elapsedtime = timer() - ittime
    # One timestamp per iteration so that all files of the iteration share it.
    run_suffix = dvlen_type + '_' + time.strftime("%Y%m%d_%H%M%S") + '_iterationtime=' + str(elapsedtime) + '.csv'
    run_prefix = genotype + '_' + str(iteration)

    np.savetxt(run_prefix + '_relativepositions_' + run_suffix, positions, delimiter=',')
    np.savetxt(run_prefix + '_' + '_relativedistances_' + run_suffix, relativedistances, delimiter=',')

    # Per-cell info next to the simulated distances. The column labels are a
    # flat list so that the frame gets a plain (single-level) column Index.
    adframe = pd.DataFrame(np.concatenate([info.values, absolutedistances], axis=1),
                           columns=['dentincell', 'celllength'] + list(string.ascii_lowercase[:maxDent-1]))
    # Spell out the denticle count; the count is stored as a float after the
    # concatenation, so cast it back to int before spelling it.
    adframe['dentword'] = adframe.dentincell.map(lambda x: num2words(int(x)))
    adframe = adframe.drop('dentincell', axis=1)
    adframe = adframe.replace(0, np.nan)
    adframe = adframe.set_index(['dentword', 'celllength'])
    adframe = adframe.sort_index(axis=0)
    # long format: one entry per (dentword, celllength, distance label)
    adframe = adframe.stack()

    adframe.to_csv(run_prefix + '_MonteCarlo_absolute_distances_' + run_suffix)

    # One file per denticle-count word.
    # NOTE(review): these file names carry no iteration number, so every
    # iteration overwrites the previous one; range(1, maxDent) also stops one
    # short of maxDent - confirm both are intended.
    for number in (num2words(x) for x in range(1, maxDent)):
        dentnumb = adframe[number]
        dentnumb.to_csv(genotype+'_'+number+'_absolute_distances.csv')

In [465]:
# Write one CSV of simulated absolute distances per denticle-count word.
# `num2words` is imported as a function (`from num2words import num2words`),
# so it is called directly.
for number in (num2words(x) for x in range(1, maxDent)):
    dentnumb = adframe[number]
    dentnumb.to_csv(genotype+'_'+number+'_absolute_distances.csv')

In [466]:
dentnumb


Out[466]:
celllength   
9.67718     a    1.659946
            b    2.066415
            c    0.160126
            d    3.689835
            a    0.016149
            b    1.121398
            c    0.245377
            d    4.607272
10.21517    a    0.125799
            b    0.262658
            c    2.512803
            d    3.295522
10.41745    a    1.221223
            b    2.006036
            c    2.636347
            d    0.131533
10.51450    a    2.042553
            b    2.293802
            c    0.718075
            d    0.537345
10.53762    a    0.671986
            b    1.604401
            c    2.720298
            d    2.162862
10.96350    a    1.478867
            b    2.097462
            c    2.756109
            d    0.593485
11.06340    a    0.814446
            b    2.697442
                   ...   
17.78640    c    4.086276
            d    5.550086
18.19690    a    3.540237
            b    2.515269
            c    3.729032
            d    2.645921
18.24600    a    0.447654
            b    0.286859
            c    1.619027
            d    3.820170
18.51760    a    8.961437
            b    0.236213
            c    1.639815
            d    0.204872
18.69580    a    0.858045
            b    1.925537
            c    4.697373
            d    2.623577
18.71490    a    3.119482
            b    2.552434
            c    0.946630
            d    1.146220
19.16300    a    0.522947
            b    0.285079
            c    5.431250
            d    0.874135
23.10970    a    1.222795
            b    1.013146
            c    7.389611
            d    5.012776
dtype: float64

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [463]:
# English words for the integers 1 .. maxDent-1. `num2words` is imported as a
# function, so it is called directly.
a = [num2words(x) for x in range(1, maxDent)]
a


Out[463]:
['one', 'two', 'three', 'four', 'five']

In [373]:
absolutedistances


Out[373]:
array([[ 1.31277041,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.45327546,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 5.83897091,  3.20982497,  0.        ,  0.        ,  0.        ],
       ..., 
       [ 0.19579841,  0.25255789,  0.92068801,  0.        ,  0.        ],
       [ 0.24121981,  2.13581543,  0.41587206,  0.        ,  0.        ],
       [ 0.67081088,  1.40168191,  0.        ,  0.        ,  0.        ]])

In [386]:
# Per-cell info side by side with the simulated absolute distances. The column
# labels are passed as one flat list so the frame gets a plain (single-level)
# column Index and `ad_frame.dentincell` style access works.
ad_frame = pd.DataFrame(np.concatenate([info.values, absolutedistances], axis=1),
                        columns=['dentincell', 'celllength'] + list(string.ascii_lowercase[:maxDent-1]))

In [385]:
['dentincell','celllength']+list(string.ascii_lowercase[:maxDent-1])


Out[385]:
['dentincell', 'celllength', 'a', 'b', 'c', 'd', 'e']

In [ ]:


In [377]:
info.values.shape, absolutedistances.shape


Out[377]:
((3092, 2), (3092, 5))

In [424]:
# Spell out each cell's denticle count. `num2words` is imported as a function,
# and the count is cast to int so a float such as 2.0 is spelled as an integer.
ad_frame['dentword'] = ad_frame.dentincell.map(lambda x: num2words(int(x)))

# zeros mark "no value" in the simulated distances
ad_frame = ad_frame.replace(0, np.nan)

# Index by (dentword, celllength), sort, and leave out the numeric count that
# 'dentword' now duplicates. Built as one chain so `ad_frame` itself is not
# modified any further by this step.
ad_frame2 = (
    ad_frame
    .set_index(['dentword', 'celllength'])
    .sort_index(axis=0)
    .drop('dentincell', axis=1)
)

In [425]:
ad_frame2


Out[425]:
a b c d e
dentword celllength
five 9.67718 3.828522 2.871234 0.355452 1.042493 NaN
9.67718 2.402419 3.286663 0.802451 0.735989 NaN
10.21517 0.783379 4.772773 2.165122 2.062015 NaN
10.41745 0.164591 4.594247 0.267190 4.204657 NaN
10.51450 2.864351 0.468276 0.527729 0.642132 NaN
10.53762 0.048563 2.489708 1.237831 1.047637 NaN
10.96350 3.928547 2.187833 1.664557 2.209698 NaN
11.06340 4.469184 1.530701 0.697426 1.895386 NaN
11.07281 2.645895 0.131380 2.097610 1.616655 NaN
11.22730 1.332680 0.366858 3.616972 2.443087 NaN
11.42990 0.203648 0.291427 1.244580 5.675577 NaN
11.67750 3.614705 0.289535 0.490944 1.899235 NaN
12.04870 1.940853 1.426006 1.529915 6.117114 NaN
12.09701 0.972421 2.772569 0.424236 3.407286 NaN
12.16500 1.277424 0.024706 3.179137 0.369212 NaN
12.48030 0.886150 2.022908 1.988939 5.441669 NaN
12.63780 4.041213 0.269960 1.270017 1.766618 NaN
12.76749 0.632161 2.648148 2.738735 2.512870 NaN
12.80940 1.200181 7.535931 0.638466 2.036141 NaN
12.84940 3.808293 0.514118 0.016913 4.484419 NaN
13.05080 3.090497 1.473241 1.600133 1.522388 NaN
13.06470 1.067093 0.281919 2.490658 6.141534 NaN
13.18580 1.399770 3.557556 2.664825 0.047595 NaN
13.24520 1.411969 1.769969 3.613478 3.605749 NaN
13.37472 2.033891 0.411309 1.183861 0.097300 NaN
13.38670 3.965328 1.136020 0.888077 2.995084 NaN
13.52500 1.073605 2.078142 4.446620 0.808212 NaN
13.53526 1.759331 0.725860 4.123947 3.121908 NaN
13.60707 5.029873 1.299504 0.775014 3.256749 NaN
13.63820 0.206922 3.980435 4.665208 1.818099 NaN
... ... ... ... ... ... ...
two 14.43170 5.469604 NaN NaN NaN NaN
14.47050 1.318760 NaN NaN NaN NaN
14.55290 3.972786 NaN NaN NaN NaN
14.59680 0.684392 NaN NaN NaN NaN
14.71280 10.758311 NaN NaN NaN NaN
14.78700 1.941473 NaN NaN NaN NaN
14.84730 7.239552 NaN NaN NaN NaN
15.04140 2.011138 NaN NaN NaN NaN
15.07880 0.987775 NaN NaN NaN NaN
15.13270 9.980307 NaN NaN NaN NaN
15.14630 11.445231 NaN NaN NaN NaN
15.32270 4.530592 NaN NaN NaN NaN
15.36940 4.961876 NaN NaN NaN NaN
15.37030 5.902896 NaN NaN NaN NaN
15.46460 6.914277 NaN NaN NaN NaN
15.83390 0.959342 NaN NaN NaN NaN
15.83440 11.476654 NaN NaN NaN NaN
15.88760 8.808612 NaN NaN NaN NaN
15.97420 7.856730 NaN NaN NaN NaN
16.27840 5.518533 NaN NaN NaN NaN
16.36160 0.817832 NaN NaN NaN NaN
16.93880 5.822191 NaN NaN NaN NaN
17.08130 0.503493 NaN NaN NaN NaN
17.11750 3.287683 NaN NaN NaN NaN
17.29230 14.173894 NaN NaN NaN NaN
18.26680 8.795424 NaN NaN NaN NaN
19.13400 5.052840 NaN NaN NaN NaN
19.36980 6.038775 NaN NaN NaN NaN
20.37980 11.267017 NaN NaN NaN NaN
20.53000 5.736310 NaN NaN NaN NaN

3092 rows × 5 columns


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
`

In [270]:
a = b.T

In [275]:
a['dvlength'] = info['dvlength']

In [284]:
a.columns


Out[284]:
MultiIndex(levels=[[0, 1, 2, 3, 'dvlength'], ['a', 'b', 'c', 'd', 'e', '']],
           labels=[[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4], [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5]],
           names=['major', 'minor'])

In [ ]:


In [250]:
b.ix[:6].T


Out[250]:
major 0 1
minor a b c d e a
cellnumber
0 0.684563 0.000000 0.000000 0.000000 0.000000 0.170753
1 0.712199 0.000000 0.000000 0.000000 0.000000 0.196939
2 0.379713 0.418265 0.000000 0.000000 0.000000 0.070901
3 0.151623 0.578987 0.000000 0.000000 0.000000 0.160660
4 0.398447 0.000000 0.000000 0.000000 0.000000 0.000969
5 0.246148 0.028076 0.000000 0.000000 0.000000 0.001087
6 0.594546 0.000000 0.000000 0.000000 0.000000 0.716732
7 0.111093 0.617338 0.000000 0.000000 0.000000 0.235437
8 0.668389 0.000000 0.000000 0.000000 0.000000 0.169330
9 0.520975 0.005552 0.000000 0.000000 0.000000 0.559264
10 0.170766 0.056913 0.000000 0.000000 0.000000 0.061986
11 0.057887 0.449663 0.000000 0.000000 0.000000 0.187913
12 0.279399 0.335330 0.000000 0.000000 0.000000 0.082658
13 0.012875 0.140702 0.000000 0.000000 0.000000 0.168273
14 0.821847 0.000000 0.000000 0.000000 0.000000 0.378374
15 0.112708 0.000000 0.000000 0.000000 0.000000 0.128506
16 0.333873 0.287504 0.000000 0.000000 0.000000 0.430111
17 0.018567 0.008132 0.000000 0.000000 0.000000 0.013845
18 0.464106 0.275789 0.000000 0.000000 0.000000 0.030970
19 0.188076 0.696480 0.000000 0.000000 0.000000 0.338075
20 0.640560 0.000000 0.000000 0.000000 0.000000 0.938801
21 0.130828 0.000000 0.000000 0.000000 0.000000 0.339206
22 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
23 0.257142 0.000000 0.000000 0.000000 0.000000 0.416956
24 0.828290 0.000000 0.000000 0.000000 0.000000 0.300075
25 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
26 0.178426 0.000000 0.000000 0.000000 0.000000 0.596614
27 0.266072 0.358862 0.000000 0.000000 0.000000 0.238044
28 0.035902 0.000000 0.000000 0.000000 0.000000 0.883807
29 0.110420 0.000000 0.000000 0.000000 0.000000 0.871265
... ... ... ... ... ... ...
3062 0.073644 0.190448 0.013344 0.647403 0.000000 0.062959
3063 0.208024 0.200383 0.185575 0.124452 0.178634 0.165405
3064 0.234018 0.020301 0.224565 0.084511 0.340012 0.042954
3065 0.251541 0.210658 0.028885 0.425822 0.000000 0.396402
3066 0.189661 0.518076 0.000000 0.000000 0.000000 0.393190
3067 0.508572 0.196391 0.026927 0.000000 0.000000 0.032911
3068 0.134156 0.000000 0.000000 0.000000 0.000000 0.135890
3069 0.025797 0.561563 0.258384 0.000000 0.000000 0.281705
3070 0.134498 0.491820 0.000000 0.000000 0.000000 0.053766
3071 0.017045 0.098565 0.000000 0.000000 0.000000 0.051213
3072 0.214286 0.195322 0.136441 0.000000 0.000000 0.574997
3073 0.191219 0.000000 0.000000 0.000000 0.000000 0.228372
3074 0.015754 0.415485 0.376624 0.000000 0.000000 0.324631
3075 0.871368 0.017282 0.000000 0.000000 0.000000 0.482828
3076 0.136178 0.000000 0.000000 0.000000 0.000000 0.670493
3077 0.667126 0.012222 0.000000 0.000000 0.000000 0.270011
3078 0.107080 0.276561 0.325498 0.071273 0.000000 0.204122
3079 0.182775 0.263775 0.000000 0.000000 0.000000 0.081063
3080 0.322938 0.024303 0.000000 0.000000 0.000000 0.575224
3081 0.538387 0.000000 0.000000 0.000000 0.000000 0.615988
3082 0.076175 0.589649 0.000000 0.000000 0.000000 0.227687
3083 0.199175 0.538761 0.000000 0.000000 0.000000 0.050376
3084 0.296362 0.565497 0.000000 0.000000 0.000000 0.008741
3085 0.245402 0.393580 0.000000 0.000000 0.000000 0.099847
3086 0.018483 0.197234 0.339311 0.000000 0.000000 0.027366
3087 0.509667 0.152343 0.000000 0.000000 0.000000 0.578103
3088 0.177066 0.266821 0.000000 0.000000 0.000000 0.207768
3089 0.158290 0.054368 0.296359 0.000000 0.000000 0.012362
3090 0.088087 0.423367 0.229064 0.000000 0.000000 0.347663
3091 0.413484 0.235083 0.000000 0.000000 0.000000 0.423948

3092 rows × 6 columns


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
info = invivo_ln.T
info.columns

In [ ]:
# Spell out each cell's denticle count. `num2words` is imported as a function,
# and the count is cast to int so a float such as 2.0 is spelled as an integer.
info['dicword'] = info.dentincell.map(lambda x: num2words(int(x)))

In [ ]:
info.ix[:5]

In [ ]:
# Add a ('basicinfo', 'dentword') column holding the spelled-out denticle count.
# `num2words` is imported as a function, and the count is cast to int so a
# float such as 2.0 is spelled as an integer.
randomdistance_abs_frame['basicinfo','dentword'] = randomdistance_abs_frame.basicinfo.dentincell.map(lambda x: num2words(int(x)))

In [ ]:
randomdistance_abs_frame.columns

In [ ]:
randomdistance_abs_frame.drop(['basicinfo','dentword'],level='B')

In [ ]:
a = randomdistance_rel_frame.ix[:5]
a

In [ ]:
a.columns

In [ ]:


In [ ]:


In [ ]:


In [ ]:
# Preview the first rows of the relative-distance frame. The original line
# ended in a dangling '.', which is a syntax error.
# NOTE(review): `.ix` is kept as used elsewhere in this notebook; newer pandas
# no longer provides it - switch to .loc/.iloc once the index type is confirmed.
randomdistance_rel_frame.ix[:5]

In [ ]:


In [ ]:
a = pd.read_csv('yw_MonteCarlo_relative_distances_SummedDVlength20160219_180731_iterationtime=0.9710289999999979.csv')

In [ ]:
a

In [ ]:
rl = pd.read_csv('./yw_iteration0_random_distances_20160218_161159_iterationtime=23.862026999999998.csv')

In [ ]:
rl

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
# NOTE(review): unfinished draft of a per-denticle-count simulation. `cells`,
# `iterations`, `dent` and `fname2` are not assigned anywhere in this cell, so
# it can only run if other cells have defined them first.
# Largest denticle count, read from the 'dentincell' label of invivo_ln via `.ix`.
maxDent = int(max(invivo_ln.ix['dentincell']))

for dentno in range(1,maxDent):

    # Keep the entries whose 'dentincell' equals dentno, then transpose.
    # NOTE(review): the line above looks 'dentincell' up with `.ix` while this
    # one uses it as a column key - presumably written against different
    # orientations of invivo_ln; confirm.
    celldata = invivo_ln[invivo_ln['dentincell']==dentno].T
    

    for dex, row in enumerate(celldata):
        # NOTE(review): celllength is assigned here but not used below.
        celllength = celldata[row]['dvlength']
        


        # one column of random positions per iteration
        positions = np.zeros(((cells * dentno), iterations))
        # NOTE(review): `dent` here and below is presumably meant to be `dentno` - confirm.
        fname = str(dent) + '_montecarlo_positions_replicates.csv'

        for it in range(0, iterations):
            # cells x dent uniform random numbers flattened into a single row
            this = np.reshape(np.random.rand(cells, dent), (1, -1))
            positions[:, it] = this

        np.savetxt(fname, positions, delimiter=',')

        # differences between consecutive rows of `positions`
        distances = positions[1:, :] - positions[0:-1,:]
        np.savetxt(fname2, distances, delimiter=',')

In [ ]:
invivo_ln.columns

In [ ]:
a = invivo_ln[invivo_ln['dentincell']==1]['dvlength']
a.repeat(2)

In [ ]:
# (denticles per cell, number of cells) pairs to simulate; the author left the
# pairs (1,1284), (3,476) and (4,130) commented out.
cellstosim = [(2, 12)]
iterations = 10

for elem in cellstosim:
    dent, cells = elem[0], elem[1]

    # output name for this denticle count (only built here, nothing is written)
    fname = '{}_montecarlo_positions_replicates.csv'.format(dent)
    # one column per iteration, one row per simulated denticle position
    positions = np.zeros((cells * dent, iterations))

    for it in range(iterations):
        # cells x dent uniform random numbers flattened into a single row
        this = np.random.rand(cells, dent).reshape(1, -1)
        positions[:, it] = this

In [ ]:
np.random.rand(5)

In [ ]:
enumerate

In [ ]:
invivo_ln

In [ ]:
invivo_ln.T

In [ ]:
for dex, cellinfo in enumerate(invivo_ln.values):
    1

In [ ]:
a = np.zeros((5,6))
a

In [ ]:
a[0,:] = [1,1,1,1] + [0,0]

In [ ]:
def GenerateDist_Uniformrandom(celllength, denticlenumber):
    """Simulate denticle positions drawn uniformly at random within one cell.

    Draws ``denticlenumber`` values from ``np.random.rand`` and sorts them in
    ascending order.

    Returns a tuple ``(positions, relativedistances, absolutedistances)``:
    the sorted positions, the gaps between neighbouring positions, and those
    gaps multiplied by ``celllength``.
    """
    positions = np.random.rand(denticlenumber)
    positions.sort()  # ascending, in place
    # gap between each position and the next one
    relativedistances = np.diff(positions)
    # scale the gaps by the cell length
    absolutedistances = celllength * relativedistances

    return positions, relativedistances, absolutedistances

In [ ]:
denticlenumber

In [ ]:
invivo_ln = invivo_ln.T

In [ ]:
invivo_ln.shape

In [ ]:
invivo_ln.T.values

In [ ]:
type(cellinfo)

In [ ]:
cellinfo.shape, positions.shape, np.zeros(maxDent - denticlenumber).shape

In [ ]:
randompositions.shape

In [ ]:
np.concatenate((cellinfo, positions, np.zeros(maxDent - denticlenumber)))

In [ ]:
randompositions[:,0:2,0] = invivo_ln.T

In [ ]:
randompositionpanel #.T.index

In [ ]:
# maxDent = int(max(invivo_ln.ix['dentincell']))
# Width of the per-cell position array (hard-coded; the commented-out line
# above derives it from the data instead).
maxDent = 6
# Number of Monte Carlo iterations run by the loop below.
cells_to_simulate = 1
iteration = 0

# (cell, iteration, denticle) arrays filled in by the loop below
randompositions = np.zeros((len(invivo_ln.T), (cells_to_simulate), maxDent))
randomdistances_rel = np.zeros((len(invivo_ln.T), (cells_to_simulate), (maxDent-1)))
randomdistances_abs = np.zeros((len(invivo_ln.T), (cells_to_simulate), (maxDent-1)))

# randompositions[:,0:2,0] = invivo_ln.T

# generate random positions
# NOTE(review): this cell relies on pd.Panel and time.clock, which newer
# pandas / Python releases no longer provide.
for iteration in range(0, int(cells_to_simulate)):

    ittime = time.clock()

    for dex, cellinfo in enumerate(invivo_ln.T.values):
        denticlenumber, cellsize = cellinfo
        denticlenumber = int(denticlenumber)

        # uniform random positions in [0, 1), sorted smallest to largest
        positions = np.sort(np.random.rand(int(denticlenumber)), axis=0)
        # gaps between neighbouring positions
        relativedistances = positions[1:] - positions[0:-1]
        # Scale the gaps by THIS cell's length. The original multiplied by
        # `celllength`, a name this cell never assigns, so the value came from
        # whatever another cell had left behind.
        absolutedistances = relativedistances * cellsize

        # pad each row with zeros up to the fixed array width
        randompositions[dex, iteration, :] = np.concatenate((positions, np.zeros(maxDent - denticlenumber)))
        randomdistances_rel[dex, iteration, :] = np.concatenate((relativedistances, np.zeros(maxDent - denticlenumber)))
        randomdistances_abs[dex, iteration, :] = np.concatenate((absolutedistances, np.zeros(maxDent - denticlenumber)))

    # turn the 3-D arrays into frames, labelling the last axis 'a', 'b', ...
    randompositionpanel = pd.Panel(randompositions,
                                   minor_axis=[list(string.ascii_lowercase[:maxDent])]).to_frame()
    randomdist_relpanel = pd.Panel(randomdistances_rel,
                                   minor_axis=[list(string.ascii_lowercase[:maxDent-1])]).to_frame()
    randomdist_abspanel = pd.Panel(randomdistances_abs,
                                   minor_axis=[list(string.ascii_lowercase[:maxDent-1])]).to_frame()

    # give the per-cell info two-level column labels and the same index as the
    # simulated frames' columns so the pieces can be concatenated
    info = invivo_ln.T
    info.columns = [['basicinfo','basicinfo'], ['dentincell', 'dvlength']]
    info.index = randompositionpanel.columns

    randompositionframe = pd.concat([info.T,randompositionpanel]).T
    randomdistance_rel_frame = pd.concat([info.T,randomdist_relpanel]).T
    randomdistance_abs_frame = pd.concat([info.T, randomdist_abspanel]).T

    elapsedtime = time.clock() - ittime

    randompositionframe.to_csv(genotype + '_' + str(iteration) + '_MonteCarlo_positions_' + dvlen_type + time.strftime("%Y%m%d_%H%M%S") + '_iterationtime=' + str(elapsedtime) + '.csv')
    randomdistance_rel_frame.to_csv(genotype + '_' + str(iteration) + '_MonteCarlo_relative_distances_' + dvlen_type + time.strftime("%Y%m%d_%H%M%S") + '_iterationtime=' + str(elapsedtime) + '.csv')
    randomdistance_abs_frame.to_csv(genotype + '_' + str(iteration) + '_MonteCarlo_absolute_distances_' + dvlen_type + time.strftime("%Y%m%d_%H%M%S") + '_iterationtime=' + str(elapsedtime) + '.csv')

In [ ]:
randomdistance_abs_frame

In [ ]:


In [ ]:


In [ ]:
a = randompositionpanel.to_frame()

In [ ]:
a = a.replace(0,np.nan)
b = a.T
b

# b['basicinfo']['dentnumber','celllength'] = invivo_ln.T
b.ix[:5]

In [ ]:
b.columns.names

In [ ]:
d = invivo_ln.T

In [ ]:
d.shape
d.ix[:5]

In [ ]:
d.columns

In [ ]:
d = invivo_ln.T
d.columns = [['basicinfo','basicinfo'], ['dentincell', 'dvlength']]
d.index = b.index
f = pd.concat([b.T,d.T])
f.T

In [ ]:
d.columns = [['basicinfo','basicinfo'], ['dentincell', 'dvlength']]
d[:5]

In [ ]:
d.columns.names = b.columns.names

In [ ]:
d.shape

In [ ]:
b.shape

In [ ]:
d.ix[:5]

In [ ]:
b.index

In [ ]:
d.index = b.index

In [ ]:
f = pd.concat([b.T,d.T])
f.T

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: