In [1]:
import scipy as sp
from scipy.stats import chisquare
from scipy.stats import binom_test

In [2]:
import pandas as pd
import numpy as np

In [4]:
rts_colony = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Bishayee Colony Counts 10.27.97-3.8.01.csv',
                         skiprows=2,usecols=range(3,7),na_values=' ')
rts_colony['col_min'] = rts_colony.apply(lambda row: min(row['col1'],row['col2'],row['col3']) ,axis=1)
rts_colony['col_max'] = rts_colony.apply(lambda row: max(row['col1'],row['col2'],row['col3']) ,axis=1)
rts_colony['col_gap'] = rts_colony['col_max']-rts_colony['col_min']
rts_colony['has_na'] = ((pd.isnull(rts_colony['col1'])) | (pd.isnull(rts_colony['col2'])) | (pd.isnull(rts_colony['col3'])))

total = np.sum(rts_colony['has_na'] == False)
complete = np.sum((rts_colony['has_na'] == False) & (rts_colony['col_gap'] >= 2))
no_mean = sum(((rts_colony['col1'] == round(rts_colony['average'])) | (rts_colony['col2'] == round(rts_colony['average'])) | (rts_colony['col3'] == round(rts_colony['average']))) & (rts_colony['col_gap'] >= 2) & (rts_colony['has_na'] == False))
print(complete, total, no_mean)


1343 1361 690

In [5]:
rts_coulter = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Bishayee Coulter Counts.10.20.97-7.16.01.csv', 
                          skiprows=1,usecols=range(2,6),na_values=' ')

rts_coulter['col_min'] = rts_coulter.apply(lambda row: min(row['Count 1'],row['Count 2'],row['Count 3']) ,axis=1)
rts_coulter['col_max'] = rts_coulter.apply(lambda row: max(row['Count 1'],row['Count 2'],row['Count 3']) ,axis=1)
rts_coulter['col_gap'] = rts_coulter['col_max']-rts_coulter['col_min']
rts_coulter['has_na'] = ((pd.isnull(rts_coulter['Count 1'])) | (pd.isnull(rts_coulter['Count 2'])) | (pd.isnull(rts_coulter['Count 3'])))

total = np.sum(rts_coulter['has_na'] == False)
complete = np.sum((rts_coulter['has_na'] == False) & (rts_coulter['col_gap'] >= 2))
no_mean = sum(((rts_coulter['Count 1'] == round(rts_coulter['Average'])) | (rts_coulter['Count 2'] == round(rts_coulter['Average'])) | (rts_coulter['Count 3'] == round(rts_coulter['Average']))) & (rts_coulter['col_gap'] >= 2) & (rts_coulter['has_na'] == False))
print(complete, total, no_mean)


1726 1727 176

In [6]:
others_colony = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Other Investigators in Lab.Colony Counts.4.23.92-11.27.02.csv', 
                          skiprows=1,usecols=range(3,7),na_values=' ')
others_colony['col_min'] = others_colony.apply(lambda row: min(row['col1'],row['col2'],row['col3']) ,axis=1)
others_colony['col_max'] = others_colony.apply(lambda row: max(row['col1'],row['col2'],row['col3']) ,axis=1)
others_colony['col_gap'] = others_colony['col_max'] - others_colony['col_min']
others_colony['has_na'] = ((pd.isnull(others_colony['col1'])) | (pd.isnull(others_colony['col2'])) | (pd.isnull(others_colony['col3'])))

total = np.sum(others_colony['has_na'] == False)
complete = np.sum((others_colony['has_na'] == False) & (others_colony['col_gap'] >= 2))
no_mean = sum(((others_colony['col1'] == round(others_colony['average'])) | (others_colony['col2'] == round(others_colony['average'])) | (others_colony['col3'] == round(others_colony['average']))) & (others_colony['col_gap'] >= 2) & (others_colony['has_na'] == False))
print(complete, total, no_mean)


578 597 109

In [7]:
others_coulter = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Other Investigators in Lab.Coulter Counts.4.15.92-5.21.05.csv', 
                          skiprows=1,na_values=' ')
others_coulter['col_min'] = others_coulter.apply(lambda row: min(row['Coul 1'],row['Coul 2'],row['Coul 3']) ,axis=1)
others_coulter['col_max'] = others_coulter.apply(lambda row: max(row['Coul 1'],row['Coul 2'],row['Coul 3']) ,axis=1)
others_coulter['col_gap'] = others_coulter['col_max'] - others_coulter['col_min']
others_coulter['has_na'] = ((pd.isnull(others_coulter['Coul 1'])) | (pd.isnull(others_coulter['Coul 2'])) | (pd.isnull(others_coulter['Coul 3'])))

total = np.sum(others_coulter['has_na'] == False)
complete = np.sum((others_coulter['has_na'] == False) & (others_coulter['col_gap'] >= 2))
no_mean = sum(((others_coulter['Coul 1'] == round(others_coulter['Average'])) | (others_coulter['Coul 2'] == round(others_coulter['Average'])) | (others_coulter['Coul 3'] == round(others_coulter['Average']))) & (others_coulter['col_gap'] >= 2) & (others_coulter['has_na'] == False))
print(complete, total, no_mean)


929 929 36

In [8]:
Lab1_colony = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Outside Lab 1.Coulter Counts.6.7.91-4.9.99.csv', 
                          na_values=' ')
Lab1_colony['col_min'] = Lab1_colony.apply(lambda row: min(row['Unnamed: 1'],row['Unnamed: 2'],row['Unnamed: 3']) ,axis=1)
Lab1_colony['col_max'] = Lab1_colony.apply(lambda row: max(row['Unnamed: 1'],row['Unnamed: 2'],row['Unnamed: 3']) ,axis=1)
Lab1_colony['col_gap'] = Lab1_colony['col_max'] - Lab1_colony['col_min']
Lab1_colony['has_na'] = ((pd.isnull(Lab1_colony['Unnamed: 1'])) | (pd.isnull(Lab1_colony['Unnamed: 2'])) | (pd.isnull(Lab1_colony['Unnamed: 3'])))

total = np.sum(Lab1_colony['has_na'] == False)
complete = np.sum((Lab1_colony['has_na'] == False) & (Lab1_colony['col_gap'] >= 2))
no_mean = sum(((Lab1_colony['Unnamed: 1'] == round(Lab1_colony['Unnamed: 4'])) | (Lab1_colony['Unnamed: 2'] == round(Lab1_colony['Unnamed: 4'])) | (Lab1_colony['Unnamed: 3'] == round(Lab1_colony['Unnamed: 4']))) & (Lab1_colony['col_gap'] >= 2) & (Lab1_colony['has_na'] == False))
print(complete, total, no_mean)


97 97 0

In [9]:
Lab2_colony = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Outside Lab 2.Coulter Counts.6.6.08-7.7.08.csv', 
                          na_values=' ',skiprows=1)
Lab2_colony['col_min'] = Lab2_colony.apply(lambda row: min(row['Count 1'],row['Count 2'],row['Count 3']) ,axis=1)
Lab2_colony['col_max'] = Lab2_colony.apply(lambda row: max(row['Count 1'],row['Count 2'],row['Count 3']) ,axis=1)
Lab2_colony['col_gap'] = Lab2_colony['col_max'] - Lab2_colony['col_min']
Lab2_colony['has_na'] = ((pd.isnull(Lab2_colony['Count 1'])) | (pd.isnull(Lab2_colony['Count 2'])) | (pd.isnull(Lab2_colony['Count 3'])))

total = np.sum(Lab2_colony['has_na'] == False)
complete = np.sum((Lab2_colony['has_na'] == False) & (Lab2_colony['col_gap'] >= 2))
no_mean = sum(((Lab2_colony['Count 1'] == round(Lab2_colony['Average'])) | (Lab2_colony['Count 2'] == round(Lab2_colony['Average'])) | (Lab2_colony['Count 3'] == round(Lab2_colony['Average']))) & (Lab2_colony['col_gap'] >= 2) & (Lab2_colony['has_na'] == False))
print(complete, total, no_mean)


120 120 1

In [10]:
Lab3_colony = pd.read_csv('C:/Users/courtney/Documents/Schoolwork/Stat_215a/Outside Lab 3.Colony Counts.2.4.10-5.21.12.csv', 
                          na_values=' ',skiprows=1)

Lab3_colony['col_min'] = Lab3_colony.apply(lambda row: min(row['c1'],row['c2'],row['c3']) ,axis=1)
Lab3_colony['col_max'] = Lab3_colony.apply(lambda row: max(row['c1'],row['c2'],row['c3']) ,axis=1)
Lab3_colony['col_gap'] = Lab3_colony['col_max'] - Lab3_colony['col_min']
Lab3_colony['has_na'] = ((pd.isnull(Lab3_colony['c1'])) | (pd.isnull(Lab3_colony['c2'])) | (pd.isnull(Lab3_colony['c3'])))

total = np.sum(Lab3_colony['has_na'] == False)
complete = np.sum((Lab3_colony['has_na'] == False) & (Lab3_colony['col_gap'] >= 2))
no_mean = sum(((Lab3_colony['c1'] == round(Lab3_colony['average'])) | (Lab3_colony['c2'] == round(Lab3_colony['average'])) | (Lab3_colony['c3'] == round(Lab3_colony['average']))) & (Lab3_colony['col_gap'] >= 2) & (Lab3_colony['has_na'] == False))
print(complete, total, no_mean)


49 50 3

In [11]:
# Calculate the p-value for Hypothesis 1. For now I just used the numbers from their paper for x and p
H1_pvalue = binom_test(x=690,n=1343,p=0.42,alternative='greater')
print(H1_pvalue)


2.85336516548e-12

In [12]:
# re-creat Table 3 in the paper, RTS COULTER
rts_coulter_col1_terminal = rts_coulter['Count 1']
rts_coulter_col1_terminal = rts_coulter_col1_terminal[pd.notnull(rts_coulter_col1_terminal)]
rts_coulter_col1_terminal= rts_coulter_col1_terminal.astype(str).str[-1:].astype(int)
rts_coulter_col2_terminal = rts_coulter['Count 2']
rts_coulter_col2_terminal = rts_coulter_col2_terminal[pd.notnull(rts_coulter_col2_terminal)]
rts_coulter_col2_terminal = rts_coulter_col2_terminal.astype(str).str[-1:].astype(int)
rts_coulter_col3_terminal = rts_coulter['Count 3']
rts_coulter_col3_terminal = rts_coulter_col3_terminal[pd.notnull(rts_coulter_col3_terminal)]
rts_coulter_col3_terminal = rts_coulter_col3_terminal.astype(str).str[:-2].str[-1:].astype(int)
rts_coulter_terminal= pd.concat([rts_coulter_col1_terminal,rts_coulter_col2_terminal,rts_coulter_col3_terminal])
chi_pvalue_rts_coulter = chisquare(f_obs=rts_coulter_terminal.value_counts() )
print(chi_pvalue_rts_coulter,rts_coulter_terminal.value_counts(),len(rts_coulter_terminal))


Power_divergenceResult(statistic=466.87463837994221, pvalue=7.0622726934816284e-95) 2    736
5    732
9    718
1    613
0    475
7    425
3    416
8    372
6    363
4    335
dtype: int64 5185

In [13]:
# re-creat Table 3 in the paper, RTS COLONY


rts_colony_col1_terminal = rts_colony['col1']
rts_colony_col1_terminal = rts_colony_col1_terminal[pd.notnull(rts_colony_col1_terminal)]
rts_colony_col1_terminal = rts_colony_col1_terminal.astype(str).str[-1:].astype(int)
rts_colony_col2_terminal = rts_colony['col2']
rts_colony_col2_terminal = rts_colony_col2_terminal[pd.notnull(rts_colony_col2_terminal)]
rts_colony_col2_terminal = rts_colony_col2_terminal.astype(str).str[-1:].astype(int)
rts_colony_col3_terminal = rts_colony['col3']
rts_colony_col3_terminal = rts_colony_col3_terminal[pd.notnull(rts_colony_col3_terminal)]
rts_colony_col3_terminal = rts_colony_col3_terminal.astype(str).str[:-2].str[-1:].astype(int)

rts_colony_terminal= pd.concat([rts_colony_col1_terminal,rts_colony_col2_terminal,rts_colony_col3_terminal])

rts_colony_terminal= pd.concat([rts_colony_col1_terminal,rts_colony_col2_terminal,rts_colony_col3_terminal])
chi_pvalue_rts_colony = chisquare(f_obs=rts_colony_terminal.value_counts())
print(chi_pvalue_rts_colony,rts_colony_terminal.value_counts())
len(rts_colony_terminal)


Power_divergenceResult(statistic=200.72582619339045, pvalue=2.3337800143638883e-38) 0    564
9    526
5    478
2    463
7    408
8    383
6    336
1    324
3    313
4    290
dtype: int64
Out[13]:
4085

In [14]:
# re-creat Table 3 in the paper, OTHERS COLONY

others_colony_col1_terminal = others_colony['col1']
others_colony_col1_terminal = others_colony_col1_terminal[pd.notnull(others_colony_col1_terminal)]
others_colony_col1_terminal = others_colony_col1_terminal.astype(str).str[-1:].astype(int)
others_colony_col2_terminal = others_colony['col2']
others_colony_col2_terminal = others_colony_col2_terminal[pd.notnull(others_colony_col2_terminal)]
others_colony_col2_terminal = others_colony_col2_terminal.astype(str).str[:-2].str[-1:].astype(int)
others_colony_col3_terminal = others_colony['col3']
others_colony_col3_terminal = others_colony_col3_terminal[pd.notnull(others_colony_col3_terminal)]
others_colony_col3_terminal = others_colony_col3_terminal.astype(str).str[:-2].str[-1:].astype(int)

others_colony_terminal= pd.concat([others_colony_col1_terminal,others_colony_col2_terminal,others_colony_col3_terminal])
chi_pvalue_others_colony = chisquare(f_obs=others_colony_terminal.value_counts())
print(chi_pvalue_others_colony,others_colony_terminal.value_counts(),len(others_colony_terminal))


Power_divergenceResult(statistic=1.7906215921483095, pvalue=0.99436253377195383) 2    195
0    191
8    185
7    185
4    184
9    181
1    181
3    179
6    178
5    175
dtype: int64 1834

In [ ]:
# re-creat Table 3 in the paper, OTHERS COULTER

others_coulter_col1_terminal = others_coulter['Coul 1']
others_coulter_col1_terminal = others_coulter_col1_terminal[pd.notnull(others_coulter_col1_terminal)]
others_coulter_col1_terminal = others_coulter_col1_terminal.astype(str).str[:-2].str[-1:].astype(int)
others_coulter_col2_terminal = others_coulter['Coul 2']
others_coulter_col2_terminal = others_coulter_col2_terminal[pd.notnull(others_coulter_col2_terminal)]
others_coulter_col2_terminal = others_coulter_col2_terminal.astype(str).str[:-2].str[-1:].astype(int)
others_coulter_col3_terminal = others_coulter['Coul 3']
others_coulter_col3_terminal = others_coulter_col3_terminal[pd.notnull(others_coulter_col3_terminal)]
others_coulter_col3_terminal = others_coulter_col3_terminal.astype(str).str[:-2].str[-1:].astype(int)

others_coulter_terminal= pd.concat([others_coulter_col1_terminal,others_coulter_col2_terminal,others_coulter_col3_terminal])
chi_pvalue_others_coulter = chisquare(f_obs=others_coulter_terminal.value_counts())
print(chi_pvalue_others_coulter,others_coulter_terminal.value_counts(),len(others_coulter_terminal))

In [38]:
# re-creat Table 3 in the paper, Lab 1 Colony

Lab1_colony_col1_terminal = Lab1_colony['Unnamed: 1']
Lab1_colony_col1_terminal = Lab1_colony_col1_terminal[pd.notnull(Lab1_colony_col1_terminal)]
Lab1_colony_col1_terminal = Lab1_colony_col1_terminal.astype(str).str[-1:].astype(int)
Lab1_colony_col2_terminal = Lab1_colony['Unnamed: 2']
Lab1_colony_col2_terminal = Lab1_colony_col2_terminal[pd.notnull(Lab1_colony_col2_terminal)]
Lab1_colony_col2_terminal = Lab1_colony_col2_terminal.astype(str).str[-1:].astype(int)
Lab1_colony_col3_terminal = Lab1_colony['Unnamed: 3']
Lab1_colony_col3_terminal = Lab1_colony_col3_terminal[pd.notnull(Lab1_colony_col3_terminal)]
Lab1_colony_col3_terminal = Lab1_colony_col3_terminal.astype(str).str[:-2].str[-1:].astype(int)

Lab1_colony_terminal= pd.concat([Lab1_colony_col1_terminal,Lab1_colony_col2_terminal,Lab1_colony_col3_terminal])
chi_pvalue_Lab1_colony = chisquare(f_obs=Lab1_colony_terminal.value_counts())
print(chi_pvalue_Lab1_colony,Lab1_colony_terminal.value_counts(),len(Lab1_colony_terminal))


Power_divergenceResult(statistic=9.4761904761904763, pvalue=0.39452725669582323) 6    44
5    36
1    34
9    33
7    33
2    29
0    28
4    27
8    26
3    25
dtype: int64 315

In [44]:
# re-creat Table 3 in the paper, Lab 2 Colony

Lab2_colony_col1_terminal = Lab2_colony['Count 1']
Lab2_colony_col1_terminal = Lab2_colony_col1_terminal[pd.notnull(Lab2_colony_col1_terminal)]
Lab2_colony_col1_terminal = Lab2_colony_col1_terminal.astype(str).str[-1:].astype(int)
Lab2_colony_col2_terminal = Lab2_colony['Count 2']
Lab2_colony_col2_terminal = Lab2_colony_col2_terminal[pd.notnull(Lab2_colony_col2_terminal)]
Lab2_colony_col2_terminal = Lab2_colony_col2_terminal.astype(str).str[-1:].astype(int)
Lab2_colony_col3_terminal = Lab2_colony['Count 3']
Lab2_colony_col3_terminal = Lab2_colony_col3_terminal[pd.notnull(Lab2_colony_col3_terminal)]
Lab2_colony_col3_terminal = Lab2_colony_col3_terminal.astype(str).str[-1:].astype(int)

Lab2_colony_terminal= pd.concat([Lab2_colony_col1_terminal,Lab2_colony_col2_terminal,Lab2_colony_col3_terminal])
chi_pvalue_Lab2_colony = chisquare(f_obs=Lab2_colony_terminal.value_counts())
print(chi_pvalue_Lab2_colony,Lab2_colony_terminal.value_counts(),len(Lab2_colony_terminal))


Power_divergenceResult(statistic=4.9444444444444446, pvalue=0.83912402419536658) 2    45
5    42
1    38
8    35
7    35
3    35
0    34
9    33
4    32
6    31
dtype: int64 360

In [49]:
# re-creat Table 3 in the paper, Lab 3 Colony

Lab3_colony_col1_terminal = Lab3_colony['c1']
Lab3_colony_col1_terminal = Lab3_colony_col1_terminal[pd.notnull(Lab3_colony_col1_terminal)]
Lab3_colony_col1_terminal = Lab3_colony_col1_terminal.astype(str).str[-1:].astype(int)
Lab3_colony_col2_terminal = Lab3_colony['c2']
Lab3_colony_col2_terminal = Lab3_colony_col2_terminal[pd.notnull(Lab3_colony_col2_terminal)]
Lab3_colony_col2_terminal = Lab3_colony_col2_terminal.astype(str).str[-1:].astype(int)
Lab3_colony_col3_terminal = Lab3_colony['c3']
Lab3_colony_col3_terminal = Lab3_colony_col3_terminal[pd.notnull(Lab3_colony_col3_terminal)]
Lab3_colony_col3_terminal = Lab3_colony_col3_terminal.astype(str).str[-1:].astype(int)

Lab3_colony_terminal= pd.concat([Lab3_colony_col1_terminal,Lab3_colony_col2_terminal,Lab3_colony_col3_terminal])
chi_pvalue_Lab3_colony = chisquare(f_obs=Lab3_colony_terminal.value_counts())
print(chi_pvalue_Lab3_colony,Lab3_colony_terminal.value_counts(),len(Lab2_colony_terminal))


Power_divergenceResult(statistic=12.133333333333333, pvalue=0.20589657510344794) 0    21
7    19
5    19
4    19
3    16
2    15
9    12
8    11
6     9
1     9
dtype: int64 360

In [29]:
## Binomial test for last two digits in RTS coulter data
rts_coulter_col1_terminal = rts_coulter['Count 1']
rts_coulter_col1_terminal = rts_coulter_col1_terminal[pd.notnull(rts_coulter_col1_terminal)]
rts_coulter_col1_terminal2= rts_coulter_col1_terminal.astype(str).str[-2:].astype(int)
x1 = np.sum((rts_coulter_col1_terminal2 == 00) | (rts_coulter_col1_terminal2 == 11) | (rts_coulter_col1_terminal2 == 22) | (rts_coulter_col1_terminal2 == 33) | (rts_coulter_col1_terminal2 == 44) | (rts_coulter_col1_terminal2 == 55) | (rts_coulter_col1_terminal2 == 66) | (rts_coulter_col1_terminal2 == 77) | (rts_coulter_col1_terminal2 == 88) | (rts_coulter_col1_terminal2 == 99))
print(x1)

rts_coulter_col2_terminal = rts_coulter['Count 2']
rts_coulter_col2_terminal = rts_coulter_col2_terminal[pd.notnull(rts_coulter_col2_terminal)]
rts_coulter_col2_terminal2= rts_coulter_col2_terminal.astype(str).str[-2:].astype(int)
x2 = np.sum((rts_coulter_col2_terminal2 == 00) | (rts_coulter_col2_terminal2 == 11) | (rts_coulter_col2_terminal2 == 22) | (rts_coulter_col2_terminal2 == 33) | (rts_coulter_col2_terminal2 == 44) | (rts_coulter_col2_terminal2 == 55) | (rts_coulter_col2_terminal2 == 66) | (rts_coulter_col2_terminal2 == 77) | (rts_coulter_col2_terminal2 == 88) | (rts_coulter_col2_terminal2 == 99))
print(x2)

rts_coulter_col3_terminal = rts_coulter['Count 3']
rts_coulter_col3_terminal = rts_coulter_col3_terminal[pd.notnull(rts_coulter_col3_terminal)]
rts_coulter_col3_terminal2 = rts_coulter_col3_terminal.astype(str).str[:-2].str[-2:].astype(int)
x3 = np.sum((rts_coulter_col3_terminal2 == 00) | (rts_coulter_col3_terminal2 == 11) | (rts_coulter_col3_terminal2 == 22) | (rts_coulter_col3_terminal2 == 33) | (rts_coulter_col3_terminal2 == 44) | (rts_coulter_col3_terminal2 == 55) | (rts_coulter_col3_terminal2 == 66) | (rts_coulter_col3_terminal2 == 77) | (rts_coulter_col3_terminal2 == 88) | (rts_coulter_col3_terminal2 == 99))
print(x3)

myx = (x1+x2+x3)
myn = (len(rts_coulter_col1_terminal2) + len(rts_coulter_col2_terminal2) + len(rts_coulter_col3_terminal2))
print(myn,myx)


229
222
193
5185 644

In [16]:
Terminal2_pvalue = binom_test(x=myx,n=myn,p=0.1,alternative='greater')
print(Terminal2_pvalue)


1.04266591861e-08

In [27]:
## Binomial test for last two digits in Others Coulter data
others_coulter_col1_terminal = others_coulter['Coul 1']
others_coulter_col1_terminal = others_coulter_col1_terminal[pd.notnull(others_coulter_col1_terminal)]
others_coulter_col1_terminal2= others_coulter_col1_terminal.astype(str).str[:-2].str[-2:].astype(int)
x1 = np.sum((others_coulter_col1_terminal2 == 00) | (others_coulter_col1_terminal2 == 11) | (others_coulter_col1_terminal2 == 22) | (others_coulter_col1_terminal2 == 33) | (others_coulter_col1_terminal2 == 44) | (others_coulter_col1_terminal2 == 55) | (others_coulter_col1_terminal2 == 66) | (others_coulter_col1_terminal2 == 77) | (others_coulter_col1_terminal2 == 88) | (others_coulter_col1_terminal2 == 99))
print(x1)

others_coulter_col2_terminal = others_coulter['Coul 2']
others_coulter_col2_terminal = others_coulter_col2_terminal[pd.notnull(others_coulter_col2_terminal)]
others_coulter_col2_terminal2= others_coulter_col2_terminal.astype(str).str[:-2].str[-2:].astype(int)
x2 = np.sum((others_coulter_col2_terminal2 == 00) | (others_coulter_col2_terminal2 == 11) | (others_coulter_col2_terminal2 == 22) | (others_coulter_col2_terminal2 == 33) | (others_coulter_col2_terminal2 == 44) | (others_coulter_col2_terminal2 == 55) | (others_coulter_col2_terminal2 == 66) | (others_coulter_col2_terminal2 == 77) | (others_coulter_col2_terminal2 == 88) | (others_coulter_col2_terminal2 == 99))
print(x2)

others_coulter_col3_terminal = others_coulter['Coul 3']
others_coulter_col3_terminal = others_coulter_col3_terminal[pd.notnull(others_coulter_col3_terminal)]
others_coulter_col3_terminal2 = others_coulter_col3_terminal.astype(str).str[:-2].str[-2:].astype(int)
x3 = np.sum((others_coulter_col3_terminal2 == 00) | (others_coulter_col3_terminal2 == 11) | (others_coulter_col3_terminal2 == 22) | (others_coulter_col3_terminal2 == 33) | (others_coulter_col3_terminal2 == 44) | (others_coulter_col3_terminal2 == 55) | (others_coulter_col3_terminal2 == 66) | (others_coulter_col3_terminal2 == 77) | (others_coulter_col3_terminal2 == 88) | (others_coulter_col3_terminal2 == 99))
print(x3)


myx = (x1+x2+x3)
myn = (len(others_coulter_col1_terminal2) + len(others_coulter_col2_terminal2) + len(others_coulter_col3_terminal2))
print(myn,myx)


96
98
97
2942 291

In [28]:
Terminal2_pvalue_others_coulter = binom_test(x=myx,n=myn,p=0.1,alternative='greater')
print(Terminal2_pvalue_others_coulter)


0.586904796156

In [ ]: