notebook.community

Edit and run



In [1]:

    
import statsmodels.stats.power as pow
import pandas as pan
import math as math

First import the csv file with all the estimated Cohen's d



In [2]:

    
data = pan.read_csv('/home/pbellec/Desktop/adnet/adnet_dcohen_effect_size.csv')
data

Now use statsmodel to convert the $d$ estimates into required sample size, for group comparisons with balanced group composition, a sensitivity of $80\%$ and a false-positive rate $\alpha$ of $5\%$.



In [6]:

    
help(pow.TTestIndPower)









    



Help on class TTestIndPower in module statsmodels.stats.power:

class TTestIndPower(Power)
 |  Statistical Power calculations for t-test for two independent sample
 |  
 |  currently only uses pooled variance
 |  
 |  Method resolution order:
 |      TTestIndPower
 |      Power
 |      __builtin__.object
 |  
 |  Methods defined here:
 |  
 |  power(self, effect_size, nobs1, alpha, ratio=1, df=None, alternative='two-sided')
 |      Calculate the power of a t-test for two independent sample
 |      
 |      Parameters
 |      ----------
 |      effect_size : float
 |          standardized effect size, difference between the two means divided
 |          by the standard deviation. `effect_size` has to be positive.
 |      nobs1 : int or float
 |          number of observations of sample 1. The number of observations of
 |          sample two is ratio times the size of sample 1,
 |          i.e. ``nobs2 = nobs1 * ratio``
 |      alpha : float in interval (0,1)
 |          significance level, e.g. 0.05, is the probability of a type I
 |          error, that is wrong rejections if the Null Hypothesis is true.
 |      ratio : float
 |          ratio of the number of observations in sample 2 relative to
 |          sample 1. see description of nobs1
 |          The default for ratio is 1; to solve for ratio given the other
 |          arguments, it has to be explicitly set to None.
 |      df : int or float
 |          degrees of freedom. By default this is None, and the df from the
 |          ttest with pooled variance is used, ``df = (nobs1 - 1 + nobs2 - 1)``
 |      alternative : string, 'two-sided' (default), 'larger', 'smaller'
 |          extra argument to choose whether the power is calculated for a
 |          two-sided (default) or one sided test. The one-sided test can be
 |          either 'larger', 'smaller'.
 |      
 |      Returns
 |      -------
 |      power : float
 |          Power of the test, e.g. 0.8, is one minus the probability of a
 |          type II error. Power is the probability that the test correctly
 |          rejects the Null Hypothesis if the Alternative Hypothesis is true.
 |  
 |  solve_power(self, effect_size=None, nobs1=None, alpha=None, power=None, ratio=1.0, alternative='two-sided')
 |      solve for any one parameter of the power of a two sample t-test
 |      
 |      for t-test the keywords are:
 |          effect_size, nobs1, alpha, power, ratio
 |      
 |      exactly one needs to be ``None``, all others need numeric values
 |      
 |      Parameters
 |      ----------
 |      effect_size : float
 |          standardized effect size, difference between the two means divided
 |          by the standard deviation. `effect_size` has to be positive.
 |      nobs1 : int or float
 |          number of observations of sample 1. The number of observations of
 |          sample two is ratio times the size of sample 1,
 |          i.e. ``nobs2 = nobs1 * ratio``
 |      alpha : float in interval (0,1)
 |          significance level, e.g. 0.05, is the probability of a type I
 |          error, that is wrong rejections if the Null Hypothesis is true.
 |      power : float in interval (0,1)
 |          power of the test, e.g. 0.8, is one minus the probability of a
 |          type II error. Power is the probability that the test correctly
 |          rejects the Null Hypothesis if the Alternative Hypothesis is true.
 |      ratio : float
 |          ratio of the number of observations in sample 2 relative to
 |          sample 1. see description of nobs1
 |          The default for ratio is 1; to solve for ratio given the other
 |          arguments it has to be explicitly set to None.
 |      alternative : string, 'two-sided' (default), 'larger', 'smaller'
 |          extra argument to choose whether the power is calculated for a
 |          two-sided (default) or one sided test. The one-sided test can be
 |          either 'larger', 'smaller'.
 |      
 |      Returns
 |      -------
 |      value : float
 |          The value of the parameter that was set to None in the call. The
 |          value solves the power equation given the remaining parameters.
 |      
 |      
 |      Notes
 |      -----
 |      The function uses scipy.optimize for finding the value that satisfies
 |      the power equation. It first uses ``brentq`` with a prior search for
 |      bounds. If this fails to find a root, ``fsolve`` is used. If ``fsolve``
 |      also fails, then, for ``alpha``, ``power`` and ``effect_size``,
 |      ``brentq`` with fixed bounds is used. However, there can still be cases
 |      where this fails.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from Power:
 |  
 |  __init__(self, **kwds)
 |  
 |  plot_power(self, dep_var='nobs', nobs=None, effect_size=None, alpha=0.05, ax=None, title=None, plt_kwds=None, **kwds)
 |      plot power with number of observations or effect size on x-axis
 |      
 |      Parameters
 |      ----------
 |      dep_var : string in ['nobs', 'effect_size', 'alpha']
 |          This specifies which variable is used for the horizontal axis.
 |          If dep_var='nobs' (default), then one curve is created for each
 |          value of ``effect_size``. If dep_var='effect_size' or alpha, then
 |          one curve is created for each value of ``nobs``.
 |      nobs : scalar or array_like
 |          specifies the values of the number of observations in the plot
 |      effect_size : scalar or array_like
 |          specifies the values of the effect_size in the plot
 |      alpha : float or array_like
 |          The significance level (type I error) used in the power
 |          calculation. Can only be more than a scalar, if ``dep_var='alpha'``
 |      ax : None or axis instance
 |          If ax is None, than a matplotlib figure is created. If ax is a
 |          matplotlib axis instance, then it is reused, and the plot elements
 |          are created with it.
 |      title : string
 |          title for the axis. Use an empty string, ``''``, to avoid a title.
 |      plt_kwds : None or dict
 |          not used yet
 |      kwds : optional keywords for power function
 |          These remaining keyword arguments are used as arguments to the
 |          power function. Many power function support ``alternative`` as a
 |          keyword argument, two-sample test support ``ratio``.
 |      
 |      Returns
 |      -------
 |      fig : matplotlib figure instance
 |      
 |      Notes
 |      -----
 |      This works only for classes where the ``power`` method has
 |      ``effect_size``, ``nobs`` and ``alpha`` as the first three arguments.
 |      If the second argument is ``nobs1``, then the number of observations
 |      in the plot are those for the first sample.
 |      TODO: fix this for FTestPower and GofChisquarePower
 |      
 |      TODO: maybe add line variable, if we want more than nobs and effectsize
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from Power:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)



In [9]:

    
list_site = [ 'pooled' , 'ADNI2' , 'CRIUGMa' , 'CRIUGMb' , 'MNI' ]
ssize = data.copy()
for site in list_site:
    for rr in range(0,30):
     ssize.loc[rr,site] = 2*math.ceil(pow.TTestIndPower().solve_power(effect_size=math.fabs(data.loc[rr,site]), power=0.8, ratio=1, alpha=0.05))
ssize

Now simply write the results into a csv



In [10]:

    
ssize.to_csv('/home/pbellec/Desktop/adnet/adnet_sample_size.csv')

	Seed	Connection	pooled	ADNI2	CRIUGMa	CRIUGMb	MNI
0	22	7	0.4145	0.3618	1.4910	0.3228	0.1523
1	22	9	0.3079	0.2680	0.7692	0.3094	0.1734
2	22	11	0.3035	0.2185	0.9931	0.0604	0.5118
3	22	17	0.3216	0.1853	0.8669	0.3478	0.4969
4	22	20	0.2793	0.2109	0.6371	0.0871	0.5520
5	22	23	0.2885	0.2469	1.6770	0.0480	0.0319
6	22	28	0.3064	0.2296	1.2467	0.3550	0.0208
7	22	30	0.2803	0.1700	1.0418	0.0726	0.5100
8	22	31	0.3430	0.2294	1.1769	0.0967	0.6050
9	2	1	0.4501	0.3020	0.6207	0.5683	0.8152
10	2	4	0.2915	0.2403	0.3460	0.1644	0.6182
11	2	7	0.3404	0.1046	1.4018	0.3998	0.6383
12	2	14	0.3934	0.2541	0.5958	0.6490	0.5318
13	2	17	0.4265	0.3060	1.0641	0.6963	0.2243
14	2	20	0.3596	0.3849	0.1638	0.3841	0.3640
15	2	31	0.3928	0.1799	0.9057	0.8256	0.4560
16	2	33	0.3632	0.2327	0.3245	0.4497	0.8414
17	9	5	0.4210	0.5660	0.1482	0.3707	0.1097
18	9	10	0.4475	0.4108	0.5355	0.6368	0.3224
19	9	16	0.3105	0.2297	1.0588	0.3139	0.1806
20	9	17	0.2894	0.4264	0.6001	0.0133	-0.1172
21	9	22	0.3079	0.2680	0.7692	0.3094	0.1734
22	9	25	0.3546	0.2962	1.2266	0.3544	0.0872
23	9	26	0.3305	0.2806	0.6138	0.5236	0.1275
24	12	8	0.3674	0.4551	0.4566	0.1929	0.1738
25	12	10	0.3842	0.4355	0.0969	0.2626	0.5220
26	12	16	0.3735	0.3865	0.9229	-0.0064	0.4317
27	12	20	0.3471	0.2720	0.6641	0.3732	0.4104
28	12	23	0.3192	0.2561	0.9392	0.3202	0.1893
29	12	28	0.2851	0.1613	0.7195	0.4142	0.3524

	Seed	Connection	pooled	ADNI2	CRIUGMa	CRIUGMb	MNI
0	22	7	186	242	18	304	1356
1	22	9	334	440	56	330	1048
2	22	11	344	660	34	8608	122
3	22	17	306	918	44	262	130
4	22	20	406	708	80	4142	106
5	22	23	380	518	14	13630	30856
6	22	28	338	598	24	252	72570
7	22	30	402	1090	32	5960	124
8	22	31	270	600	26	3360	88
9	2	1	158	348	84	100	50
10	2	4	372	546	266	1164	86
11	2	7	274	2872	20	200	80
12	2	14	206	490	92	78	114
13	2	17	176	338	30	68	626
14	2	20	246	214	1174	216	240
15	2	31	206	972	42	50	154
16	2	33	240	582	302	158	48
17	9	5	180	100	1432	232	2612
18	9	10	160	188	112	80	304
19	9	16	328	598	32	322	966
20	9	17	378	176	90	177488	2288
21	9	22	334	440	56	330	1048
22	9	25	252	360	24	252	4132
23	9	26	290	402	86	118	1934
24	12	8	236	154	154	846	1042
25	12	10	216	168	3346	458	118
26	12	16	228	214	40	766494	172
27	12	20	264	428	74	228	190
28	12	23	312	482	38	310	880
29	12	28	390	1210	64	186	256