notebook.community

Edit and run



In [ ]:

    
%load_ext autoreload
%autoreload 2


import numpy as np
import sys



In [ ]:

    
import json



In [ ]:

    
with open('/tmp/web_test_2017_06_21/2017_06_21_R0_10_jpg_x512_4096x4096.json', 'r') as f:
   aaa = json.load(f)



In [ ]:

    
bb = aaa['all_rates'][1]



In [ ]:

    
cc = aaa['all_rates'][2]



In [ ]:



In [ ]:

    
print np.median(bb)
print np.median(cc)



In [ ]:

    
# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = aaa['all_rates'][0] + aaa['all_rates'][1] + aaa['all_rates'][2] + aaa['all_rates'][3]
  
df['conditions'] = ['512']*len(aaa['all_rates'][1]) + ['1k']*len(aaa['all_rates'][1]) + ['2k']*len(aaa['all_rates'][2]) + ['4k']*len(aaa['all_rates'][3])

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(aaa['all_rates'][1],  aaa['all_rates'][2], equal_var=False)
print '1k vs. 2k', t,p



In [ ]:

    
# There was a significant effect of which tile size was used
# at the p<.05 level for the four conditions 512, 1k, 2k, 4k
# F(3,36) = 301.168, p < 0.001


# Post hoc comparisons (after Bonferroni correction) indicate that the speed of 1k tiles was significantly higher than for 2k tiles
# (t_36 = 3.9012, p = 0.00148)



In [ ]:

    
print df.anova1way.__doc__



In [ ]:

    
with open('/tmp/web_test_2017_06_21/2017_06_21_R0_500_jpg_x512_4096x4096.json', 'r') as f:
   R0_500 = json.load(f)

full_times = R0_500['all_times']

# Tile sizes and full shape
tiles_x = R0_500['tiles'] 
full_shape = R0_500['shape'] 
# Get the number of tiles
n_tiles = np.prod(full_shape / np.c_[tiles_x, tiles_x], 1) 
# Get the times per tile
tile_times = (full_times / n_tiles[:,np.newaxis]).tolist()
# print R0_500['all_times']
print len(full_times)
print len(tile_times)



In [ ]:

    
# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

test_array = full_times

df = DataFrame()
df['data'] = test_array[0] + test_array[1] + test_array[2] + test_array[3]

df['conditions'] = ['512']*len(test_array[1]) + ['1k']*len(test_array[1]) + ['2k']*len(test_array[2]) + ['4k']*len(test_array[3])

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(test_array[1],  test_array[2], equal_var=False)
print '1k vs. 2k', t,p
t, p = ttest_ind(test_array[1],  test_array[3], equal_var=False)
print '1k vs. 4k', t,p
t, p = ttest_ind(test_array[1],  test_array[0], equal_var=False)
print '1k vs. 512', t,p



In [ ]:

    
# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

test_array = tile_times

df = DataFrame()
df['data'] = test_array[0] + test_array[1] + test_array[2] + test_array[3]

df['conditions'] = ['512']*len(test_array[1]) + ['1k']*len(test_array[1]) + ['2k']*len(test_array[2]) + ['4k']*len(test_array[3])

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(test_array[1],  test_array[2], equal_var=False)
print '1k vs. 2k', t,p



In [39]:

    
###
# Begining of File Storage Experiment
###

import os
import json
from glob import glob1
%load_ext autoreload
%autoreload 2

import numpy as np
import sys

graph_dir = "/tmp/2017_06_27_32K_tiff"

tile_shapes = []
file_counts = []

# Open json trial
def open_json(in_file):
    in_path = os.path.join(graph_dir, in_file)
    with open(in_path,'r') as fd:
        # One trial of json data
        return json.load(fd)

# Load one trial
def load_rates(in_file):
    # Get only the rates
    return open_json(in_file)['mbps']

# Load constants
def load_K(in_file):
    json_trial = open_json(in_file) 
    constants = ['tile_shape','file_shape']
    print json_trial.keys()
    return map(json_trial.get, constants) 

# Load all the trials
all_json = glob1(graph_dir,'*.json')
rates = np.array(map(load_rates, all_json))
tiles, files = load_K(all_json[0])

# Get the four smallest tile sides
four_tiles = zip(*tiles[:4])[0]
files = zip(*files)[0]









    



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[u'n_files', u'file_shape', u'seconds', u'mbps', u'tile_shape', u'full_shape']



In [40]:

    
# All data for testing:
all_rates = []
all_tiles = []
all_files = []
all_id = []
# All data in a dictionary
all_data = {}

# Reformat the data into three columns
for ti,t in enumerate(four_tiles):
    # Add dictionaries 
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Get the tiles per file
        t_f = (f/t)**2
        # Only the first four cases
        if t_f < 1 or t_f > 64:
            continue
        # Get all the data
        ft_rates = rates[:,fi,ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)]*len(ft_rates)
        all_files += [str(t_f)]*len(ft_rates)
        # All labels have ids
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id]*len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(t_f)] = ft_rates
        
# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id

# Tiles size and File size all between-subjects variables because 
# different files are created and destroyed for all conditions

aov_pyvttbl = df.anova('data', sub='id', bfactors=['tile','file'])
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats

t, p = ttest_ind(all_data['4096']['1'], all_data['2048']['1'], equal_var=False)
print 'One 4k tile per file vs. One 2k tile per file', t,p









    



data ~ tile * file

TESTS OF BETWEEN-SUBJECTS EFFECTS

Measure: data
  Source        Type III      df        MS           F        Sig.   et2_G   Obs.   SE   95% CI     lambda     Obs.  
                   SS                                                                                          Power 
====================================================================================================================
tile             31577.667      3    10525.889   -1.903e+17      1   1.000      4   --       --   -2.002e+15      -- 
file             71031.692      3    23677.231   -4.281e+17      1   1.000      4   --       --   -4.504e+15      -- 
tile * file      49649.614      9     5516.624   -9.975e+16      1   1.000      1   --       --   -7.506e+14      -- 
Error           -6.548e-11   1184   -5.531e-14                                                                       
====================================================================================================================
Total         13093372.526   1199                                                                                    

TABLES OF ESTIMATED MARGINAL MEANS

Estimated Marginal Means for tile
tile    Mean     Std. Error   95% Lower Bound   95% Upper Bound 
===============================================================
512     22.944        0.642            21.686            24.202 
1024    50.965        2.033            46.980            54.950 
2048    93.840        5.247            83.557           104.124 
4096   140.332        9.354           121.999           158.665 

Estimated Marginal Means for file
file    Mean     Std. Error   95% Lower Bound   95% Upper Bound 
===============================================================
1      181.109        8.977           163.515           198.704 
4       87.634        2.408            82.914            92.354 
16      30.449        0.434            29.597            31.300 
64       8.889        0.066             8.759             9.019 

Estimated Marginal Means for tile * file
tile   file    Mean     Std. Error   95% Lower Bound   95% Upper Bound 
======================================================================
512    1       28.292        0.536            27.241            29.342 
512    4       34.103        0.907            32.326            35.881 
512    16      21.512        0.605            20.326            22.697 
512    64       7.869        0.118             7.638             8.100 
1024   1       87.909        2.201            83.595            92.223 
1024   4       77.474        1.871            73.808            81.141 
1024   16      29.570        0.651            28.294            30.847 
1024   64       8.906        0.116             8.678             9.133 
2048   1      221.535        7.972           205.910           237.161 
2048   4      110.754        3.409           104.073           117.436 
2048   16      33.632        0.541            32.572            34.693 
2048   64       9.440        0.104             9.237             9.643 
4096   1      386.701       14.405           358.468           414.934 
4096   4      128.204        2.866           122.587           133.821 
4096   16      37.080        0.383            36.330            37.830 
4096   64       9.342        0.109             9.129             9.555 


One 4k tile per file vs. One 2k tile per file 10.0322954818 2.06960909779e-17



In [60]:

    
# All data for testing:
all_rates = []
all_tiles = []
all_files = []
all_id = []
# All data in a dictionary
all_data = {}

print four_tiles

# Reformat the data into three columns
for ti,t in enumerate(four_tiles):
    # Add dictionaries 
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only when tile size equals file size
        if t != f:
            continue
        # Get all the data
        ft_rates = rates[:,fi,ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)]*len(ft_rates)
        all_files += [str(f)]*len(ft_rates)
        # All labels have ids
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id]*len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id

# Tiles size and File size all between-subjects variables because 
# different files are created and destroyed for all conditions

aov_pyvttbl = df.anova1way('data', 'tile')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats

t, p = ttest_ind(all_data['2048']['2048'], all_data['1024']['1024'], equal_var=False)
print 'One 1k tile per file vs. One 2k tile per file', t,p
t, p = ttest_ind(all_data['4096']['4096'], all_data['2048']['2048'], equal_var=False)
print 'One 2k tile per file vs. One 4k tile per file', t,p









    



(512, 1024, 2048, 4096)
Anova: Single Factor on data

SUMMARY
Groups   Count      Sum      Average   Variance  
================================================
512         75    2121.869    28.292      21.549 
1024        75    6593.174    87.909     363.390 
2048        75   16615.142   221.535    4766.564 
4096        75   29002.577   386.701   15561.728 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df       MS         F       P-value    eta^2   Obs. power 
===========================================================================================
Treatments            1.183e+10     3   3.944e+09   36.139   6.271e-20   0.268        1.000 
Error                 3.230e+10   296   1.091e+08                                           
===========================================================================================
Total                 4.414e+10   299                                                       

ANOVA
Source of Variation       SS        df        MS           F       P-value    eta^2   Obs. power 
================================================================================================
Treatments            5695632.099     3   1898544.033   366.634   2.512e-99   0.788            1 
Error                 1532779.056   296      5178.308                                            
================================================================================================
Total                 7228411.154   299                                                          

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       512     1024       2048        4096    
=============================================
512    0     7.175 **   23.256 **   43.134 ** 
1024         0          16.082 **   35.959 ** 
2048                    0           19.877 ** 
4096                                0         
=============================================
  + p < .10 (q-critical[4, 296] = 3.25463863273)
  * p < .05 (q-critical[4, 296] = 3.65402149588)
 ** p < .01 (q-critical[4, 296] = 4.44066616141)
One 1k tile per file vs. One 2k tile per file 16.1571820426 1.12483824939e-27
One 2k tile per file vs. One 4k tile per file 10.0322954818 2.06960909779e-17



In [51]:

    
# All data for testing:
all_rates = []
all_tiles = []
all_files = []
all_id = []
# All data in a dictionary
all_data = {}

# Reformat the data into three columns
for ti,t in enumerate(four_tiles):
    # Add dictionaries 
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only the one case
        if t != 512:
            continue
        if f < t:
            continue
        # Get all the data
        ft_rates = rates[:,fi,ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)]*len(ft_rates)
        all_files += [str(f)]*len(ft_rates)
        # All labels have ids
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id]*len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id

# Tiles size and File size all between-subjects variables because 
# different files are created and destroyed for all conditions

aov_pyvttbl = df.anova1way('data', 'file')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats

t, p = ttest_ind(all_data['512']['512'], all_data['512']['1024'], equal_var=False)
print '512px tile in 512px file vs. 512px tile in 1024px file', t,p
t, p = ttest_ind(all_data['512']['2048'], all_data['512']['1024'], equal_var=False)
print '512px tile in 2048px file vs. 512px tile in 1024px file', t,p









    



Anova: Single Factor on data

SUMMARY
Groups   Count     Sum      Average   Variance  
===============================================
512         75   2121.869    28.292      21.549 
1024        75   2557.751    34.103      61.679 
2048        75   1613.367    21.512      27.449 
4096        75    590.174     7.869       1.045 
8192        75    170.610     2.275       0.058 
16384       75     43.370     0.578       0.003 
32768       75     11.295     0.151   2.012e-04 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation       SS       df       MS         F       P-value    eta^2   Obs. power 
============================================================================================
Treatments            242859.568     6   40476.595   29.166   4.045e-30   0.253        1.000 
Error                 718873.724   518    1387.787                                           
============================================================================================
Total                 961733.292   524                                                       

ANOVA
Source of Variation      SS       df       MS          F       P-value     eta^2   Obs. power 
=============================================================================================
Treatments            90776.559     6   15129.426   947.421   1.554e-275   0.916            1 
Error                  8271.978   518      15.969                                             
=============================================================================================
Total                 99048.536   524                                                         

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
        512     1024        2048        4096        8192        16384       32768   
===================================================================================
512     0     12.595 **   14.693 **   44.259 **   56.383 **   60.059 **   60.986 ** 
1024          0           27.288 **   56.854 **   68.978 **   72.654 **   73.581 ** 
2048                      0           29.566 **   41.689 **   45.366 **   46.293 ** 
4096                                  0           12.123 **   15.800 **   16.727 ** 
8192                                              0           3.677 ns    4.603 *   
16384                                                         0           0.927 ns  
32768                                                                     0         
===================================================================================
  + p < .10 (q-critical[7, 518] = 3.81953002998)
  * p < .05 (q-critical[7, 518] = 4.18597832629)
 ** p < .01 (q-critical[7, 518] = 4.91021887958)
512px tile in 512px file vs. 512px tile in 1024px file -5.51701951823 2.01563707252e-07
512px tile in 2048px file vs. 512px tile in 1024px file -11.5507613467 1.25407397439e-21



In [61]:

    
# All data for testing:
all_rates = []
all_tiles = []
all_files = []
all_id = []
# All data in a dictionary
all_data = {}

# Reformat the data into three columns
for ti,t in enumerate(four_tiles):
    # Add dictionaries 
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only the one case
        if t != 1024:
            continue
        if f < t:
            continue
        # Get all the data
        ft_rates = rates[:,fi,ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)]*len(ft_rates)
        all_files += [str(f)]*len(ft_rates)
        # All labels have ids
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id]*len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id

# Tiles size and File size all between-subjects variables because 
# different files are created and destroyed for all conditions

aov_pyvttbl = df.anova1way('data', 'file')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats

t, p = ttest_ind(all_data['1024']['1024'], all_data['1024']['2048'], equal_var=False)
print '1024px tile in 1024px file vs. 1024px tile in 2048px file', t,p
t, p = ttest_ind(all_data['1024']['2048'], all_data['1024']['4096'], equal_var=False)
print '1024px tile in 4096px file vs. 1024px tile in 2048px file', t,p









    



Anova: Single Factor on data

SUMMARY
Groups   Count     Sum      Average   Variance 
==============================================
1024        75   6593.174    87.909    363.390 
2048        75   5810.582    77.474    262.481 
4096        75   2217.755    29.570     31.810 
8192        75    667.927     8.906      1.009 
16384       75    175.761     2.343      0.055 
32768       75     44.603     0.595      0.003 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation        SS        df        MS          F       P-value    eta^2   Obs. power 
================================================================================================
Treatments             9722736.985     5   1944547.397   17.649   5.828e-16   0.166        1.000 
Error                 48918515.236   444    110176.836                                           
================================================================================================
Total                 58641252.221   449                                                         

ANOVA
Source of Variation       SS       df        MS          F        P-value     eta^2   Obs. power 
================================================================================================
Treatments            567172.375     5   113434.475   1033.181   6.265e-242   0.921            1 
Error                  48747.400   444      109.791                                              
================================================================================================
Total                 615919.774   449                                                           

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
        1024     2048       4096        8192        16384       32768   
=======================================================================
1024    0      8.624 **   48.218 **   65.297 **   70.720 **   72.166 ** 
2048           0          39.593 **   56.673 **   62.096 **   63.542 ** 
4096                      0           17.079 **   22.503 **   23.948 ** 
8192                                  0           5.424 **    6.869 **  
16384                                             0           1.445 ns  
32768                                                         0         
=======================================================================
  + p < .10 (q-critical[6, 444] = 3.67293571769)
  * p < .05 (q-critical[6, 444] = 4.04774949078)
 ** p < .01 (q-critical[6, 444] = 4.78768717088)
1024px tile in 1024px file vs. 1024px tile in 2048px file 3.61212171552 0.000418489780936
1024px tile in 4096px file vs. 1024px tile in 2048px file 24.1833880655 1.45235302683e-41



In [53]:

    
# All data for testing:
all_rates = []
all_tiles = []
all_files = []
all_id = []
# All data in a dictionary
all_data = {}

# Reformat the data into three columns
for ti,t in enumerate(four_tiles):
    # Add dictionaries 
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only the one case
        if t != 2048:
            continue
        if f < t:
            continue
        # Get all the data
        ft_rates = rates[:,fi,ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)]*len(ft_rates)
        all_files += [str(f)]*len(ft_rates)
        # All labels have ids
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id]*len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id

# Tiles size and File size all between-subjects variables because 
# different files are created and destroyed for all conditions

aov_pyvttbl = df.anova1way('data', 'file')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats

t, p = ttest_ind(all_data['2048']['2048'], all_data['2048']['4096'], equal_var=False)
print '2048px tile in 2048px file vs. 2048px tile in 4096px file', t,p









    



Anova: Single Factor on data

SUMMARY
Groups   Count      Sum      Average   Variance 
===============================================
2048        75   16615.142   221.535   4766.564 
4096        75    8306.574   110.754    871.552 
8192        75    2522.429    33.632     21.961 
16384       75     708.002     9.440      0.806 
32768       75     174.876     2.332      0.055 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df         MS          F       P-value    eta^2   Obs. power 
==============================================================================================
Treatments            1.280e+09     4      3.201e+08   29.452   2.824e-21   0.242        1.000 
Error                 4.021e+09   370   10867855.099                                           
==============================================================================================
Total                 5.301e+09   374                                                          

ANOVA
Source of Variation       SS        df        MS          F       P-value     eta^2   Obs. power 
================================================================================================
Treatments            2552967.423     4   638241.856   563.724   6.101e-156   0.859            1 
Error                  418909.405   370     1132.188                                             
================================================================================================
Total                 2971876.827   374                                                          

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
        2048     4096        8192        16384       32768   
============================================================
2048    0      28.513 **   48.362 **   54.589 **   56.418 ** 
4096           0           19.849 **   26.076 **   27.906 ** 
8192                       0           6.227 **    8.056 **  
16384                                  0           1.830 ns  
32768                                              0         
============================================================
  + p < .10 (q-critical[5, 370] = 3.491431522)
  * p < .05 (q-critical[5, 370] = 3.87679549143)
 ** p < .01 (q-critical[5, 370] = 4.63667398406)
2048px tile in 2048px file vs. 2048px tile in 4096px file 12.7769895858 9.31596122925e-23



In [55]:

    
# All data for testing:
all_rates = []
all_tiles = []
all_files = []
all_id = []
# All data in a dictionary
all_data = {}

# Reformat the data into three columns
for ti,t in enumerate(four_tiles):
    # Add dictionaries 
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only the one case
        if t != 4096:
            continue
        if f < t:
            continue
        # Get all the data
        ft_rates = rates[:,fi,ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)]*len(ft_rates)
        all_files += [str(f)]*len(ft_rates)
        # All labels have ids
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id]*len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id

# Tiles size and File size all between-subjects variables because 
# different files are created and destroyed for all conditions

aov_pyvttbl = df.anova1way('data', 'file')
print aov_pyvttbl

# #
# # now t-test
# #
# # Welch's t-test between 1k and 2k
from scipy.stats import ttest_ind, ttest_ind_from_stats

t, p = ttest_ind(all_data['4096']['4096'], all_data['4096']['8192'], equal_var=False)
print '4096px tile in 4096px file vs. 4096px tile in 8192px file', t,p









    



Anova: Single Factor on data

SUMMARY
Groups   Count      Sum      Average   Variance  
================================================
4096        75   29002.577   386.701   15561.728 
8192        75    9615.292   128.204     616.043 
16384       75    2781.018    37.080      10.987 
32768       75     700.660     9.342       0.886 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df         MS          F       P-value    eta^2   Obs. power 
==============================================================================================
Treatments            1.328e+10     3      4.426e+09   46.178   1.649e-24   0.319        1.000 
Error                 2.837e+10   296   95836886.526                                           
==============================================================================================
Total                 4.164e+10   299                                                          

ANOVA
Source of Variation       SS        df        MS           F       P-value     eta^2   Obs. power 
=================================================================================================
Treatments            6649804.283     3   2216601.428   547.659   1.965e-120   0.847            1 
Error                 1198033.667   296      4047.411                                             
=================================================================================================
Total                 7847837.950   299                                                           

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
        4096     8192        16384       32768   
================================================
4096    0      35.188 **   47.593 **   51.368 ** 
8192           0           12.404 **   16.180 ** 
16384                      0           3.776 *   
32768                                  0         
================================================
  + p < .10 (q-critical[4, 296] = 3.25463863273)
  * p < .05 (q-critical[4, 296] = 3.65402149588)
 ** p < .01 (q-critical[4, 296] = 4.44066616141)
4096px tile in 4096px file vs. 4096px tile in 8192px file 17.6005819561 3.26921025393e-29



In [ ]: