In [ ]:
%load_ext autoreload
%autoreload 2
import numpy as np
import sys
In [ ]:
import json
In [ ]:
# Read the recorded web-test results into `aaa`; the cells below index
# its 'all_rates' entry by condition.
rates_path = '/tmp/web_test_2017_06_21/2017_06_21_R0_10_jpg_x512_4096x4096.json'
with open(rates_path, 'r') as f:
    aaa = json.load(f)
In [ ]:
# Rates stored at index 1 of 'all_rates' (the group labelled '1k' in the ANOVA cell)
bb = aaa['all_rates'][1]
In [ ]:
# Rates stored at index 2 of 'all_rates' (the group labelled '2k' in the ANOVA cell)
cc = aaa['all_rates'][2]
In [ ]:
In [ ]:
# Median rate of each selected group, printed in the order bb, cc
for selected_rates in (bb, cc):
    print(np.median(selected_rates))
In [ ]:
# One-way ANOVA of the recorded rates across the four tile-size
# conditions, followed by a Welch's t-test between the 1k and 2k groups.
from pyvttbl import DataFrame
from scipy.stats import ttest_ind, ttest_ind_from_stats

# Condition labels, in the same order as the groups in aaa['all_rates']
tile_labels = ['512', '1k', '2k', '4k']
rate_groups = [aaa['all_rates'][i] for i in range(4)]

# Build the two columns side by side so each measurement is paired with
# the label of its own group. (Previously the '512' labels were counted
# from group 1, which misaligns the columns if group sizes differ.)
data_column = []
condition_column = []
for label, group in zip(tile_labels, rate_groups):
    data_column += group
    condition_column += [label] * len(group)

df = DataFrame()
df['data'] = data_column
df['conditions'] = condition_column
aov_pyvttbl = df.anova1way('data', 'conditions')
print(aov_pyvttbl)

# Welch's t-test (unequal variances) between the 1k and 2k groups
t, p = ttest_ind(aaa['all_rates'][1], aaa['all_rates'][2], equal_var=False)
print('%s %s %s' % ('1k vs. 2k', t, p))
In [ ]:
# The tile size had a significant effect at the p < .05 level
# across the four conditions (512, 1k, 2k, 4k):
# F(3, 36) = 301.168, p < 0.001.
# Post hoc comparisons (after Bonferroni correction) indicate that the speed
# of 1k tiles was significantly higher than that of 2k tiles
# (t_36 = 3.9012, p = 0.00148).
In [ ]:
# Show the docstring of the one-way ANOVA method used on `df`
print df.anova1way.__doc__
In [ ]:
# Load the R0_500 results file and derive per-tile times from the
# full-image times.
with open('/tmp/web_test_2017_06_21/2017_06_21_R0_500_jpg_x512_4096x4096.json', 'r') as f:
    R0_500 = json.load(f)

# Tile sizes and the full shape, as recorded in the file
tiles_x = R0_500['tiles']
full_shape = R0_500['shape']
# Recorded times; the first axis lines up with the tile sizes
# (it is divided row-wise by n_tiles below)
full_times = R0_500['all_times']

# Number of tiles per tile size: divide the full shape by a (size, size)
# pair for every tile size, then multiply the per-axis counts together
tile_pairs = np.c_[tiles_x, tiles_x]
n_tiles = np.prod(full_shape / tile_pairs, axis=1)
# Time per tile: every row of times divided by that row's tile count
tile_times = (full_times / n_tiles[:, np.newaxis]).tolist()

print(len(full_times))
print(len(tile_times))
In [ ]:
# One-way ANOVA of the full-image times across the four tile-size
# conditions, followed by Welch's t-tests of 1k against each other size.
from pyvttbl import DataFrame
from scipy.stats import ttest_ind, ttest_ind_from_stats

test_array = full_times

# Condition labels, in the same order as the groups in test_array
tile_labels = ['512', '1k', '2k', '4k']
time_groups = [test_array[i] for i in range(4)]

# Build the two columns side by side so each measurement is paired with
# the label of its own group. (Previously the '512' labels were counted
# from group 1, which misaligns the columns if group sizes differ.)
data_column = []
condition_column = []
for label, group in zip(tile_labels, time_groups):
    data_column += group
    condition_column += [label] * len(group)

df = DataFrame()
df['data'] = data_column
df['conditions'] = condition_column
aov_pyvttbl = df.anova1way('data', 'conditions')
print(aov_pyvttbl)

# Welch's t-tests (unequal variances): 1k against 2k, 4k and 512
t, p = ttest_ind(test_array[1], test_array[2], equal_var=False)
print('%s %s %s' % ('1k vs. 2k', t, p))
t, p = ttest_ind(test_array[1], test_array[3], equal_var=False)
print('%s %s %s' % ('1k vs. 4k', t, p))
t, p = ttest_ind(test_array[1], test_array[0], equal_var=False)
print('%s %s %s' % ('1k vs. 512', t, p))
In [ ]:
# One-way ANOVA of the per-tile times across the four tile-size
# conditions, followed by a Welch's t-test between the 1k and 2k groups.
from pyvttbl import DataFrame
from scipy.stats import ttest_ind, ttest_ind_from_stats

test_array = tile_times

# Condition labels, in the same order as the groups in test_array
tile_labels = ['512', '1k', '2k', '4k']
time_groups = [test_array[i] for i in range(4)]

# Build the two columns side by side so each measurement is paired with
# the label of its own group. (Previously the '512' labels were counted
# from group 1, which misaligns the columns if group sizes differ.)
data_column = []
condition_column = []
for label, group in zip(tile_labels, time_groups):
    data_column += group
    condition_column += [label] * len(group)

df = DataFrame()
df['data'] = data_column
df['conditions'] = condition_column
aov_pyvttbl = df.anova1way('data', 'conditions')
print(aov_pyvttbl)

# Welch's t-test (unequal variances) between the 1k and 2k groups
t, p = ttest_ind(test_array[1], test_array[2], equal_var=False)
print('%s %s %s' % ('1k vs. 2k', t, p))
In [39]:
###
# Beginning of File Storage Experiment
###
import os
import json
from glob import glob1
%load_ext autoreload
%autoreload 2
import numpy as np
import sys
# Directory that holds the JSON trial files of this experiment
graph_dir = "/tmp/2017_06_27_32K_tiff"
# NOTE(review): these two lists are not referenced in the visible cells —
# confirm whether they are still needed
tile_shapes = []
file_counts = []
def open_json(in_file):
    """Load one trial's JSON data.

    `in_file` is a file name relative to the module-level `graph_dir`.
    Returns whatever `json.load` parses from that file.
    """
    trial_path = os.path.join(graph_dir, in_file)
    with open(trial_path, 'r') as trial_file:
        trial = json.load(trial_file)
    return trial
def load_rates(in_file):
    """Return only the 'mbps' entry of the trial stored in `in_file`."""
    trial = open_json(in_file)
    return trial['mbps']
def load_K(in_file):
    """Return the constants of one trial as [tile_shape, file_shape].

    Also prints the keys of the trial. A key missing from the trial
    yields None in its slot, because the values are fetched with `.get`.
    """
    trial = open_json(in_file)
    print(trial.keys())
    return [trial.get(name) for name in ('tile_shape', 'file_shape')]
# Every JSON trial file found directly inside graph_dir
all_json = glob1(graph_dir, '*.json')
# Stack the 'mbps' rates of all trials; the first axis indexes trials
rates = np.array([load_rates(trial_name) for trial_name in all_json])
# The tile and file shapes are read from the first trial only
tiles, files = load_K(all_json[0])
# First component of each of the first four tile shapes
# (presumably the four smallest tile sides; the ordering is not checked here)
four_tiles = list(zip(*tiles[:4]))[0]
# First component of every file shape
files = list(zip(*files))[0]
In [40]:
# Two-factor between-subjects ANOVA of rate over tile size and tiles per
# file, followed by a Welch's t-test between two one-tile-per-file cases.

# Row-aligned columns for the ANOVA table
all_rates = []
all_tiles = []
all_files = []
all_id = []
# The same rates, keyed as all_data[tile side][tiles per file]
all_data = {}
# Reformat the data into flat columns
for ti, t in enumerate(four_tiles):
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Tiles per file. Floor division is spelled out so the count stays
        # an integer (and str(t_f) stays e.g. '1', the key looked up
        # below) regardless of how `/` divides integers.
        t_f = (f // t) ** 2
        # Keep only files that hold between 1 and 64 tiles
        if t_f < 1 or t_f > 64:
            continue
        # Rates of every trial for this file size and tile size
        ft_rates = rates[:, fi, ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)] * len(ft_rates)
        all_files += [str(t_f)] * len(ft_rates)
        # One shared id per (tile, file) condition
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id] * len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(t_f)] = ft_rates

from pyvttbl import DataFrame
df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id
# Tile size and file size are both between-subjects variables because
# different files are created and destroyed for all conditions
aov_pyvttbl = df.anova('data', sub='id', bfactors=['tile', 'file'])
print(aov_pyvttbl)

# Welch's t-test (unequal variances): one 4k tile per file against one
# 2k tile per file
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(all_data['4096']['1'], all_data['2048']['1'], equal_var=False)
print('%s %s %s' % ('One 4k tile per file vs. One 2k tile per file', t, p))
In [60]:
# One-way ANOVA of rate over tile size, restricted to files holding
# exactly one tile, followed by Welch's t-tests between neighbouring sizes.

# Row-aligned columns for the ANOVA table
all_rates = []
all_tiles = []
all_files = []
all_id = []
# The same rates, keyed as all_data[tile side][file side]
all_data = {}
print(four_tiles)
# Reformat the data into flat columns
for ti, t in enumerate(four_tiles):
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only when tile size equals file size
        if t != f:
            continue
        # Rates of every trial for this file size and tile size
        ft_rates = rates[:, fi, ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)] * len(ft_rates)
        all_files += [str(f)] * len(ft_rates)
        # One shared id per (tile, file) condition
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id] * len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

from pyvttbl import DataFrame
df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id
# Only the tile size varies here, so a one-way ANOVA over 'tile' is used
aov_pyvttbl = df.anova1way('data', 'tile')
print(aov_pyvttbl)

# Welch's t-tests (unequal variances). Each label names the groups in the
# order they are passed, so the sign of t reads as "first minus second".
# (The labels were previously reversed relative to the arguments.)
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(all_data['2048']['2048'], all_data['1024']['1024'], equal_var=False)
print('%s %s %s' % ('One 2k tile per file vs. One 1k tile per file', t, p))
t, p = ttest_ind(all_data['4096']['4096'], all_data['2048']['2048'], equal_var=False)
print('%s %s %s' % ('One 4k tile per file vs. One 2k tile per file', t, p))
In [51]:
# One-way ANOVA of rate over file size for 512px tiles only, followed by
# Welch's t-tests between neighbouring file sizes.

# Row-aligned columns for the ANOVA table
all_rates, all_tiles, all_files, all_id = [], [], [], []
# The same rates, keyed as all_data[tile side][file side]
all_data = {}
for ti, t in enumerate(four_tiles):
    # Every tile size gets an entry, although only one is filled in here
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Keep only 512px tiles in files at least as large as the tile
        if t != 512 or f < t:
            continue
        # Rates of every trial for this file size and tile size
        ft_rates = rates[:, fi, ti].tolist()
        n_rates = len(ft_rates)
        all_rates.extend(ft_rates)
        all_tiles.extend([str(t)] * n_rates)
        all_files.extend([str(f)] * n_rates)
        # One shared id per (tile, file) condition
        ft_id = '{}_{}'.format(t, f)
        all_id.extend([ft_id] * n_rates)
        all_data[str(t)][str(f)] = ft_rates

from pyvttbl import DataFrame
df = DataFrame()
for column, values in (('data', all_rates), ('tile', all_tiles),
                       ('file', all_files), ('id', all_id)):
    df[column] = values
# Only the file size varies here, so a one-way ANOVA over 'file' is used
aov_pyvttbl = df.anova1way('data', 'file')
print(aov_pyvttbl)

# Welch's t-tests (unequal variances) between neighbouring file sizes
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(all_data['512']['512'], all_data['512']['1024'], equal_var=False)
print('%s %s %s' % ('512px tile in 512px file vs. 512px tile in 1024px file', t, p))
t, p = ttest_ind(all_data['512']['2048'], all_data['512']['1024'], equal_var=False)
print('%s %s %s' % ('512px tile in 2048px file vs. 512px tile in 1024px file', t, p))
In [61]:
# One-way ANOVA of rate over file size for 1024px tiles only, followed by
# Welch's t-tests between neighbouring file sizes.

# Row-aligned columns for the ANOVA table
all_rates = []
all_tiles = []
all_files = []
all_id = []
# The same rates, keyed as all_data[tile side][file side]
all_data = {}
# Reformat the data into flat columns
for ti, t in enumerate(four_tiles):
    # Every tile size gets an entry, although only one is filled in here
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Only 1024px tiles
        if t != 1024:
            continue
        # Only files at least as large as the tile
        if f < t:
            continue
        # Rates of every trial for this file size and tile size
        ft_rates = rates[:, fi, ti].tolist()
        # All cases flattened
        all_rates += ft_rates
        all_tiles += [str(t)] * len(ft_rates)
        all_files += [str(f)] * len(ft_rates)
        # One shared id per (tile, file) condition
        ft_id = '{}_{}'.format(t, f)
        all_id += [ft_id] * len(ft_rates)
        # All cases labeled
        all_data[str(t)][str(f)] = ft_rates

from pyvttbl import DataFrame
df = DataFrame()
df['data'] = all_rates
df['tile'] = all_tiles
df['file'] = all_files
df['id'] = all_id
# Only the file size varies here, so a one-way ANOVA over 'file' is used
aov_pyvttbl = df.anova1way('data', 'file')
print(aov_pyvttbl)

# Welch's t-tests (unequal variances). Each label names the groups in the
# order they are passed, so the sign of t reads as "first minus second".
# (The second label was previously reversed relative to its arguments.)
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(all_data['1024']['1024'], all_data['1024']['2048'], equal_var=False)
print('%s %s %s' % ('1024px tile in 1024px file vs. 1024px tile in 2048px file', t, p))
t, p = ttest_ind(all_data['1024']['2048'], all_data['1024']['4096'], equal_var=False)
print('%s %s %s' % ('1024px tile in 2048px file vs. 1024px tile in 4096px file', t, p))
In [53]:
# One-way ANOVA of rate over file size for 2048px tiles only, followed by
# a Welch's t-test between the two smallest eligible file sizes.

# Row-aligned columns for the ANOVA table
all_rates, all_tiles, all_files, all_id = [], [], [], []
# The same rates, keyed as all_data[tile side][file side]
all_data = {}
for ti, t in enumerate(four_tiles):
    # Every tile size gets an entry, although only one is filled in here
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Keep only 2048px tiles in files at least as large as the tile
        if t != 2048 or f < t:
            continue
        # Rates of every trial for this file size and tile size
        ft_rates = rates[:, fi, ti].tolist()
        n_rates = len(ft_rates)
        all_rates.extend(ft_rates)
        all_tiles.extend([str(t)] * n_rates)
        all_files.extend([str(f)] * n_rates)
        # One shared id per (tile, file) condition
        ft_id = '{}_{}'.format(t, f)
        all_id.extend([ft_id] * n_rates)
        all_data[str(t)][str(f)] = ft_rates

from pyvttbl import DataFrame
df = DataFrame()
for column, values in (('data', all_rates), ('tile', all_tiles),
                       ('file', all_files), ('id', all_id)):
    df[column] = values
# Only the file size varies here, so a one-way ANOVA over 'file' is used
aov_pyvttbl = df.anova1way('data', 'file')
print(aov_pyvttbl)

# Welch's t-test (unequal variances) between the 2048px and 4096px files
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(all_data['2048']['2048'], all_data['2048']['4096'], equal_var=False)
print('%s %s %s' % ('2048px tile in 2048px file vs. 2048px tile in 4096px file', t, p))
In [55]:
# One-way ANOVA of rate over file size for 4096px tiles only, followed by
# a Welch's t-test between the two smallest eligible file sizes.

# Row-aligned columns for the ANOVA table
all_rates, all_tiles, all_files, all_id = [], [], [], []
# The same rates, keyed as all_data[tile side][file side]
all_data = {}
for ti, t in enumerate(four_tiles):
    # Every tile size gets an entry, although only one is filled in here
    all_data[str(t)] = {}
    for fi, f in enumerate(files):
        # Keep only 4096px tiles in files at least as large as the tile
        if t != 4096 or f < t:
            continue
        # Rates of every trial for this file size and tile size
        ft_rates = rates[:, fi, ti].tolist()
        n_rates = len(ft_rates)
        all_rates.extend(ft_rates)
        all_tiles.extend([str(t)] * n_rates)
        all_files.extend([str(f)] * n_rates)
        # One shared id per (tile, file) condition
        ft_id = '{}_{}'.format(t, f)
        all_id.extend([ft_id] * n_rates)
        all_data[str(t)][str(f)] = ft_rates

from pyvttbl import DataFrame
df = DataFrame()
for column, values in (('data', all_rates), ('tile', all_tiles),
                       ('file', all_files), ('id', all_id)):
    df[column] = values
# Only the file size varies here, so a one-way ANOVA over 'file' is used
aov_pyvttbl = df.anova1way('data', 'file')
print(aov_pyvttbl)

# Welch's t-test (unequal variances) between the 4096px and 8192px files
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(all_data['4096']['4096'], all_data['4096']['8192'], equal_var=False)
print('%s %s %s' % ('4096px tile in 4096px file vs. 4096px tile in 8192px file', t, p))
In [ ]: