In [22]:
from pprint import pprint
import json

name = 'third-batch';

with open('{}.json'.format(name)) as f:
    data = json.load(f)['logs']
    
with open('images/_airlines.json') as f:
    airlines = json.load(f)

with open('images/_states.json') as f:
    states = json.load(f)

In [23]:
from collections import OrderedDict 
from copy import deepcopy

rows = []

for p in data.items():
    user = p[0]
    d = OrderedDict({
        'id': user
    })
    
    item = p[1]
    
    if not 'done' in item:
        print('ignoring {}'.format(p[0]))
        continue
    
    d['userAgent'] = item['userAgent']
    d['isMobile'] = item['isMobile']
    
    cond = item['cond']
    
    errorAirline = cond['errorAirline']
    seqAirline = cond['seqAirline']
    errorStates = cond['errorStates']
    seqStates = cond['seqStates']
    focusAirline = cond['focusAirline']
    focusState = cond['focusState']
    
    for k, v in cond.items():
        d[k] = v
        
    howManyIndex = 0
    for k, v in item['approximateVis_airline'].items():
        if k == 'done':
            continue
        t = v['type']
        
        if not 'confidence' in v:
            raise Exception(user, v)
        
        approx = airlines["{}_{}".format(errorAirline, seqAirline)]
        
        prefix = 'approx.airline.'
        
        if (t == 'HowMany'):
            p = prefix + 'howMany.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'approx'] = approx[focusAirline]
            d[p + 'confidence'] = v['confidence']
            
        if (t == 'HowManyCompare'):
            p = prefix + 'howManyCompare.' + str(howManyIndex) + '.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'data'] = v['data']
            d[p + 'approx'] = approx[v['data']]
            d[p + 'confidence'] = v['confidence']
            howManyIndex += 1
    
    howManyIndex = 0
    for k, v in item['approximateVis_states'].items():
        if k == 'done':
            continue
        t = v['type']
        
        if not 'confidence' in v:
            raise Exception(user, v)
        
        approx = states["{}_{}".format(errorStates, seqStates)]
        
        prefix = 'approx.states.'
        
        if (t == 'HowMany'):
            p = prefix + 'howMany.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'approx'] = approx[focusState]
            d[p + 'confidence'] = v['confidence']
            
        if (t == 'HowManyCompare'):
            p = prefix + 'howManyCompare.' + str(howManyIndex) + '.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'data'] = v['data']
            d[p + 'approx'] = approx[v['data']]
            d[p + 'confidence'] = v['confidence']
            howManyIndex += 1
     
    def jaccard_similarity(x,y):
        intersection_cardinality = len(set.intersection(*[set(x), set(y)]))
        union_cardinality = len(set.union(*[set(x), set(y)]))
        return intersection_cardinality/float(union_cardinality)
    
    # precise
            
    howManyIndex = 0
    for k, v in item['preciseVis_airline'].items():
        if k == 'done':
            continue
        t = v['type']
        
        if not 'confidence' in v:
            raise Exception(user, v)
        
        precise = airlines["{}_{}".format(1, 1)]
        approx = airlines["{}_{}".format(errorAirline, seqAirline)]
        
        prefix = 'precise.airline.'
        
        if (t == 'HowMany'):
            p = prefix + 'howMany.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'approx'] = approx[focusAirline]
            d[p + 'precise'] = precise[focusAirline]
            d[p + 'confidence'] = v['confidence']
            
        if (t == 'HowManyCompare'):
            p = prefix + 'howManyCompare.' + str(howManyIndex) + '.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'data'] = v['data']
            d[p + 'approx'] = approx[v['data']]
            d[p + 'precise'] = precise[v['data']]
            d[p + 'confidence'] = v['confidence']
            howManyIndex += 1
            
        if (t == 'DidYouNotice'):
            p = prefix + 'didYouNotice.'
            d[p + 'answer'] = v['answer'] == 'yes'
            d[p + 'true'] = errorAirline > 1
            d[p + 'confidence'] = v['confidence']
            
        if (t == 'SelectAll'):
            v = deepcopy(v)
            p = prefix + 'SelectAll.'
            d[p + 'confidence'] = v['confidence']
            
            del v['confidence']
            del v['type']
            
            answers = v.keys()
            prec = [k for k,v in precise.items() if v]
            appr = [k for k,v in approx.items() if v]
            d[p + 'answer'] = ','.join(answers)
            d[p + 'approx'] = ','.join(appr)
            d[p + 'precise'] = ','.join(prec)
            d[p + 'jaccard_answer_precise'] = jaccard_similarity(answers, prec)
            d[p + 'jaccard_approx_precise'] = jaccard_similarity(appr, prec)
            d[p + 'jaccard_approx_answer'] = jaccard_similarity(appr, answers)
            
    howManyIndex = 0
    for k, v in item['preciseVis_states'].items():
        if k == 'done':
            continue
        t = v['type']
        
        if not 'confidence' in v:
            raise Exception(user, v)
        
        precise = states["{}_{}".format(1, 1)]
        approx = states["{}_{}".format(errorStates, seqStates)]
        
        prefix = 'precise.states.'
        
        if (t == 'HowMany'):
            p = prefix + 'howMany.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'approx'] = approx[focusState]
            d[p + 'precise'] = precise[focusState]
            d[p + 'confidence'] = v['confidence']
            
        if (t == 'HowManyCompare'):
            p = prefix + 'howManyCompare.' + str(howManyIndex) + '.'
            d[p + 'answer'] = int(v['answer'])
            d[p + 'data'] = v['data']
            d[p + 'approx'] = approx[v['data']]
            d[p + 'precise'] = precise[v['data']]
            d[p + 'confidence'] = v['confidence']
            howManyIndex += 1
            
        if (t == 'DidYouNotice'):
            p = prefix + 'didYouNotice.'
            d[p + 'answer'] = v['answer'] == 'yes'
            d[p + 'true'] = errorStates > 1
            d[p + 'confidence'] = v['confidence']
            
        if (t == 'SelectAll'):
            v = deepcopy(v)
            p = prefix + 'SelectAll.'
            d[p + 'confidence'] = v['confidence']
            
            del v['confidence']
            del v['type']
            
            answers = v.keys()
            prec = [k for k,v in precise.items() if v]
            appr = [k for k,v in approx.items() if v]
            d[p + 'answer'] = ','.join(answers)
            d[p + 'approx'] = ','.join(appr)
            d[p + 'precise'] = ','.join(prec)
            d[p + 'jaccard_answer_precise'] = jaccard_similarity(answers, prec)
            d[p + 'jaccard_approx_precise'] = jaccard_similarity(appr, prec)
            d[p + 'jaccard_approx_answer'] = jaccard_similarity(appr, answers)
    
    for k, v in item['timing'].items():
         d['timing.' + k] = v
            
    for k, v in item['demographics'].items():
         d['demographics.' + k] = v
    
    rows.append(d)

pprint(rows[0])


ignoring 26364697
ignoring 53499639
ignoring 48818156
ignoring 96211827
OrderedDict([('id', '7321301'),
             ('userAgent',
              'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, '
              'like Gecko) Chrome/54.0.2840.99 Safari/537.36'),
             ('isMobile', False),
             ('studyCondition', 'difference'),
             ('firstCondition', 'states'),
             ('seqAirline', 12),
             ('errorStates', 1),
             ('seqStates', 1),
             ('focusState', 'MA'),
             ('focusAirline', 'DL'),
             ('secondCondition', 'airline'),
             ('errorAirline', 100),
             ('approx.airline.howManyCompare.0.answer', 2500),
             ('approx.airline.howManyCompare.0.data', 'B6'),
             ('approx.airline.howManyCompare.0.approx', 4700),
             ('approx.airline.howManyCompare.0.confidence', 'weak_disagree'),
             ('approx.airline.howMany.answer', 5000),
             ('approx.airline.howMany.approx', 5000),
             ('approx.airline.howMany.confidence', 'agree'),
             ('approx.airline.howManyCompare.1.answer', 3300),
             ('approx.airline.howManyCompare.1.data', 'EV'),
             ('approx.airline.howManyCompare.1.approx', 1600),
             ('approx.airline.howManyCompare.1.confidence', 'strong_agree'),
             ('approx.states.howManyCompare.0.answer', 1400),
             ('approx.states.howManyCompare.0.data', 'PR'),
             ('approx.states.howManyCompare.0.approx', 950),
             ('approx.states.howManyCompare.0.confidence', 'disagree'),
             ('approx.states.howMany.answer', 3500),
             ('approx.states.howMany.approx', 3928),
             ('approx.states.howMany.confidence', 'agree'),
             ('approx.states.howManyCompare.1.answer', -1500),
             ('approx.states.howManyCompare.1.data', 'FL'),
             ('approx.states.howManyCompare.1.approx', 4986),
             ('approx.states.howManyCompare.1.confidence', 'agree'),
             ('precise.airline.didYouNotice.answer', True),
             ('precise.airline.didYouNotice.true', True),
             ('precise.airline.didYouNotice.confidence', 'agree'),
             ('precise.airline.howManyCompare.0.answer', 200),
             ('precise.airline.howManyCompare.0.data', 'AA'),
             ('precise.airline.howManyCompare.0.approx', 5300),
             ('precise.airline.howManyCompare.0.precise', 4088),
             ('precise.airline.howManyCompare.0.confidence', 'strong_disagree'),
             ('precise.airline.SelectAll.confidence', 'strong_agree'),
             ('precise.airline.SelectAll.answer', 'UA,AA,DL,EV,B6'),
             ('precise.airline.SelectAll.approx', 'UA,AA,DL,EV,B6,WN'),
             ('precise.airline.SelectAll.precise', 'UA,AA,VX,DL,EV,B6,WN'),
             ('precise.airline.SelectAll.jaccard_answer_precise',
              0.7142857142857143),
             ('precise.airline.SelectAll.jaccard_approx_precise',
              0.8571428571428571),
             ('precise.airline.SelectAll.jaccard_approx_answer',
              0.8333333333333334),
             ('precise.airline.howManyCompare.1.answer', 1200),
             ('precise.airline.howManyCompare.1.data', 'UA'),
             ('precise.airline.howManyCompare.1.approx', 800),
             ('precise.airline.howManyCompare.1.precise', 583),
             ('precise.airline.howManyCompare.1.confidence', 'weak_agree'),
             ('precise.airline.howMany.answer', 2800),
             ('precise.airline.howMany.approx', 5000),
             ('precise.airline.howMany.precise', 5163),
             ('precise.airline.howMany.confidence', 'weak_agree'),
             ('precise.airline.howManyCompare.2.answer', 1000),
             ('precise.airline.howManyCompare.2.data', 'B6'),
             ('precise.airline.howManyCompare.2.approx', 4700),
             ('precise.airline.howManyCompare.2.precise', 5113),
             ('precise.airline.howManyCompare.2.confidence', 'agree'),
             ('precise.states.SelectAll.confidence', 'agree'),
             ('precise.states.SelectAll.answer', 'VA,MA,NY,NJ,PR'),
             ('precise.states.SelectAll.approx', 'CA,FL,VA,MA,PR,NJ,NY'),
             ('precise.states.SelectAll.precise', 'CA,FL,VA,MA,PR,NJ,NY'),
             ('precise.states.SelectAll.jaccard_answer_precise',
              0.7142857142857143),
             ('precise.states.SelectAll.jaccard_approx_precise', 1.0),
             ('precise.states.SelectAll.jaccard_approx_answer',
              0.7142857142857143),
             ('precise.states.didYouNotice.answer', True),
             ('precise.states.didYouNotice.true', False),
             ('precise.states.didYouNotice.confidence', 'strong_agree'),
             ('precise.states.howMany.answer', 3400),
             ('precise.states.howMany.approx', 3928),
             ('precise.states.howMany.precise', 3928),
             ('precise.states.howMany.confidence', 'weak_agree'),
             ('precise.states.howManyCompare.0.answer', 3500),
             ('precise.states.howManyCompare.0.data', 'CA'),
             ('precise.states.howManyCompare.0.approx', 1887),
             ('precise.states.howManyCompare.0.precise', 1887),
             ('precise.states.howManyCompare.0.confidence', 'disagree'),
             ('precise.states.howManyCompare.1.answer', 2000),
             ('precise.states.howManyCompare.1.data', 'PR'),
             ('precise.states.howManyCompare.1.approx', 950),
             ('precise.states.howManyCompare.1.precise', 950),
             ('precise.states.howManyCompare.1.confidence', 'agree'),
             ('precise.states.howManyCompare.2.answer', 200),
             ('precise.states.howManyCompare.2.data', 'VA'),
             ('precise.states.howManyCompare.2.approx', 961),
             ('precise.states.howManyCompare.2.precise', 961),
             ('precise.states.howManyCompare.2.confidence', 'strong_disagree'),
             ('timing.approximate_questions_first_html', 25798),
             ('timing.approximate_second_html', 13724),
             ('timing.precise_questions_html', 111531),
             ('timing.approximate_questions_second_html', 25278),
             ('timing.precise_instructions_html', 3750),
             ('timing.demographics_html', 28502),
             ('timing.precise_first_html', 31174),
             ('timing.approximate_first_html', 22439),
             ('timing.instructions_html', 12449),
             ('timing.index_html', 5357),
             ('timing.precise_second_html', 19730),
             ('timing.thanks_html', 1154967),
             ('demographics.problems', '-'),
             ('demographics.troubles', 'no'),
             ('demographics.improved', '-'),
             ('demographics.gender', 'Male'),
             ('demographics.age', '28'),
             ('demographics.familiar', 'some'),
             ('demographics.cheat', 'no')])

In [25]:
import csv

with open('{}.csv'.format(name), 'w') as f:
    #global fieldnames
    #fieldnames = rows[0].keys()
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    
    for row in rows:
        writer.writerow(row)

import pandas as pd

pd.read_csv('{}.csv'.format(name))


Out[25]:
id userAgent isMobile studyCondition firstCondition seqAirline errorStates seqStates focusState focusAirline ... timing.index_html timing.precise_second_html timing.thanks_html demographics.problems demographics.troubles demographics.improved demographics.gender demographics.age demographics.familiar demographics.cheat
0 7321301 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False difference states 12 1 1 MA DL ... 5357.0 19730 1154967 - no - Male 28.0 some no
1 97626871 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKi... False onlyNew airline 11 100 7 MA DL ... 7220.0 14925 15627 None no The study was very straightforward and easy to... Male 21.0 expert no
2 29089701 Mozilla/5.0 (X11; Linux x86_64; rv:50.0) Gecko... False both states 2 500 13 NY DL ... 4081.0 27441 12584 No. no No. male 30.0 familiar no
3 76395409 Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.... False both states 14 200 20 MA B6 ... 8495.0 84788 5408 No no No Female 37.0 familiar yes
4 9782123 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False onlyNew airline 7 100 12 MA B6 ... 3249.0 17243 10516 No Problems no No suggestions male 30.0 some no
5 28428523 Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... False difference states 3 500 16 FL B6 ... 5174.0 20541 16204 none no none male 31.0 none no
6 29657674 Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... False difference states 7 1 1 FL B6 ... 59105.0 34038 8755 No no Not sure how to improve it, but looking at the... Male 33.0 familiar no
7 45694489 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... False difference airline 4 1000 10 FL AA ... 44786.0 45099 33973 No, not at all. no Very well-designed study. Great! Male 24.0 familiar no
8 79273300 Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKi... False onlyNew airline 1 1000 18 MA AA ... 511301.0 11370 14835 - no - female 39.0 expert no
9 43548417 Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... False difference states 14 100 18 MA B6 ... NaN 19665 12410 none no none that I can think of, Male 30.0 familiar no
10 79544988 Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple... False both airline 15 200 14 MA B6 ... 5109.0 11539 29816 none no - female 42.0 some no
11 20505415 Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple... False difference states 10 200 10 FL B6 ... 5072.0 22359 6036 NaN NaN NaN male NaN NaN NaN
12 88825154 Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) ... False onlyNew airline 11 500 2 NY AA ... 2532.0 20983 2005 No problems no No suggestions, seems like a good study. Male 52.0 some no
13 21045111 Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple... False difference states 5 200 4 NY AA ... 9493.0 27189 8711 - no - Male 30.0 some no
14 57315921 Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... False difference airline 20 200 10 MA DL ... 12481.0 77270 32863 No no No Female 43.0 some yes
15 71996586 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False both airline 14 500 8 MA AA ... 7564.0 12922 15943 No no No it was fine as is. Male 42.0 some no
16 83205605 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False both states 12 1 1 MA AA ... 35203.0 13886 267539 no no no male 26.0 some no
17 61154389 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6... False difference airline 12 1000 7 FL DL ... 2440.0 38892 13159 no no no Female 26.0 familiar no
18 71607423 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1... False both states 1 200 4 NY DL ... 5195.0 12148 20278 - no - male 26.0 some no
19 77642132 Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... False onlyNew states 11 100 15 MA DL ... 24997.0 38721 69782 No problems. no Nothing comes to mind. male 40.0 some no
20 62759895 Mozilla/5.0 (Windows NT 10.0; rv:50.0) Gecko/2... False both states 1 1 1 NY AA ... 30891.0 12445 12275 no no no male 31.0 familiar no
21 21418137 Mozilla/5.0 (Windows NT 6.3; Win64; x64) Apple... False difference airline 4 500 6 NY AA ... 52003.0 11919 13887 no technical problems no - male 28.0 none no
22 76024785 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False onlyNew airline 3 100 20 MA B6 ... 145667.0 62106 15231 No no Was fine Male 46.0 familiar no
23 45840218 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False difference states 1 1 1 MA DL ... 27924.0 77830 41253 no no no female 43.0 familiar yes
24 95378753 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False difference states 1 200 3 MA DL ... 4306.0 13149 23901 No no No Female 33.0 expert no
25 26308857 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False both airline 12 200 19 FL AA ... 11361.0 26819 26505 none no All good. Male 46.0 familiar no
26 89431160 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... False difference states 11 1 1 FL B6 ... 4084.0 27884 10095 I had no problems. no It seemed good overall. male 28.0 expert no
27 72925790 Mozilla/5.0 (Windows NT 6.3; WOW64; rv:50.0) G... False difference airline 3 1 1 FL AA ... 3063.0 14258 6309 NaN no NaN Female 35.0 familiar NaN
28 79707421 Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK... False difference airline 4 200 13 FL B6 ... 5420.0 26858 27626 no yes - feamle 29.0 some no
29 73348785 Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) ... False onlyNew states 5 1000 2 FL B6 ... 8705.0 31597 16437 none no NaN male 41.0 none no
30 77955785 Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple... False difference states 9 500 9 MA AA ... NaN 18487 62111 None. no Maybe improve on the scale to make estimations... Male 24.0 some no

31 rows × 111 columns


In [ ]: