Pre-Processing


In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

In [28]:
def load_csvs(subject_id):
    temp_events = "study_data/s%d_events%s.csv"
    temp_gamestatus = "study_data/s%d_gamestatus%s.csv"
    
    events = []
    gamestatus = []
    split_counter = 0
    split = ""

    while True:
        if split_counter > 0:
            split = "_%d" % split_counter
        try:
            ev_tmp = pd.read_csv(temp_events % (subject_id, split), sep=';')
            gs_tmp = pd.read_csv(temp_gamestatus % (subject_id, split), sep=';')
            ev_tmp['SubjectId'] = subject_id
            gs_tmp['SubjectId'] = subject_id
            ev_tmp['SplitCount'] = split_counter
            gs_tmp['SplitCount'] = split_counter
            events.append(ev_tmp)
            gamestatus.append(gs_tmp)
            
        except FileNotFoundError:
            break
            
        split_counter += 1
        
    return pd.concat(events).reset_index(), pd.concat(gamestatus).reset_index()

In [3]:
def move_custom_data(re, rg):
    for index, re_row in re.copy().iterrows():
        if re_row.TaskType == 'Custom':
            rg_split_idx = rg.index[rg.SplitCount == re_row.SplitCount]
            cam_cont_pos = re_row.TaskPos.strip('() ').split(',')
            print(cam_cont_pos)
            rg.loc[rg_split_idx, 'CamContXPos'] = cam_cont_pos[0]
            rg.loc[rg_split_idx, 'CamContYPos'] = cam_cont_pos[1]
            rg.loc[rg_split_idx, 'CamContZPos'] = cam_cont_pos[2]
            re.drop(index, inplace=True)
            continue
            
    return re, rg

In [4]:
def add_rc_count_to_events(re, rg):
    re['RepeatCount'] = pd.Series(np.array(np.zeros(re.size), dtype='uint8'))
    
    last_time = 0
    last_round_type = re.iloc[0].RoundType
    last_split_count = 0
    repeat_count = 0
    start_index = 0
    
    for index, re_row in re.iterrows():
        if (re_row.SplitCount != last_split_count) | (re_row.RoundType != last_round_type) | (index == re.index[-1]):
            re.loc[start_index : index, 'RepeatCount'] = repeat_count
            
            sc_rt_rg_mask = (rg.SplitCount == last_split_count) & (rg.RoundType == last_round_type)
            sc_rt_re_mask = (re.SplitCount == last_split_count) & (re.RoundType == last_round_type)

            last_rc_in_rg = rg.loc[(rg.Timestamp <= last_time) & sc_rt_rg_mask].RepeatCount.max()
            # print (last_round_type, "sc", last_split_count, "time", last_time, "rc", repeat_count, "lrc", last_rc_in_rg)

            if repeat_count < last_rc_in_rg:
                for rc in range(repeat_count+1, last_rc_in_rg+1):
                    rc_round = rg.loc[(rg.RepeatCount == rc) & sc_rt_rg_mask]
                    rc_start = rc_round.iloc[0].Timestamp
                    rc_end = rc_round.iloc[-1].Timestamp
                    
                    # print (last_round_type, repeat_count, last_rc_in_rg, rc_start, rc_end)
                    re.loc[(re.Timestamp >= rc_start) & (re.Timestamp <= rc_end) & sc_rt_re_mask, 'RepeatCount'] = rc
                 
            start_index = index
            repeat_count = 0
            last_round_type = re_row.RoundType
            last_split_count = re_row.SplitCount
            
        elif (re_row.Timestamp < last_time):
            re.loc[start_index : index, 'RepeatCount'] = repeat_count
            start_index = index
            repeat_count += 1
            
        last_time = re_row.Timestamp
        
    return re, rg

In [5]:
def create_rounds(re, rg):
    round_columns = ['SubjectId', 'Round', 'RoundType', 'Trial', 'RepeatCount', 'SplitCount', 'ValidTrial', 'StartTime', 'EndTime', 'Duration']
    round_data = []
    
    re["Trial"] = pd.Series(np.array(np.zeros(re.size), dtype='uint8'))
    re["ValidTrial"] = pd.Series([], dtype=bool)
    rg["Trial"] = pd.Series(np.array(np.empty(re.size), dtype='uint8'))
    rg["ValidTrial"] = pd.Series([], dtype=bool)
    
    # iterate over all available rounds
    for r in rg.Round.unique():
        trialNum = 0
        rg_round = rg.loc[rg.Round == r]
        round_type = rg_round.iloc[0].RoundType
        
        re_round_idx = re.index[(re.RoundType == round_type)]
        re.loc[re_round_idx, 'RoundType'] = round_type
        
        for sc in rg_round.SplitCount.unique():
            rg_split = rg_round.loc[rg_round.SplitCount == sc]
            
            for rc in rg_split.RepeatCount.unique():
                rg_trial = rg_split.loc[rg_split.RepeatCount == rc]
                
                trial_start = rg_trial.iloc[0].Timestamp
                trial_end = rg_trial.iloc[-1].Timestamp
                trial_dur = trial_end - trial_start
                
                rg_trial_idx = rg.index[(rg.Round == r) & (rg.SplitCount == sc) & (rg.RepeatCount == rc)]
                re_trial_idx = re.index[(re.RoundType == round_type) & (re.SplitCount == sc) & (re.RepeatCount == rc)]
                
                if trial_dur <= 15:
                    rg.drop(rg_trial_idx, inplace=True)
                    re.drop(re_trial_idx, inplace=True)
                    continue
                
                re_trial = re.loc[re_trial_idx]
                rings = re_trial.loc[(re_trial.TaskType == 'Ring') & (re_trial.TaskStatus != 'visible')].EventId.unique().size
                povs = re_trial.loc[(re_trial.TaskType == 'POV') & (re_trial.TaskStatus != 'visible')].EventId.unique().size
                
                valid_trial = False

                if (round_type == 'Training_Ring_Only') & (rings == 20):
                        valid_trial = True
                elif (round_type != 'Training_Ring_Only') & (povs == 9):
                        valid_trial = True
                
                round_data.append({
                     'SubjectId': rg_round.iloc[0].SubjectId, 'Round': r, 'RoundType': round_type,
                     'Trial': trialNum, 'RepeatCount': rc, 'SplitCount': sc,
                     'ValidTrial': valid_trial, 'Duration': trial_dur})
                
                rg.loc[rg_trial_idx, 'Trial'] = trialNum
                rg.loc[rg_trial_idx, 'ValidTrial'] = valid_trial
                
                re.loc[re_trial_idx, 'Trial'] = trialNum
                re.loc[re_trial_idx, 'ValidTrial'] = valid_trial
                
                trialNum += 1
                
    return pd.DataFrame(data=round_data, columns=round_columns), re, rg

In [6]:
def process_events(re, rg):
    re["EndTime"] = pd.Series([], dtype=float)
    re["Duration"] = pd.Series([], dtype=float)
    re["Round"] = pd.Series(np.array(np.zeros(re.size), dtype="uint8"))
    rg['CamContXPos'] = pd.Series([], dtype=object)
    rg['CamContYPos'] = pd.Series([], dtype=object)
    rg['CamContZPos'] = pd.Series([], dtype=object)

    for index, re_row in re.copy().iterrows():
        if re_row.TaskStatus == 'visible':
            started = re_row.Timestamp
            rg_info = rg.loc[(rg.RoundType == re_row.RoundType) & (rg.Trial == re_row.Trial)].iloc[0]
            re.loc[index,'Round'] = rg_info.Round

            is_corresponding_event = (re.EventId == re_row.EventId) & (re.TaskStatus != 'visible')
            ce_idx = re.index[is_corresponding_event]
            
            if ce_idx.size > 0:
                corresponding_event = re.loc[ce_idx].iloc[0]
                finished = corresponding_event.Timestamp
                duration = finished - started
                status = corresponding_event.TaskStatus
                re.drop(ce_idx, inplace=True)
            else:
                print('unfinshed event')  
                finished = np.nan
                duration = np.nan
                status = 'unfinished'
            
            re.loc[index,'EndTime'] = finished
            re.loc[index, 'Duration'] = duration
            re.loc[index, 'TaskStatus'] = status
                
    re = re.rename(columns = {'Timestamp': 'StartTime'})

    return re, rg

In [7]:
def fix_timestamps(ev, ga, ro):
    total_dur = 0
    for r in range(ro.Round.max() + 1):
        curr_round = ro.loc[ro.Round == r]
        trials = curr_round.Trial.max() + 1
        
        for t in range(trials):
            trial_dur = curr_round.loc[curr_round.Trial == t].iloc[0].Duration
            trial_mask = (ro.Round == r) & (ro.Trial == t)
            ro.loc[trial_mask, 'StartTime'] = total_dur
            ro.loc[trial_mask, 'EndTime'] = total_dur + trial_dur
            
            ga_trial_mask = (ga.Round == r) & (ga.Trial == t)
            ev_trial_mask = (ev.Round == r) & (ev.Trial == t)
            
            ga_trial_start = ga.loc[ga_trial_mask].iloc[0].Timestamp
            trial_time_offset =  total_dur - ga_trial_start
            
            ga.loc[ga_trial_mask, 'Timestamp'] = ga.loc[ga_trial_mask, 'Timestamp'] + trial_time_offset
            ev.loc[ev_trial_mask, 'StartTime'] = ev.loc[ev_trial_mask, 'StartTime'] + trial_time_offset
            ev.loc[ev_trial_mask, 'EndTime'] = ev.loc[ev_trial_mask, 'EndTime'] + trial_time_offset
            
            total_dur += trial_dur + 0.001
    return ro, ev, ga

In [8]:
def preprocess(re, rg):
    re = re.rename(columns = {"EventInfo" : "TaskPos", "EventType": "TaskType", "EventStatus": 'TaskStatus'})
    
    re, rg = move_custom_data(re, rg)
    re, rg = add_rc_count_to_events(re, rg)
    ro, re, rg = create_rounds(re, rg)
    re, rg = process_events(re, rg)
    ro, re, rg = fix_timestamps(re, rg, ro)
    
    re = re[['SubjectId', 'EventId', 'Round', 'RoundType', 'Trial', 'ValidTrial', 'TaskType', 'TaskStatus', 'TaskPos', 'Duration', 'StartTime', 'EndTime']]
    rg = rg[['SubjectId', 'Timestamp', 'Round', 'Trial', 'PlayerXPos', 'PlayerYPos', 'PlayerZPos', 'MainCamXPos', 'MainCamYPos', 'MainCamZPos', 'PlayerXRot', 'PlayerYRot', 'PlayerZRot', 'MainCamXRot', 'MainCamYRot', 'MainCamZRot', 'CamContXPos', 'CamContYPos', 'CamContZPos']]

    return ro, re, rg

In [40]:
def create_final_csvs(write_csvs=False):
    study_dict = { 'rounds': [], 'events': [], 'gamestatus': [] }
    
    for subject in range(4):
        # check if data for the subject available
        if len(glob.glob('study_data/s%d_*.csv' % subject)) < 2:
            continue
            
        print('Subject #%d' % (subject))
        
        # load all csvs and concatenate splits if available
        raw_events, raw_gamestatus = load_csvs(subject)
        
        # preprocess data
        ro, ev, ga = preprocess(raw_events, raw_gamestatus)
        
        # add to data dict
        study_dict['rounds'].append(ro)
        study_dict['events'].append(ev)
        study_dict['gamestatus'].append(ga)
        
    # clean index
    ro_total = pd.concat(study_dict['rounds']).reset_index()
    ev_total = pd.concat(study_dict['events']).reset_index()
    gs_total = pd.concat(study_dict['gamestatus']).reset_index()
    
    if write_csvs:
        ro_total.to_excel('ro_all.xlsx')
        ev_total.to_excel('ev_all.xlsx')
        gs_total.to_excel('gs_all.xlsx')
        
    print('finished')

In [41]:
create_final_csvs()


Subject #3
['-0.313', ' -1.219', ' -0.412']
unfinshed event
finished

Data-Analysis


In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

ro_tot = pd.read_excel('rounds_total.xlsx')
ev_tot = pd.read_excel('events_total.xlsx')
gs_tot = pd.read_excel('gamestatus_total.xlsx')

In [ ]:
def get_motion_data(gs, started, finished):
    gidx = gs.index[(gs.Timestamp >= started) & (gs.Timestamp <= finished)]
    md = gs.loc[gidx, ['PlayerYRot', 'PlayerXRot', 'MainCamYRot', 'MainCamXRot']]
    md['MainCamXRotRel'] = md['MainCamXRot'] - md['PlayerXRot']
    md['MainCamYRotRel'] = md['MainCamYRot'] - md['PlayerYRot']
    md['MainCamXRotNorm'] = md.apply(lambda row: ((row.MainCamXRotRel - 180) % 360 - 180), axis=1)
    return md

Gewinnspiel Auswertung


In [ ]:
columns = ['SubjectId', 'RoundType', 'RoundScore', 'PovSucc', 'PovTotal', 'RingSucc', 'RingTotal']
data = []

def filter_ev(group):
    return ((group.Trial == group.Trial.max())
           & group.ValidTrial 
           & (~group.RoundType.isin(['Training_Ring_Only', 'Training_Complete']))).any()

def round_score(x):
    pov_succ = x[(x.TaskType == 'POV') & (x.TaskStatus == 'success')].EventId.size
    pov_tot = x[(x.TaskType == 'POV')].EventId.size
    ring_succ = x[(x.TaskType == 'Ring') & (x.TaskStatus == 'success')].EventId.size
    ring_tot = x[(x.TaskType == 'Ring')].EventId.size
    round_score = pov_succ * (ring_succ / ring_tot)
    data.append({'SubjectId': x.iloc[0].SubjectId, 'RoundType': x.iloc[0].RoundType, 
                 'RoundScore': round_score, 'PovSucc': pov_succ, 'PovTotal': pov_tot,
                 'RingSucc': ring_succ, 'RingTotal': ring_tot})
    return round_score

grouped = ev_tot.groupby(['SubjectId', 'RoundType', 'Trial'])
ev_red = grouped.filter(filter_ev)

#print(ev_red[(ev_red.SubjectId == 3) & (ev_red.TaskType == 'POV')])

round_score_group = ev_red.groupby(['SubjectId', 'RoundType']).apply(round_score)
total_score = round_score_group.groupby(['SubjectId']).agg({'Sum': 'sum'}).sort_values(by="SubjectId", ascending=True)
print(total_score)

#df = pd.DataFrame(data=data, columns=columns)
#df.to_excel('scores.xlsx')

#print(filtered_ev_tot.groupby(['SubjectId', 'RoundType', 'TaskType', 'TaskStatus'])['EventId'].agg({"Count": 'count'}).to_string())

POV Selection Times by Round Type


In [ ]:
pov = ev_tot[(ev_tot.RoundType != 'Training_Complete') &(ev_tot.TaskType == 'POV') & (ev_tot.TaskStatus == 'success')]
# Fehlerhafte EInträge entfernen
pov = pov.drop(pov[(pov.RoundType == 'Audio') & (pov.Duration > 10)].index)
plot = pov.boxplot(column=['Duration'], by='RoundType', figsize=(20,10))
fig = plot.get_figure()
fig.savefig("output.png")

Feedback Reaction Times


In [ ]:
for rt in ev_tot.RoundType.unique():
    round_ev = ev_tot[ev_tot.RoundType == rt]
    round_pov = events[events.TaskType == 'POV']
    #pov_succ = pov_events.loc[events.Status == 'success']
    
    #ring_events = events.loc[(eve nts.TaskType == 'Ring')]
    #ring_succ = ring_events.loc[events.Status == 'success'].shape[0]
    #ring_fail = ring_events.loc[events.Status == 'timeout'].shape[0]
    #pov_fail = pov_events.loc[events.Status == 'timeout'].shape[0]

    for pos in round_pov.Position.unique():
        print("Plot for %s, %s" % (rt, pos))
        round_pov_pos = round_pov[round_pov.Position == pos]
        round_pov_pos.plot(kind="scatter", x="MainCamYRot", y="MainCamXRot", xlim=(-360,360), ylim=(180,-180))
        plt.show()