In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
In [28]:
def load_csvs(subject_id):
    """Read every events/gamestatus CSV pair recorded for one subject.

    The first pair carries no suffix (``s<id>_events.csv``); later splits are
    suffixed ``_1``, ``_2``, ... Reading stops at the first split for which
    either of the two files is missing, and that incomplete split is ignored.

    Each loaded frame gets a ``SubjectId`` and a ``SplitCount`` column.

    Returns:
        Tuple ``(events, gamestatus)`` of concatenated frames. Because
        ``reset_index()`` is called without ``drop``, the per-file row number
        is kept as an ``index`` column.
    """
    events_pattern = "study_data/s%d_events%s.csv"
    gamestatus_pattern = "study_data/s%d_gamestatus%s.csv"
    event_frames, status_frames = [], []

    split_no = 0
    while True:
        suffix = "_%d" % split_no if split_no > 0 else ""
        try:
            split_events = pd.read_csv(events_pattern % (subject_id, suffix), sep=';')
            split_status = pd.read_csv(gamestatus_pattern % (subject_id, suffix), sep=';')
        except FileNotFoundError:
            # No (complete) pair for this split: everything has been read.
            break

        for frame in (split_events, split_status):
            frame['SubjectId'] = subject_id
            frame['SplitCount'] = split_no
        event_frames.append(split_events)
        status_frames.append(split_status)
        split_no += 1

    all_events = pd.concat(event_frames).reset_index()
    all_status = pd.concat(status_frames).reset_index()
    return all_events, all_status
In [3]:
def move_custom_data(re, rg):
    """Move the position carried by 'Custom' events into the gamestatus table.

    For every row of ``re`` whose ``TaskType`` is ``'Custom'``, ``TaskPos`` is
    stripped of parentheses/spaces at both ends and split on commas. The three
    resulting strings are written (unconverted) to ``CamContXPos``,
    ``CamContYPos`` and ``CamContZPos`` of all ``rg`` rows that share the
    event's ``SplitCount``. The event row is then removed from ``re``.

    Both frames are modified in place and returned as ``(re, rg)``.
    """
    axis_columns = ('CamContXPos', 'CamContYPos', 'CamContZPos')

    # Iterate over a snapshot because rows are dropped from `re` on the way.
    for row_label, event in re.copy().iterrows():
        if event.TaskType != 'Custom':
            continue

        same_split_rows = rg.index[rg.SplitCount == event.SplitCount]
        cam_cont_pos = event.TaskPos.strip('() ').split(',')
        print(cam_cont_pos)

        for axis, column in enumerate(axis_columns):
            rg.loc[same_split_rows, column] = cam_cont_pos[axis]

        re.drop(row_label, inplace=True)

    return re, rg
In [4]:
def add_rc_count_to_events(re, rg):
    """Add a ``RepeatCount`` column to the events frame ``re``.

    Events are walked in row order. Within one (SplitCount, RoundType) block a
    timestamp that is lower than the previous row's timestamp starts a new
    repeat. When a block ends, repeat counts that exist in ``rg`` for that
    block but were not reached by the timestamp heuristic are assigned to the
    events whose timestamps fall inside the corresponding ``rg`` time range.

    ``re`` is modified in place; ``rg`` is only read. Returns ``(re, rg)``.
    """
    # NOTE: `re.size` is rows * columns, so this Series is longer than the
    # frame; the column assignment aligns on index labels.
    re['RepeatCount'] = pd.Series(np.array(np.zeros(re.size), dtype='uint8'))
    last_time = 0
    last_round_type = re.iloc[0].RoundType
    last_split_count = 0
    repeat_count = 0
    # Index label at which the repeat currently being collected started.
    start_index = 0
    for index, re_row in re.iterrows():
        # A block ends when the split or the round type changes, or on the very last row.
        if (re_row.SplitCount != last_split_count) | (re_row.RoundType != last_round_type) | (index == re.index[-1]):
            # Label slices are inclusive, so the current row is written too.
            re.loc[start_index : index, 'RepeatCount'] = repeat_count
            sc_rt_rg_mask = (rg.SplitCount == last_split_count) & (rg.RoundType == last_round_type)
            sc_rt_re_mask = (re.SplitCount == last_split_count) & (re.RoundType == last_round_type)
            # Highest repeat count recorded in rg for the finished block up to the last event time.
            last_rc_in_rg = rg.loc[(rg.Timestamp <= last_time) & sc_rt_rg_mask].RepeatCount.max()
            # print (last_round_type, "sc", last_split_count, "time", last_time, "rc", repeat_count, "lrc", last_rc_in_rg)
            if repeat_count < last_rc_in_rg:
                # rg knows more repeats than were detected from the event timestamps:
                # label the events by the time range each of those repeats covers in rg.
                for rc in range(repeat_count+1, last_rc_in_rg+1):
                    rc_round = rg.loc[(rg.RepeatCount == rc) & sc_rt_rg_mask]
                    rc_start = rc_round.iloc[0].Timestamp
                    rc_end = rc_round.iloc[-1].Timestamp
                    # print (last_round_type, repeat_count, last_rc_in_rg, rc_start, rc_end)
                    re.loc[(re.Timestamp >= rc_start) & (re.Timestamp <= rc_end) & sc_rt_re_mask, 'RepeatCount'] = rc
            # Start collecting the next block from the current row.
            start_index = index
            repeat_count = 0
            last_round_type = re_row.RoundType
            last_split_count = re_row.SplitCount
        elif (re_row.Timestamp < last_time):
            # Timestamp went backwards inside the same block: close the current repeat.
            re.loc[start_index : index, 'RepeatCount'] = repeat_count
            start_index = index
            repeat_count += 1
        last_time = re_row.Timestamp
    return re, rg
In [5]:
def create_rounds(re, rg, min_duration=15, expected_rings=20, expected_povs=9):
    """Number the trials of every round and build the per-trial summary table.

    A trial is one (Round, SplitCount, RepeatCount) combination found in the
    gamestatus frame ``rg``. Its duration is the difference between the last
    and the first ``rg`` timestamp of that combination. Trials that are not
    longer than ``min_duration`` are removed from both ``rg`` and ``re``.
    Trial numbers restart at 0 for every round and count only kept trials.

    A trial is valid when the number of distinct finished events (status other
    than ``'visible'``) matches the expectation: ``expected_rings`` Ring
    events for round type ``'Training_Ring_Only'``, ``expected_povs`` POV
    events for every other round type.

    Args:
        re: events frame with RoundType, SplitCount, RepeatCount, TaskType,
            TaskStatus and EventId columns. Modified in place.
        rg: gamestatus frame with SubjectId, Round, RoundType, SplitCount,
            RepeatCount and Timestamp columns. Modified in place.
        min_duration: trials with a duration <= this value are discarded.
        expected_rings: finished Ring events required in a ring-only trial.
        expected_povs: finished POV events required in any other trial.

    Returns:
        ``(rounds, re, rg)`` where ``rounds`` has one row per kept trial.
        Its ``StartTime``/``EndTime`` columns are left empty here.
    """
    round_columns = ['SubjectId', 'Round', 'RoundType', 'Trial', 'RepeatCount',
                     'SplitCount', 'ValidTrial', 'StartTime', 'EndTime', 'Duration']
    round_data = []

    # Plain arrays of the frame's own length: no index alignment involved and
    # no uninitialised memory (the gamestatus column used to be built from
    # np.empty() sized after the *events* frame).
    re["Trial"] = np.zeros(len(re), dtype='uint8')
    re["ValidTrial"] = pd.Series([], dtype=bool)
    rg["Trial"] = np.zeros(len(rg), dtype='uint8')
    rg["ValidTrial"] = pd.Series([], dtype=bool)

    # iterate over all available rounds
    for r in rg.Round.unique():
        trialNum = 0
        rg_round = rg.loc[rg.Round == r]
        round_type = rg_round.iloc[0].RoundType
        subject_id = rg_round.iloc[0].SubjectId

        for sc in rg_round.SplitCount.unique():
            rg_split = rg_round.loc[rg_round.SplitCount == sc]

            for rc in rg_split.RepeatCount.unique():
                rg_trial = rg_split.loc[rg_split.RepeatCount == rc]
                trial_start = rg_trial.iloc[0].Timestamp
                trial_end = rg_trial.iloc[-1].Timestamp
                trial_dur = trial_end - trial_start

                rg_trial_idx = rg.index[(rg.Round == r) & (rg.SplitCount == sc) & (rg.RepeatCount == rc)]
                # Events carry no round number, so they are matched by round type.
                re_trial_idx = re.index[(re.RoundType == round_type) & (re.SplitCount == sc) & (re.RepeatCount == rc)]

                if trial_dur <= min_duration:
                    # Too short to be a real attempt: discard it everywhere.
                    rg.drop(rg_trial_idx, inplace=True)
                    re.drop(re_trial_idx, inplace=True)
                    continue

                re_trial = re.loc[re_trial_idx]
                finished = re_trial.loc[re_trial.TaskStatus != 'visible']
                rings = finished.loc[finished.TaskType == 'Ring'].EventId.unique().size
                povs = finished.loc[finished.TaskType == 'POV'].EventId.unique().size

                if round_type == 'Training_Ring_Only':
                    valid_trial = rings == expected_rings
                else:
                    valid_trial = povs == expected_povs

                round_data.append({
                    'SubjectId': subject_id, 'Round': r, 'RoundType': round_type,
                    'Trial': trialNum, 'RepeatCount': rc, 'SplitCount': sc,
                    'ValidTrial': valid_trial, 'Duration': trial_dur})

                rg.loc[rg_trial_idx, 'Trial'] = trialNum
                rg.loc[rg_trial_idx, 'ValidTrial'] = valid_trial
                re.loc[re_trial_idx, 'Trial'] = trialNum
                re.loc[re_trial_idx, 'ValidTrial'] = valid_trial
                trialNum += 1

    return pd.DataFrame(data=round_data, columns=round_columns), re, rg
In [6]:
def process_events(re, rg):
    """Collapse each 'visible' event and its follow-up into a single row.

    For every event row with ``TaskStatus == 'visible'``:

    * ``Round`` is looked up from the first gamestatus row with the same
      ``RoundType`` and ``Trial``;
    * the rows with the same ``EventId`` and a status other than ``'visible'``
      are looked up; the first one supplies ``EndTime``, ``Duration`` and the
      final ``TaskStatus``, and all of them are removed from ``re``;
    * without such a row the event is marked ``'unfinished'`` and its
      ``EndTime``/``Duration`` stay NaN.

    Finally ``Timestamp`` is renamed to ``StartTime``.

    The ``CamCont{X,Y,Z}Pos`` columns of ``rg`` are only created (empty) when
    they do not exist yet. They used to be overwritten unconditionally, which
    erased the values ``move_custom_data`` had stored earlier in the pipeline.

    Args:
        re: events frame (TaskStatus, Timestamp, RoundType, Trial, EventId).
            Rows are dropped in place; the returned frame is a renamed copy.
        rg: gamestatus frame (RoundType, Trial, Round). Modified in place.

    Returns:
        ``(re, rg)``.

    Raises:
        IndexError: if a visible event has no gamestatus row with the same
            RoundType and Trial.
    """
    re["EndTime"] = pd.Series([], dtype=float)
    re["Duration"] = pd.Series([], dtype=float)
    # Array of the frame's own length (re.size would be rows * columns).
    re["Round"] = np.zeros(len(re), dtype="uint8")

    # Guarantee the columns exist for the final column selection, but keep
    # whatever has already been stored in them.
    for column in ('CamContXPos', 'CamContYPos', 'CamContZPos'):
        if column not in rg.columns:
            rg[column] = pd.Series([], dtype=object)

    # Iterate over a snapshot because follow-up rows are dropped on the way.
    for index, re_row in re.copy().iterrows():
        if re_row.TaskStatus != 'visible':
            continue

        started = re_row.Timestamp
        rg_info = rg.loc[(rg.RoundType == re_row.RoundType) & (rg.Trial == re_row.Trial)].iloc[0]
        re.loc[index, 'Round'] = rg_info.Round

        # NOTE(review): the follow-up is matched on EventId alone; this
        # assumes EventId is unique across trials and subjects - confirm.
        is_corresponding_event = (re.EventId == re_row.EventId) & (re.TaskStatus != 'visible')
        ce_idx = re.index[is_corresponding_event]

        if ce_idx.size > 0:
            corresponding_event = re.loc[ce_idx].iloc[0]
            finished = corresponding_event.Timestamp
            duration = finished - started
            status = corresponding_event.TaskStatus
            re.drop(ce_idx, inplace=True)
        else:
            print('unfinshed event')
            finished = np.nan
            duration = np.nan
            status = 'unfinished'

        re.loc[index, 'EndTime'] = finished
        re.loc[index, 'Duration'] = duration
        re.loc[index, 'TaskStatus'] = status

    re = re.rename(columns={'Timestamp': 'StartTime'})
    return re, rg
In [7]:
def fix_timestamps(ev, ga, ro):
    """Put all trials of a subject on one continuous time axis.

    Trials are processed in ascending (Round, Trial) order. Each trial is
    shifted so that its first gamestatus timestamp equals the accumulated
    duration of all preceding trials; consecutive trials are separated by
    0.001 time units. The same offset is applied to the events of the trial,
    and ``StartTime``/``EndTime`` of the trial are written to ``ro``.

    Only the (Round, Trial) values actually present in ``ro`` are visited, so
    gaps in the numbering (e.g. a round whose trials were all discarded) or a
    numbering that does not start at 0 no longer break the loop.

    Args:
        ev: events frame with Round, Trial, StartTime and EndTime columns.
        ga: gamestatus frame with Round, Trial and Timestamp columns.
        ro: rounds frame with Round, Trial and Duration columns.

    Returns:
        ``(ro, ev, ga)`` - note the order differs from the argument order.
        All three frames are modified in place.

    Raises:
        IndexError: if a trial listed in ``ro`` has no gamestatus rows.
    """
    total_dur = 0.0
    for r in sorted(ro.Round.unique()):
        curr_round = ro.loc[ro.Round == r]
        for t in sorted(curr_round.Trial.unique()):
            trial_dur = curr_round.loc[curr_round.Trial == t].iloc[0].Duration

            trial_mask = (ro.Round == r) & (ro.Trial == t)
            ro.loc[trial_mask, 'StartTime'] = total_dur
            ro.loc[trial_mask, 'EndTime'] = total_dur + trial_dur

            ga_trial_mask = (ga.Round == r) & (ga.Trial == t)
            ev_trial_mask = (ev.Round == r) & (ev.Trial == t)

            # Offset that moves the trial's first gamestatus sample to total_dur.
            ga_trial_start = ga.loc[ga_trial_mask].iloc[0].Timestamp
            trial_time_offset = total_dur - ga_trial_start

            ga.loc[ga_trial_mask, 'Timestamp'] = ga.loc[ga_trial_mask, 'Timestamp'] + trial_time_offset
            ev.loc[ev_trial_mask, 'StartTime'] = ev.loc[ev_trial_mask, 'StartTime'] + trial_time_offset
            ev.loc[ev_trial_mask, 'EndTime'] = ev.loc[ev_trial_mask, 'EndTime'] + trial_time_offset

            # Small gap so the end of one trial never equals the start of the next.
            total_dur += trial_dur + 0.001
    return ro, ev, ga
In [8]:
def preprocess(re, rg):
    """Run the full per-subject pipeline on raw events and gamestatus frames.

    Steps, in order: rename the raw event columns, move 'Custom' event data
    into the gamestatus frame, add repeat counts to the events, build the
    rounds table, merge start/finish events, and rebase all timestamps.

    Returns:
        ``(rounds, events, gamestatus)`` with events and gamestatus reduced to
        the fixed column sets listed below.
    """
    event_renames = {"EventInfo": "TaskPos", "EventType": "TaskType", "EventStatus": 'TaskStatus'}
    event_columns = [
        'SubjectId', 'EventId', 'Round', 'RoundType', 'Trial', 'ValidTrial',
        'TaskType', 'TaskStatus', 'TaskPos', 'Duration', 'StartTime', 'EndTime',
    ]
    gamestatus_columns = [
        'SubjectId', 'Timestamp', 'Round', 'Trial',
        'PlayerXPos', 'PlayerYPos', 'PlayerZPos',
        'MainCamXPos', 'MainCamYPos', 'MainCamZPos',
        'PlayerXRot', 'PlayerYRot', 'PlayerZRot',
        'MainCamXRot', 'MainCamYRot', 'MainCamZRot',
        'CamContXPos', 'CamContYPos', 'CamContZPos',
    ]

    re = re.rename(columns=event_renames)
    re, rg = move_custom_data(re, rg)
    re, rg = add_rc_count_to_events(re, rg)
    ro, re, rg = create_rounds(re, rg)
    re, rg = process_events(re, rg)
    # fix_timestamps takes (events, gamestatus, rounds) but returns rounds first.
    ro, re, rg = fix_timestamps(re, rg, ro)

    return ro, re[event_columns], rg[gamestatus_columns]
In [40]:
def create_final_csvs(write_csvs=False, subjects=range(4)):
    """Preprocess every available subject and optionally export the totals.

    A subject is processed when at least two files match
    ``study_data/s<id>_*.csv``. The per-subject rounds, events and gamestatus
    tables are concatenated across subjects.

    Despite the function name, the export format is Excel: with
    ``write_csvs=True`` the totals are written to ``ro_all.xlsx``,
    ``ev_all.xlsx`` and ``gs_all.xlsx`` in the working directory.

    Args:
        write_csvs: write the three Excel files when True.
        subjects: iterable of integer subject ids to look for
            (default: 0, 1, 2, 3).

    Returns:
        None. When no subject has data, a message is printed and nothing is
        concatenated or written (``pd.concat`` would raise on an empty list).
    """
    study_dict = {'rounds': [], 'events': [], 'gamestatus': []}

    for subject in subjects:
        # check if data for the subject available
        if len(glob.glob('study_data/s%d_*.csv' % subject)) < 2:
            continue
        print('Subject #%d' % (subject))

        # load all csvs and concatenate splits if available
        raw_events, raw_gamestatus = load_csvs(subject)

        # preprocess data
        ro, ev, ga = preprocess(raw_events, raw_gamestatus)

        # add to data dict
        study_dict['rounds'].append(ro)
        study_dict['events'].append(ev)
        study_dict['gamestatus'].append(ga)

    if not study_dict['rounds']:
        print('no subject data found')
        return

    # clean index
    ro_total = pd.concat(study_dict['rounds']).reset_index()
    ev_total = pd.concat(study_dict['events']).reset_index()
    gs_total = pd.concat(study_dict['gamestatus']).reset_index()

    if write_csvs:
        ro_total.to_excel('ro_all.xlsx')
        ev_total.to_excel('ev_all.xlsx')
        gs_total.to_excel('gs_all.xlsx')
    print('finished')
In [41]:
# Run the pipeline for all subjects. write_csvs defaults to False, so this
# call does not write any Excel files.
create_final_csvs()
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the aggregated rounds, events and gamestatus tables from Excel.
# NOTE(review): create_final_csvs() writes 'ro_all.xlsx', 'ev_all.xlsx' and
# 'gs_all.xlsx', not the file names read here - confirm where the
# '*_total.xlsx' files come from.
ro_tot = pd.read_excel('rounds_total.xlsx')
ev_tot = pd.read_excel('events_total.xlsx')
gs_tot = pd.read_excel('gamestatus_total.xlsx')
In [ ]:
def get_motion_data(gs, started, finished):
    """Return player/camera rotation data for a time window.

    Selects the rows of ``gs`` whose ``Timestamp`` lies in
    ``[started, finished]`` (both inclusive) and adds:

    * ``MainCamXRotRel`` / ``MainCamYRotRel``: camera rotation minus player
      rotation on the respective axis;
    * ``MainCamXRotNorm``: ``MainCamXRotRel`` wrapped into ``[-180, 180)``.

    The result is an independent copy; ``gs`` is not modified. An empty window
    yields an empty frame with the same seven columns.

    Args:
        gs: gamestatus frame with Timestamp, PlayerYRot, PlayerXRot,
            MainCamYRot and MainCamXRot columns.
        started: lower bound of the window.
        finished: upper bound of the window.
    """
    in_window = (gs.Timestamp >= started) & (gs.Timestamp <= finished)
    # Explicit copy: the new columns must not be written into a view of gs.
    md = gs.loc[in_window, ['PlayerYRot', 'PlayerXRot', 'MainCamYRot', 'MainCamXRot']].copy()
    md['MainCamXRotRel'] = md['MainCamXRot'] - md['PlayerXRot']
    md['MainCamYRotRel'] = md['MainCamYRot'] - md['PlayerYRot']
    # Vectorised wrap-around; same formula as before, without a row-wise apply.
    md['MainCamXRotNorm'] = (md['MainCamXRotRel'] - 180) % 360 - 180
    return md
In [ ]:
# Column order for the per-round score table; only referenced by the
# commented-out DataFrame export at the end of this cell.
columns = ['SubjectId', 'RoundType', 'RoundScore', 'PovSucc', 'PovTotal', 'RingSucc', 'RingTotal']
# Filled as a side effect of round_score(): one dict per scored group.
data = []
def filter_ev(group):
    """Decide whether a group of events is kept for scoring.

    Returns True when at least one row of ``group`` belongs to the group's
    highest ``Trial``, has a truthy ``ValidTrial`` and is not from one of the
    two training round types.

    NOTE(review): in this cell the groups are built per
    (SubjectId, RoundType, Trial), so every row already has the group's
    maximum Trial and the first condition is always true - confirm whether
    "last trial of the round type" was intended.
    """
    training_rounds = ['Training_Ring_Only', 'Training_Complete']
    in_last_trial = group.Trial == group.Trial.max()
    is_training = group.RoundType.isin(training_rounds)
    keep_row = in_last_trial & group.ValidTrial & (~is_training)
    return keep_row.any()
def round_score(x):
    """Score one group of events and record the counts in the global ``data``.

    The score is ``successful POVs * (successful rings / total rings)``.
    A group without any Ring event has no defined ring ratio; its score is
    NaN instead of raising ``ZeroDivisionError``.

    Args:
        x: events of one group, with SubjectId, RoundType, TaskType,
           TaskStatus and EventId columns.

    Returns:
        The score as a float (NaN when the group contains no Ring events).

    Side effects:
        Appends a dict with SubjectId, RoundType, RoundScore and the four
        counts to the module-level list ``data``.
    """
    is_pov = x.TaskType == 'POV'
    is_ring = x.TaskType == 'Ring'
    succeeded = x.TaskStatus == 'success'

    pov_succ = x[is_pov & succeeded].EventId.size
    pov_tot = x[is_pov].EventId.size
    ring_succ = x[is_ring & succeeded].EventId.size
    ring_tot = x[is_ring].EventId.size

    # Local name differs from the function name to avoid shadowing it.
    if ring_tot > 0:
        score = pov_succ * (ring_succ / ring_tot)
    else:
        score = float('nan')

    data.append({'SubjectId': x.iloc[0].SubjectId, 'RoundType': x.iloc[0].RoundType,
                 'RoundScore': score, 'PovSucc': pov_succ, 'PovTotal': pov_tot,
                 'RingSucc': ring_succ, 'RingTotal': ring_tot})
    return score
# Keep only the event groups accepted by filter_ev().
grouped = ev_tot.groupby(['SubjectId', 'RoundType', 'Trial'])
ev_red = grouped.filter(filter_ev)

# One score per (SubjectId, RoundType); round_score() also fills `data`.
round_score_group = ev_red.groupby(['SubjectId', 'RoundType']).apply(round_score)

# Total per subject. The former `.agg({'Sum': 'sum'})` renaming form is not
# supported by current pandas, so the sum is computed and named explicitly.
total_score = (
    round_score_group
    .groupby(level='SubjectId')
    .sum()
    .to_frame('Sum')
    .sort_index(ascending=True)
)
print(total_score)
#df = pd.DataFrame(data=data, columns=columns)
#df.to_excel('scores.xlsx')
In [ ]:
# Successful POV tasks from every round type except 'Training_Complete'.
is_scored_pov = (
    (ev_tot.RoundType != 'Training_Complete')
    & (ev_tot.TaskType == 'POV')
    & (ev_tot.TaskStatus == 'success')
)
pov = ev_tot[is_scored_pov]

# Remove faulty entries: 'Audio' POVs with a duration above 10.
faulty_entries = pov[(pov.RoundType == 'Audio') & (pov.Duration > 10)]
pov = pov.drop(faulty_entries.index)

# One box per round type, saved next to the notebook.
plot = pov.boxplot(column=['Duration'], by='RoundType', figsize=(20, 10))
fig = plot.get_figure()
fig.savefig("output.png")
In [ ]:
# One scatter plot of camera rotation per round type and POV position.
for rt in ev_tot.RoundType.unique():
    round_ev = ev_tot[ev_tot.RoundType == rt]
    # Select from the current round's events; the name `events` that was used
    # here is not defined at notebook level.
    round_pov = round_ev[round_ev.TaskType == 'POV']

    # NOTE(review): preprocess() names the position column 'TaskPos' and keeps
    # the camera rotations in the gamestatus table - confirm that
    # 'events_total.xlsx' really contains 'Position', 'MainCamYRot' and
    # 'MainCamXRot'.
    for pos in round_pov.Position.unique():
        print("Plot for %s, %s" % (rt, pos))
        round_pov_pos = round_pov[round_pov.Position == pos]
        round_pov_pos.plot(kind="scatter", x="MainCamYRot", y="MainCamXRot", xlim=(-360,360), ylim=(180,-180))
        plt.show()