In [64]:
# Go through all files in a folder and combine them
import glob, os


def combine_result_files(files, out_path):
    """Concatenate tab-separated result files into one file with a p_id column.

    Each input file is expected to start with a header line. The header of the
    first non-empty file is written once (prefixed with 'p_id'); the header of
    every other file is dropped. Every data row is prefixed with the
    participant ID taken from the file name.

    Parameters
    ----------
    files : iterable of str
        Paths of the files to combine. The participant ID is the two
        characters preceding the 4-character extension (e.g. 'xx_07.csv'
        gives '07').
    out_path : str
        Path of the combined file (overwritten if it exists).
    """
    header_written = False
    # Text mode: the rows are combined with str prefixes, which fails on
    # Python 3 if the files are opened in binary mode.
    with open(out_path, 'w') as f_out:
        for f in files:
            # Get the participant number from the file name
            p_id = f[-6:-4]
            with open(f, 'r') as f_content:
                # The first line is the header: write it only once
                header = f_content.readline()
                if header and not header_written:
                    if not header.endswith('\n'):
                        header += '\n'
                    f_out.write('p_id\t' + header)
                    header_written = True
                # Copy the data rows of *every* file, including the first one
                for line in f_content:
                    # Guard against a last line without a trailing newline,
                    # which would otherwise merge with the next file's row
                    if not line.endswith('\n'):
                        line += '\n'
                    f_out.write(p_id + '\t' + line)


# Sorted so that the row order of the output is reproducible
files = sorted(glob.glob('img' + os.sep + 'vs_results' + os.sep + '*.csv'))
combine_result_files(files, 'out.csv')
In [65]:
# The same using pandas
# Import under the alias `pd`, which is the name the code below uses
# (a bare `import pandas` leaves `pd` undefined on a fresh kernel).
import pandas as pd
files = glob.glob('img' + os.sep + 'vs_results' + os.sep + '*.csv')
out = []
for f in files:
    # Participant number = the two characters before the '.csv' extension
    p_id = f[-6:-4]
    df_participant = pd.read_csv(f, index_col=None, header=0, sep='\t')
    df_participant['p_id'] = int(p_id)
    out.append(df_participant)
# Stack the per-participant frames into one table
combined = pd.concat(out)
print(combined.head())
# Save as csv
combined.to_csv('out_pandas.csv', sep = '\t')
The results (fabricated data) are stored in a csv file where each row corresponds to one trial (observation). Each trial contains information about the participant ID (p_id), the trial number (trial), the reaction time in ms (rt), the condition (BB or RB; they indicate the colors of the target and distractors), and the number of distractors (n_distractors).
In [66]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Read data into pandas dataframe.
# A forward slash works on Windows, macOS and Linux alike, whereas a
# hard-coded backslash is only a path separator on Windows.
df = pd.read_csv('img/results_visual_search.csv', sep = '\t')
In [67]:
# Show the first five trials as plain text
print(df.head(n=5))
In [68]:
# Bar plot of the reaction time of each participant
sns.barplot(data=df, x='p_id', y='rt')
# Label the axes that the bar plot was just drawn on
plt.gca().set_xlabel('Participant ID')
plt.gca().set_ylabel('Reaction time (ms)')
plt.show()
In [69]:
# Plot reaction times across the number of distractors, one line per condition.
# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed;
# kind='point' reproduces the point plot that `factorplot` drew by default.
sns.catplot(x="n_distractors", y="rt", hue='condition', data=df, kind='point')
plt.xlabel('Number of distractors')
plt.ylabel('Search time (ms)')
plt.show()
Perhaps it would make more sense to construct the plot from each participant's average reaction time (averaged over trials) for each number of distractors and condition.
In [70]:
# Take the mean over the trials of each participant, separately for every
# number of distractors and condition. Chaining `reset_index()` turns the
# group keys back into ordinary columns without mutating a frame in place.
df_avg = (
    df.groupby(['p_id', 'n_distractors', 'condition'])
    .mean()
    .reset_index()
)
# Plot the per-participant means across the number of distractors.
# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed;
# kind='point' reproduces the point plot that `factorplot` drew by default.
sns.catplot(x="n_distractors", y="rt", hue='condition', data=df_avg, kind='point')
plt.xlabel('Number of distractors')
plt.ylabel('Search time (ms)')
plt.show()
In [ ]: