In [1]:
%pylab inline
In [1]:
In [34]:
import re
#takw raw csv from google docs
with open('responses_v2.csv') as f:
rawdata = f.readlines()
#make each row into a list
rawdata = rawdata[0].split('\r')
datalist = []
for i,d in enumerate(rawdata):
datalist.append(d.split(','))
## Get subjects
subjects = set()
for i in rawdata[1:]:
s = i.split(',')
subjects.add(s[1])
print subjects
#probmap = {.1:1,.25:2,.75:3,.9:4} original version
probmap = {.5:1,.6:2,.7:3,.8:4,.9:5}
class Experiment():
def __init__(self,subject_name):
self.subject = subject_name
self.all_data = []
def get_data(self,data):
#get data associated with the subject name
trial_data = []
for line in data:
s = line.split(',')
if self.subject == s[1]:
# trial_num, probsame, response_num(not count 1), time_taken, response (1=S, 0=D), correct/incorrect (1/0),
trial_split = s[2].split('\t')
trial_num = int((trial_split[1].split('-'))[-1])
probsame = float((trial_split[2].split('-'))[-1])
print probsame
# print 'hello',trial_num, probsame
pattern = r'response-\d+\t+[SD]+\t+[SD]+\t+\d+'
matches = re.findall(pattern,s[2])
# 'response-1\tS\tS\t10239' ## response, target, actual, time
for index, response in zip(range(len(matches)), matches):
if index > 0:
response_num = int((response.split('\t'))[0].split('-')[-1])
correct = 1 if ((response.split('\t'))[1] == (response.split('\t'))[2]) else 0
#print index
time_taken = int((response.split('\t'))[-1]) - int((matches[index-1].split('\t'))[-1])
#print time_taken
response_key = 1 if (response.split('\t'))[2] == 'S' else 0
#print response
#TARGET response means switch
switch = 1 if (response.split('\t'))[1] != (matches[index-1].split('\t'))[1] else 0
target = 1 if (response.split('\t'))[1] == 'S' else 0
trial_data.append([trial_num, probsame, response_num, time_taken, response_key, correct, switch, target])
self.all_data.append(array(trial_data,dtype='float32'))
data_dict = {}
columns=["trial_num", "prob_same", "response_num", 'time_taken', 'response', 'correct', 'switch', 'target']
for subject in ['3337264','Nate!'] :
e = Experiment(subject)
e.get_data(rawdata)
with open(subject + '_formatted.csv','w') as outfile:
writer = csv.writer(outfile)
#header is a list of strings
writer.writerow(columns)
#data is a list of lists
writer.writerows(e.all_data[0])
In [327]:
## 1. prob_same vs average response time
data_switch = data[logical_and(data[:,6] > 0.5, data[:,3] < 2000),:]
data_same = data[logical_and(data[:,6]< 0.5 , data[:,3] < 2000),:]
#
close()
fig1 = figure(1);
figsize(10,10)
#.1 means arrows mostly switch - most responses will be D
#.9 means arrows mostly stay the same - most responses will be S
for i,index in zip([1, 2,3 , 4],range(1,5)):
subplot(220 + index)
hist(data_switch[abs(data_switch[:,1] - i) < .01,3], 20, color='b',alpha=0.5)
hist(data_same[abs(data_same[:,1] - i) < .01,3], 20, color='r',alpha=0.5)
axis([0, 2000, 0, 350])
title('response times ' + str(i))
legend(['switch','same'])
In [328]:
#mean difference seems to grow as a function
fig2 = figure(2)
means_switch = []
means_same = []
for i,index in zip([1, 2, 3, 4],range(1,5)):
plot(i, mean(data_switch[abs(data_switch[:,1] - i) < .01,3]), 'bx',markersize=20)
means_switch.append(mean(data_switch[abs(data_switch[:,1] - i) < .01,3]))
plot(i,mean(data_same[abs(data_same[:,1] - i) < .01,3]),'rx',markersize=20)
means_same.append(mean(data_same[abs(data_same[:,1] - i) < .01,3]))
axis([0, 5, 0, 1000])
xlabel('prob of D response')
ylabel('mean response time')
legend(['response change','response the same'])
t = [1, 2, 3, 4] #[.1 .25 .75 .9]
(ar,br)=polyfit(t,means_switch,1)
xr=polyval([ar,br],t)
plot(t,xr)
(ar,br)=polyfit(t,means_same,1)
xr=polyval([ar,br],t)
plot(t,xr,'r')
Out[328]:
In [451]:
#2. correct/incorrect vs sequence (like a bar graph w/ probe trials), also as a function of predictability
#for each SWITCH response AFTER the 2nd and before the 2nd to last, look 2 back and 2 forward
def plot_data(subject, color):
error_count_1 = array([0, 0, 0, 0, 0, 0, 0])
error_count_2 = array([0, 0, 0, 0, 0, 0, 0])
error_count_3 = array([0, 0, 0, 0, 0, 0, 0])
error_count_4 = array([0, 0, 0, 0, 0, 0, 0])
total_counts = array([0, 0, 0, 0])
probs = [1, 2, 3, 4]
columns=["trial_num", "prob_same", "response_num", 'time_taken', 'response', 'correct', 'switch', 'target']
data = data_dict[subject]
set_printoptions(threshold=nan)
for prob_index, prob in zip(range(4), probs):
data_prob = data[data[:,1] == prob,:] #dealing with JUST the data from this probability
trials = list(set(data_prob[:,0]))
trials.sort()
#for each trial
for trial in trials:
data_trial = data_prob[ data_prob[:,0] == trial,:]
num_responses = data_trial.shape[0]
for response in range(4,num_responses-2):
total_counts[prob_index] = total_counts[prob_index] + 1
if data_trial[response,6] == 1: #if it's a switch trial
#print response, 'is a switch trial'
#print data_trial[response-2:response+3,:]
for offset in range(-4,3): #look in the surroundings
#count up the number of errors - 1-correct gives 1 for error, 0 for non-error
#print offset, offset+4, response+offset
if prob_index == 0:
error_count_1[offset+4] += 0 if data_trial[response + offset,5] else 1
elif prob_index == 1:
error_count_2[offset+4] += 0 if data_trial[response + offset,5] else 1
elif prob_index == 2:
error_count_3[offset+4] += 0 if data_trial[response + offset,5] else 1
elif prob_index == 3:
error_count_4[offset+4] += 0 if data_trial[response + offset,5] else 1
counts = [error_count_1,error_count_2,error_count_3,error_count_4]
figsize(8,8)
for i in range(4):
count_proportion = array(counts[i],dtype='float') / array(total_counts[i],dtype='float')
print count_proportion
subplot(220 + 1 + i)
bar(array([-4, -3, -2, -1, 0, 1, 2]) - .3,count_proportion,color=color,alpha=0.5)
axis([-5, 4, 0, .05])
# print total_counts
# print error_count_1
# print error_count_2
# print error_count_3
# print error_count_4
fig, ax = subplots();
plot_data('CalvinLBS','r')
plot_data('Calvin','b')
ax.set_title('hello')
title('red=low, blue=normal')
#TODO: make sure these are normalized by the NUMBER of trials and responses
#TODO: I feel like I'm probably one trial off, possibly in recording the data.
#This would make LOTS of sense if -1 was the same as 0 here. DOUBLE CHECK!!
Out[451]:
In [414]:
# 3. Look at prob correct on switch trials as a function of the length of the non-switch sequence preceeding it
#get counts
#this holds the counts, so we can look up by [probsame][previousNoSwitchTrials][correct]
class Multidict(dict):
"""Implementation of perl's autovivification feature."""
def __getitem__(self, item):
try:
return dict.__getitem__(self, item)
except KeyError:
value = self[item] = type(self)()
return value
counts = Multidict()
probs = [1, 2, 3, 4]
columns=["trial_num", "prob_same", "response_num", 'time_taken', 'response', 'correct', 'switch', 'target']
set_printoptions(threshold=nan)
trials = list(set(data[:,0]))
for trial in trials:
data_trial = data[ data[:,0] == trial,:]
num_responses = data_trial.shape[0]
#print data_trial
for response in range(10,num_responses):
if data_trial[response,6] == 1: #if it's a switch trial
#print response, 'is a switch trial'
#look back up to 10 and count the number of subsequent NON-switches BEFORE this trial
count = 0
for lookback in range(-1,-11,-1):
if data_trial[response + lookback,6] == 0: #if not switch
count += 1 #add to count
else:
break #stop the for-loop, stop counting
#add to database
try:
counts[data_trial[response,1]][count][data_trial[response,5]] = counts[data_trial[response,1]][count][data_trial[response,5]] + 1
except:
counts[data_trial[response,1]][count][data_trial[response,5]] = 1
In [420]:
fig = figure();
colors = ['r','g','b','k']
#for each prob
for p in probs:
#for each distance back
counts_correct = zeros((1,10))
counts_incorrect = zeros((1,10))
for dist in range(1,11):
#print counts_correct
#get the correct and incorrect counts
#print p, dist, counts[p][dist][1], counts[p][dist][0]
#try adding to everything less than it
for i in range(1,dist+1):
# counts_correct[0][dist-1] += counts[p][dist][1]
# counts_incorrect[0][dist-1] += counts[p][dist][0]
if counts[p][i-1][0] == {}: counts[p][i-1][0] = 0
if counts[p][i-1][1] == {}: counts[p][i-1][1] = 0
counts_correct[0][i-1] += counts[p][i][1]
counts_incorrect[0][i-1] += counts[p][i][0]
print counts_correct, counts_incorrect
prob_error = counts_incorrect / (counts_incorrect + counts_correct)
plot(range(1,11),prob_error[0],colors[p-1])
print (counts_incorrect + counts_correct)
xlabel('number of non-switches proceeding the switch')
ylabel('proportion of errors')
#TODO question: why are there so many with 10-back but not with 9-back? 10-back is "10 or more"
#TODO I don't think it's counting the incorrect properly - they should be strictly decreasing.
Out[420]:
Red and Black are the extrme cases - they are more similar than green and blue.
1-4 seems to be a strong trend. I wonder what happens with 5. In any case, PART of it seems linear.
TODO: need to fix the counting - for some reason the incorrect is not monotonically decreasing where it SHOULD be
In [ ]: