# In [98]:
# function to calculate levenshtein distance
def levenshtein(s, t):
    """Return the Levenshtein (edit) distance between ``s`` and ``t``.

    Iterative two-row variant of the dynamic-programming algorithm (after the
    Wikipedia article): only the previous and the current row of the distance
    matrix are kept in memory.

    Args:
        s: First sequence (normally a string).
        t: Second sequence (normally a string).

    Returns:
        The minimum number of single-element insertions, deletions and
        substitutions needed to turn ``s`` into ``t``.
    """
    if s == t:
        return 0
    if len(s) == 0:
        return len(t)
    if len(t) == 0:
        return len(s)

    # previous[j] is the distance between the already processed prefix of s
    # and t[:j]; for the empty prefix that is simply j insertions.
    previous = list(range(len(t) + 1))
    current = [0] * (len(t) + 1)

    for i, s_char in enumerate(s):
        # Turning s[:i + 1] into the empty string takes i + 1 deletions.
        current[0] = i + 1
        for j, t_char in enumerate(t):
            cost = 0 if s_char == t_char else 1
            current[j + 1] = min(
                current[j] + 1,        # insertion
                previous[j + 1] + 1,   # deletion
                previous[j] + cost,    # substitution (or match)
            )
        # Swap the two rows instead of copying element by element.
        previous, current = current, previous

    # After the final swap the last computed row lives in ``previous``.
    return previous[len(t)]
import csv
import pandas as pd
def timeWithTaskNumber(task_number):
    """Export the recorded times of one task, per user, to a CSV file.

    Reads the expected answers from ``right_answers<task_number>.csv``
    (column ``right_answer``) and the survey export ``results-survey.csv``.
    For every survey row whose first column is one of the user ids 2..26 and
    whose second column contains ``str(task_number)``, the value of the third
    column (the time) is collected for that user, in file order.

    The result has one row per user id and is written to
    ``time_result_<task_number>.csv`` with the right answers as column names.

    Args:
        task_number: Task identifier; used in the file names and as the
            substring searched for in the second survey column.

    Raises:
        ValueError: Raised by pandas when the longest per-user list does not
            have exactly as many entries as there are right answers.
    """
    # Expected answers; they only serve as column headers here.
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    answers_df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = answers_df['right_answer'].tolist()

    user_time_dic = {user_id: [] for user_id in range(2, 27)}
    # The original comparison is done on the string form of the id, so the
    # lookup table is keyed the same way.
    times_by_id_text = {
        str(user_id): times for user_id, times in user_time_dic.items()
    }

    survey_df = pd.read_csv('results-survey.csv', encoding="ISO-8859-1")
    task_tag = str(task_number)
    # NOTE(review): this is a plain substring test, so task 1 would also match
    # a field name that merely contains the digit "1" - confirm the field
    # naming scheme makes that impossible.
    #
    # Columns are addressed by position through .iloc; integer keys on a
    # label-indexed row (row[0]) rely on a deprecated pandas fallback.
    columns = zip(
        survey_df.iloc[:, 0],  # user id
        survey_df.iloc[:, 1],  # input field name
        survey_df.iloc[:, 2],  # time
    )
    for raw_user_id, input_field, elapsed in columns:
        times = times_by_id_text.get(str(raw_user_id))
        if times is not None and task_tag in input_field:
            times.append(elapsed)

    df2 = pd.DataFrame.from_dict(user_time_dic, orient='index')
    # change column names
    df2.columns = right_answer_list
    time_result = 'time_result_' + str(task_number) + '.csv'
    df2.to_csv(time_result, sep=',', encoding='ISO-8859-1')
def levenshteinWithTaskNumber(task_number):
    """Export the edit distance of every user answer of one task to CSV.

    Reads the expected answers from ``right_answers<task_number>.csv``
    (column ``right_answer``) and the survey export ``results-survey.csv``.
    For every survey row whose first column is one of the user ids 2..26 and
    whose second column contains ``str(task_number)``, the fourth column (the
    typed text) is collected for that user, in file order. The n-th collected
    answer of a user is then compared with the n-th right answer using
    ``levenshtein``.

    The distances are written to ``levenshtein_result_<task_number>.csv``,
    one row per user id, with the right answers as column names.

    Args:
        task_number: Task identifier; used in the file names and as the
            substring searched for in the second survey column.

    Raises:
        ValueError: Raised by pandas when the longest per-user list does not
            have exactly as many entries as there are right answers.
    """
    # Expected answers, in the order the users were asked for them.
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    answers_df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = answers_df['right_answer'].tolist()

    user_id_list = list(range(2, 27))
    user_answer_dic = {user_id: [] for user_id in user_id_list}
    # Ids are compared through their string form, as in the survey file check.
    answers_by_id_text = {
        str(user_id): answers for user_id, answers in user_answer_dic.items()
    }

    survey_df = pd.read_csv('results-survey.csv', encoding="ISO-8859-1")
    task_tag = str(task_number)
    # Columns are addressed by position through .iloc; integer keys on a
    # label-indexed row (row[0]) rely on a deprecated pandas fallback.
    columns = zip(
        survey_df.iloc[:, 0],  # user id
        survey_df.iloc[:, 1],  # input field name
        survey_df.iloc[:, 3],  # typed text
    )
    for raw_user_id, input_field, typed_text in columns:
        answers = answers_by_id_text.get(str(raw_user_id))
        if answers is not None and task_tag in input_field:
            answers.append(typed_text)

    user_answer_levenshtein_dic = {}
    for user_id in user_id_list:
        distances = []
        for answer, right_answer in zip(user_answer_dic[user_id], right_answer_list):
            if isinstance(answer, float):
                # An empty CSV cell is read back as a float NaN; score it as
                # an empty answer instead of crashing inside levenshtein().
                answer = ''
            elif not isinstance(answer, str):
                # Any other non-text value is compared by its text form.
                answer = str(answer)
            distances.append(levenshtein(answer, right_answer))
        user_answer_levenshtein_dic[user_id] = distances

    df2 = pd.DataFrame.from_dict(user_answer_levenshtein_dic, orient='index')
    # change column names
    df2.columns = right_answer_list
    levenshtein_result = 'levenshtein_result_' + str(task_number) + '.csv'
    df2.to_csv(levenshtein_result, sep=',', encoding='ISO-8859-1')
def createFilterAndDistanceWithTaskNumber(task_number):
    """Convert ``filter%word`` cells into ``filter%distance`` cells.

    Reads the expected answers from ``right_answers<task_number>.csv``
    (column ``right_answer``) and the table
    ``filter_and_word_<task_number>.csv``, which must have one column per
    right answer. Every cell is expected to look like ``<filter>%<answer>``;
    the answer part is replaced by its Levenshtein distance to the column's
    right answer.

    The row with index ``i`` of the input is stored under user id ``i + 2``.
    The result is written to ``filter_and_distance_<task_number>.csv``.

    Args:
        task_number: Task identifier used in the three file names.

    Raises:
        IndexError: If a cell does not contain the ``%`` separator.
        KeyError: If the input has a row that maps to a user id outside
            2..26.
    """
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    answers_df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = answers_df['right_answer'].tolist()

    filter_and_word_file = 'filter_and_word_' + str(task_number) + '.csv'
    words_df = pd.read_csv(filter_and_word_file, encoding="ISO-8859-1")

    filter_and_distance_dic = {user_id: [] for user_id in range(2, 27)}

    for index, row in words_df.iterrows():
        # Row 0 belongs to user 2, row 1 to user 3, and so on.
        cells = filter_and_distance_dic[index + 2]
        for right_answer in right_answer_list:
            # Split on the first '%' only, so that an answer that itself
            # contains '%' is kept whole instead of being cut short.
            parts = row[right_answer].split('%', 1)
            filter_type = parts[0]
            user_answer = parts[1]
            distance = levenshtein(user_answer, right_answer)
            cells.append(filter_type + '%' + str(distance))

    df2 = pd.DataFrame.from_dict(filter_and_distance_dic, orient='index')
    df2.columns = right_answer_list
    out_file = 'filter_and_distance_' + str(task_number) + '.csv'
    df2.to_csv(out_file, sep=',', encoding='ISO-8859-1')
def reformatDataWithTaskNumberAndTasktype(task_number, task_type):
    """Regroup ``filter%value`` cells so that columns are ordered by filter.

    Reads the expected answers from ``right_answers<task_number>.csv``
    (column ``right_answer``) and the table
    ``filter_and_<task_type>_<task_number>.csv``, which must have one column
    per right answer and cells of the form ``<filter>%<value>``.

    For every input row the values are re-ordered filter by filter (in the
    order colorhalftone, crystallize, blur, pixelation, plaintext, asterisk),
    keeping the original column order inside each filter. The row with index
    ``i`` of the input is stored under user id ``i + 2``. The result is
    written to ``<task_type>_<task_number>.csv`` with 36 columns named
    ``<filter>_wordtype<1-3>_word<1-2>``.

    Args:
        task_number: Task identifier used in the file names.
        task_type: Kind of value stored in the cells (the commented-out
            calls in this file use ``'time'`` and ``'distance'``); used in
            the input and output file names.

    Raises:
        IndexError: If a cell matching a filter has no ``%`` separator.
        KeyError: If the input has a row that maps to a user id outside
            2..26.
        ValueError: Raised by pandas when the longest re-ordered row does not
            have exactly 36 values.
    """
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    answers_df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = answers_df['right_answer'].tolist()

    task_type_dic = {user_id: [] for user_id in range(2, 27)}

    filter_list = ['colorhalftone', 'crystallize', 'blur', 'pixelation', 'plaintext', 'asterisk']
    word_slots = (
        '_wordtype1_word1',
        '_wordtype1_word2',
        '_wordtype2_word1',
        '_wordtype2_word2',
        '_wordtype3_word1',
        '_wordtype3_word2',
    )
    new_column_names = [
        the_filter + slot for the_filter in filter_list for slot in word_slots
    ]

    in_file = 'filter_and_' + task_type + '_' + str(task_number) + '.csv'
    cells_df = pd.read_csv(in_file, encoding="ISO-8859-1")

    # One pass over the table is enough: each row only ever feeds its own
    # output list, so grouping by filter can be done row by row.
    for index, row in cells_df.iterrows():
        # Split on the first '%' only so a value containing '%' stays whole.
        cells = [row[right_answer].split('%', 1) for right_answer in right_answer_list]
        values = task_type_dic[index + 2]
        for the_filter in filter_list:
            for parts in cells:
                # Match against the filter part only; the value part must not
                # be able to trigger a match by accident.
                if the_filter in parts[0]:
                    values.append(parts[1])

    df2 = pd.DataFrame.from_dict(task_type_dic, orient='index')
    df2.columns = new_column_names
    out_file = task_type + '_' + str(task_number) + '.csv'
    df2.to_csv(out_file, sep=',', encoding='ISO-8859-1')
def processSecurityAuswertung():
    """Score the security survey and export the distances grouped by filter.

    Reads the word list from ``security_right_answer.csv`` (column
    ``Wordlist``) and the survey export ``Security_Auswertung.csv``. In the
    survey file the columns are used by position: 1 = user id, 2 = group
    (``A``..``F``), 3 = shown word, 4 = the participant's answer.

    For every survey row that belongs to one of the user ids 2..26 the
    Levenshtein distance between the shown word and the answer is computed
    and tagged with the filter that was active. The filter is looked up from
    the group's filter order and a running position that counts matched rows
    modulo 36; each filter covers six consecutive positions.

    Two files are written:

    * ``security_result_temp.csv`` - one row per user, cells of the form
      ``<filter>$<distance>``, columns named after the word list.
    * ``security_result.csv`` - one row per user, only the distances,
      re-ordered filter by filter, with 36 columns named
      ``<filter>_wordtype<1-3>_word<1-2>``.

    Raises:
        KeyError: If a matched row has a group other than ``A``..``F``.
        ValueError: Raised by pandas when the longest per-user list does not
            fit the number of column names being assigned.
    """
    right_answer_df = pd.read_csv('security_right_answer.csv', encoding="ISO-8859-1")
    right_answer_list = right_answer_df['Wordlist'].tolist()
    user_id_list = list(range(2, 27))

    # Order in which each group saw the six filters; every filter was used
    # for six consecutive words.
    filter_order_by_group = {
        'A': ('colorhalftone', 'crystallize', 'asteriks', 'gauss', 'plain', 'mosaik'),
        'B': ('crystallize', 'gauss', 'colorhalftone', 'mosaik', 'asteriks', 'plain'),
        'C': ('gauss', 'mosaik', 'crystallize', 'plain', 'colorhalftone', 'asteriks'),
        'D': ('mosaik', 'plain', 'gauss', 'asteriks', 'crystallize', 'colorhalftone'),
        'E': ('plain', 'asteriks', 'mosaik', 'colorhalftone', 'gauss', 'crystallize'),
        'F': ('asteriks', 'colorhalftone', 'plain', 'crystallize', 'mosaik', 'gauss'),
    }
    words_per_filter = 6
    words_per_user = 36

    # Column order of the final export.
    filter_list = ['colorhalftone', 'crystallize', 'gauss', 'mosaik', 'plain', 'asteriks']
    word_slots = (
        '_wordtype1_word1',
        '_wordtype1_word2',
        '_wordtype2_word1',
        '_wordtype2_word2',
        '_wordtype3_word1',
        '_wordtype3_word2',
    )
    new_column_names = [
        the_filter + slot for the_filter in filter_list for slot in word_slots
    ]

    # Per user: list of (filter, distance-as-text) in survey order.
    scored = {user_id: [] for user_id in user_id_list}
    # Ids are compared through their string form.
    scored_by_id_text = {str(user_id): entries for user_id, entries in scored.items()}

    survey_df = pd.read_csv('Security_Auswertung.csv', encoding="ISO-8859-1")
    # Columns are addressed by position through .iloc; integer keys on a
    # label-indexed row (row[1]) rely on a deprecated pandas fallback.
    columns = zip(
        survey_df.iloc[:, 1],  # user id
        survey_df.iloc[:, 2],  # group
        survey_df.iloc[:, 3],  # shown word
        survey_df.iloc[:, 4],  # answer
    )
    # Position inside the current block of 36 words. It is shared across
    # users, which presumably relies on every user contributing exactly 36
    # consecutive rows - TODO confirm against the survey export.
    position = 0
    for raw_user_id, group, word, answer in columns:
        entries = scored_by_id_text.get(str(raw_user_id))
        if entries is None:
            continue
        if isinstance(answer, str):
            typed = answer
        elif isinstance(answer, float):
            # An empty CSV cell is read back as a float NaN: no answer given.
            typed = ''
        else:
            # Any other value is compared by its text form, so a distance is
            # always computed for the current row.
            typed = str(answer)
        distance = str(levenshtein(word, typed))
        the_filter = filter_order_by_group[group][position // words_per_filter]
        entries.append((the_filter, distance))
        position = (position + 1) % words_per_user

    distance_dic = {
        user_id: [the_filter + '$' + distance for the_filter, distance in entries]
        for user_id, entries in scored.items()
    }
    df2 = pd.DataFrame.from_dict(distance_dic, orient='index')
    # change column names
    df2.columns = right_answer_list
    df2.to_csv('security_result_temp.csv', sep=',', encoding='ISO-8859-1')

    # Re-order each user's distances filter by filter, keeping survey order
    # inside a filter. Working on the in-memory lists means a user without
    # any rows simply yields an empty row.
    new_distance_dic = {
        user_id: [
            distance
            for wanted_filter in filter_list
            for the_filter, distance in entries
            if the_filter == wanted_filter
        ]
        for user_id, entries in scored.items()
    }
    df3 = pd.DataFrame.from_dict(new_distance_dic, orient='index')
    # change column names
    df3.columns = new_column_names
    df3.to_csv('security_result.csv', sep=',', encoding='ISO-8859-1')
# Driver section. The calls below that are commented out are disabled; only
# the security evaluation runs when this cell/script is executed.
# levenshteinWithTaskNumber(1)
# levenshteinWithTaskNumber(2)
# timeWithTaskNumber(1)
# timeWithTaskNumber(2)
# createFilterAndDistanceWithTaskNumber(1)
# createFilterAndDistanceWithTaskNumber(2)
# reformatDataWithTaskNumberAndTasktype(1,'time')
# reformatDataWithTaskNumberAndTasktype(2,'time')
# reformatDataWithTaskNumberAndTasktype(1,'distance')
# reformatDataWithTaskNumberAndTasktype(2,'distance')
processSecurityAuswertung()