# In [98]:
# function to calculate levenshtein distance
def levenshtein(s, t):
    """Return the Levenshtein (edit) distance between ``s`` and ``t``.

    Iterative two-row formulation: only the previous and the current row of
    the dynamic-programming matrix are kept in memory.

    Args:
        s: Source sequence (normally a string).
        t: Target sequence (normally a string).

    Returns:
        The minimum number of single-element insertions, deletions and
        substitutions needed to turn ``s`` into ``t``.
    """
    # Trivial cases: identical inputs, or one side empty.
    if s == t:
        return 0
    if len(s) == 0:
        return len(t)
    if len(t) == 0:
        return len(s)

    width = len(t) + 1
    # previous[j] is the distance between the empty prefix of s and t[:j].
    previous = list(range(width))
    current = [None] * width

    for row, s_item in enumerate(s):
        # Distance from s[:row + 1] to the empty prefix of t: delete everything.
        current[0] = row + 1
        for col, t_item in enumerate(t):
            substitution = previous[col] + (0 if s_item == t_item else 1)
            current[col + 1] = min(current[col] + 1,
                                   previous[col + 1] + 1,
                                   substitution)
        # The finished row becomes the reference row for the next element of s.
        previous[:] = current

    return current[len(t)]

import csv
import pandas as pd

def timeWithTaskNumber(task_number):
    """Collect each participant's recorded times for one task and save them.

    Reads ``right_answers<task_number>.csv`` (column ``right_answer``) and
    ``results-survey.csv``. Survey columns are addressed by position:
    column 0 is the user id, column 1 the input-field name and column 2 the
    time. A row belongs to the task when ``str(task_number)`` occurs in its
    input-field name. The times of participants 2..26 are written to
    ``time_result_<task_number>.csv``, one row per participant and one
    column per right answer.

    Args:
        task_number: Task identifier; used to build the file names and as the
            substring searched for in the input-field column.

    Raises:
        ValueError: Raised by pandas when the collected table does not have
            exactly one column per right answer.
    """
    # get right answer list
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = df['right_answer'].tolist()

    # get users' answers from csv file
    df = pd.read_csv('results-survey.csv', encoding="ISO-8859-1")

    user_id_list = list(range(2, 27))
    user_time_dic = {user_id: [] for user_id in user_id_list}
    # User ids are compared as text, so map the text form back to the id once
    # instead of scanning every id for every survey row.
    user_id_by_text = {str(user_id): user_id for user_id in user_id_list}
    # NOTE(review): substring match, so task 1 would also match a field name
    # containing e.g. "10" -- confirm against the real field names.
    task_marker = str(task_number)

    for _, row in df.iterrows():
        # .iloc makes the positional access explicit; a bare integer key on a
        # label-indexed Series is a deprecated fallback in pandas.
        user_id = user_id_by_text.get(str(row.iloc[0]))
        if user_id is not None and task_marker in row.iloc[1]:
            user_time_dic[user_id].append(row.iloc[2])

    df2 = pd.DataFrame.from_dict(user_time_dic, orient='index')
    # change column names
    df2.columns = right_answer_list
    time_result = 'time_result_' + str(task_number) + '.csv'
    df2.to_csv(time_result, sep=',', encoding='ISO-8859-1')

def levenshteinWithTaskNumber(task_number):
    """Score each participant's typed answers for one task and save the result.

    Reads ``right_answers<task_number>.csv`` (column ``right_answer``) and
    ``results-survey.csv``. Survey columns are addressed by position:
    column 0 is the user id, column 1 the input-field name and column 3 the
    typed text. A row belongs to the task when ``str(task_number)`` occurs in
    its input-field name. Each participant's answers are paired in order with
    the right answers, and the Levenshtein distance of every pair is written
    to ``levenshtein_result_<task_number>.csv`` (one row per participant
    2..26, one column per right answer).

    Args:
        task_number: Task identifier; used to build the file names and as the
            substring searched for in the input-field column.

    Raises:
        ValueError: Raised by pandas when the distance table does not have
            exactly one column per right answer.
    """
    # get right answer list
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = df['right_answer'].tolist()

    # get users' answers from csv file
    df = pd.read_csv('results-survey.csv', encoding="ISO-8859-1")

    user_id_list = list(range(2, 27))
    user_answer_dic = {user_id: [] for user_id in user_id_list}
    # User ids are compared as text; one dict lookup replaces the scan over
    # all ids for every survey row.
    user_id_by_text = {str(user_id): user_id for user_id in user_id_list}
    task_marker = str(task_number)

    for _, row in df.iterrows():
        # .iloc makes the positional access explicit; a bare integer key on a
        # label-indexed Series is a deprecated fallback in pandas.
        user_id = user_id_by_text.get(str(row.iloc[0]))
        if user_id is not None and task_marker in row.iloc[1]:
            user_answer_dic[user_id].append(row.iloc[3])

    user_answer_levenshtein_dic = {}
    for user_id, answers in user_answer_dic.items():
        distances = []
        # zip stops at the shorter list, so surplus answers are ignored.
        for answer, right_answer in zip(answers, right_answer_list):
            # A blank CSV cell is read as NaN (a float), which has no length;
            # score it as an empty answer, as processSecurityAuswertung does.
            if not isinstance(answer, str):
                answer = ''
            distances.append(levenshtein(answer, right_answer))
        user_answer_levenshtein_dic[user_id] = distances

    df2 = pd.DataFrame.from_dict(user_answer_levenshtein_dic, orient='index')
    # change column names
    df2.columns = right_answer_list
    levenshtein_result = 'levenshtein_result_' + str(task_number) + '.csv'
    df2.to_csv(levenshtein_result, sep=',', encoding='ISO-8859-1')
    
def createFilterAndDistanceWithTaskNumber(task_number):
    """Convert a ``filter%answer`` table into a ``filter%distance`` table.

    Reads ``right_answers<task_number>.csv`` (column ``right_answer``) and
    ``filter_and_word_<task_number>.csv``, which must have one column per
    right answer whose cells look like ``<filter>%<typed answer>``. Each typed
    answer is replaced by its Levenshtein distance to the column's right
    answer, and the table is written to
    ``filter_and_distance_<task_number>.csv``.

    Input row ``i`` (0-based) is stored under participant id ``i + 2``, so the
    input may hold at most 25 rows (ids 2..26).

    Args:
        task_number: Task identifier used to build the three file names.

    Raises:
        KeyError: If the input has more than 25 rows.
        IndexError: If a cell contains no ``%`` separator.
    """
    # get right answer list
    right_answers_file = 'right_answers' + str(task_number) + '.csv'
    df = pd.read_csv(right_answers_file, encoding="ISO-8859-1")
    right_answer_list = df['right_answer'].tolist()

    filter_and_word_file = 'filter_and_word_' + str(task_number) + '.csv'
    df = pd.read_csv(filter_and_word_file, encoding="ISO-8859-1")

    filter_and_distance_dic = {user_id: [] for user_id in range(2, 27)}

    for index, row in df.iterrows():
        for right_answer in right_answer_list:
            # Split on the first '%' only: the filter name comes first and the
            # typed answer may itself contain '%'.
            parts = row[right_answer].split('%', 1)
            filter_type = parts[0]
            user_answer = parts[1]
            distance = levenshtein(user_answer, right_answer)
            filter_and_distance_dic[index + 2].append(filter_type + '%' + str(distance))

    df2 = pd.DataFrame.from_dict(filter_and_distance_dic, orient='index')
    df2.columns = right_answer_list
    filter_and_distance = 'filter_and_distance_' + str(task_number) + '.csv'
    df2.to_csv(filter_and_distance, sep=',', encoding='ISO-8859-1')
    
def reformatDataWithTaskNumberAndTasktype(task_number,task_type):
    """Regroup a per-word ``filter%value`` table into per-filter columns.

    Reads ``right_answers<task_number>.csv`` (column ``right_answer``) and
    ``filter_and_<task_type>_<task_number>.csv``, whose columns are the right
    answers and whose cells look like ``<filter>%<value>``. For every input
    row the values are collected filter by filter (in the fixed filter order
    below, and in column order within one filter) and written to
    ``<task_type>_<task_number>.csv`` under the column names
    ``<filter>_wordtype<k>_word<m>``.

    Input row ``i`` (0-based) is stored under participant id ``i + 2``.

    Args:
        task_number: Task identifier used in the file names.
        task_type: Middle part of the input file name and prefix of the
            output file name (the calls in this file pass ``'time'`` or
            ``'distance'``).
    """
    # The right answers double as the column labels of the input table.
    answers_csv = 'right_answers' + str(task_number) + '.csv'
    answer_words = pd.read_csv(answers_csv, encoding="ISO-8859-1")['right_answer'].tolist()

    filters = ['colorhalftone','crystallize','blur','pixelation','plaintext','asterisk']
    slots = [
        '_wordtype1_word1',
        '_wordtype1_word2',
        '_wordtype2_word1',
        '_wordtype2_word2',
        '_wordtype3_word1',
        '_wordtype3_word2',
    ]
    headers = [name + slot for name in filters for slot in slots]

    source_csv = 'filter_and_' + task_type + '_' +  str(task_number) + '.csv'
    table = pd.read_csv(source_csv, encoding="ISO-8859-1")

    grouped = {participant: [] for participant in range(2, 27)}
    for position, record in table.iterrows():
        for name in filters:
            for word in answer_words:
                cell = record[word]
                # A cell belongs to a filter when the filter name occurs in it.
                if name in cell:
                    grouped[position + 2].append(cell.split('%')[1])

    result = pd.DataFrame.from_dict(grouped, orient='index')
    result.columns = headers

    target_csv = task_type + '_' + str(task_number) + '.csv'
    result.to_csv(target_csv,sep=',', encoding='ISO-8859-1')
    
    
def processSecurityAuswertung():
    """Score the security-study answers and export them grouped by filter.

    Reads ``security_right_answer.csv`` (column ``Wordlist``) and
    ``Security_Auswertung.csv``. Columns of the latter are addressed by
    position: column 1 is the user id, column 2 the group (``'A'``..``'F'``),
    column 3 the shown word and column 4 the participant's answer.

    Each group saw the six filters in its own order, six words per filter, so
    the n-th row of a participant is labelled with the n-th entry of that
    group's 36-entry filter sequence. Two files are written:

    * ``security_result_temp.csv`` -- one row per participant (ids 2..26),
      one column per word in ``Wordlist``, cells ``<filter>$<distance>``.
    * ``security_result.csv`` -- the same distances regrouped into the
      columns ``<filter>_wordtype<k>_word<m>``.

    Raises:
        KeyError: If a matched row's group is not one of ``'A'``..``'F'``.
        ValueError: Raised by pandas when a table does not have exactly one
            column per expected column label.
    """
    # get right answer list
    df = pd.read_csv('security_right_answer.csv', encoding="ISO-8859-1")
    right_answer_list = df['Wordlist'].tolist()

    # get users' answers from csv file
    df = pd.read_csv('Security_Auswertung.csv', encoding="ISO-8859-1")

    user_id_list = list(range(2, 27))
    # User ids are compared as text; one dict lookup replaces the scan over
    # all ids for every row.
    user_id_by_text = {str(user_id): user_id for user_id in user_id_list}

    # Order in which each group was shown the filters.
    words_per_filter = 6
    filter_order_by_group = {
        'A': ['colorhalftone', 'crystallize', 'asteriks', 'gauss', 'plain', 'mosaik'],
        'B': ['crystallize', 'gauss', 'colorhalftone', 'mosaik', 'asteriks', 'plain'],
        'C': ['gauss', 'mosaik', 'crystallize', 'plain', 'colorhalftone', 'asteriks'],
        'D': ['mosaik', 'plain', 'gauss', 'asteriks', 'crystallize', 'colorhalftone'],
        'E': ['plain', 'asteriks', 'mosaik', 'colorhalftone', 'gauss', 'crystallize'],
        'F': ['asteriks', 'colorhalftone', 'plain', 'crystallize', 'mosaik', 'gauss'],
    }
    # Expand every order so that entry n names the filter of a participant's
    # n-th word (each filter repeated once per word).
    filter_dic = {
        group: [name for name in order for _ in range(words_per_filter)]
        for group, order in filter_order_by_group.items()
    }
    words_per_user = words_per_filter * 6  # 36 words per participant

    filter_list = ['colorhalftone', 'crystallize', 'gauss', 'mosaik', 'plain', 'asteriks']
    word_slots = [
        '_wordtype1_word1',
        '_wordtype1_word2',
        '_wordtype2_word1',
        '_wordtype2_word2',
        '_wordtype3_word1',
        '_wordtype3_word2',
    ]
    new_column_names = [the_filter + slot for the_filter in filter_list for slot in word_slots]

    distance_dic = {user_id: [] for user_id in user_id_list}
    new_distance_dic = {user_id: [] for user_id in user_id_list}

    for _, row in df.iterrows():
        # .iloc makes the positional access explicit; a bare integer key on a
        # label-indexed Series is a deprecated fallback in pandas.
        user_id = user_id_by_text.get(str(row.iloc[1]))
        if user_id is None:
            continue

        answer = row.iloc[4]
        # A blank answer cell is read as NaN (a float); any non-text cell is
        # scored as an empty answer.
        if not isinstance(answer, str):
            answer = ''
        distance = str(levenshtein(row.iloc[3], answer))

        entries = distance_dic[user_id]
        # Position within this participant's own word sequence. Counting per
        # participant keeps the filter labels aligned even when another
        # participant has fewer or more rows than expected.
        position = len(entries) % words_per_user
        entries.append(filter_dic[row.iloc[2]][position] + '$' + distance)

    df2 = pd.DataFrame.from_dict(distance_dic, orient='index')
    # change column names
    df2.columns = right_answer_list
    df2.to_csv('security_result_temp.csv', sep=',', encoding='ISO-8859-1')

    # Regroup every participant's distances by filter, keeping the word order
    # within one filter. Working from the per-user lists (rather than from the
    # padded DataFrame) tolerates participants with fewer rows.
    for user_id, entries in distance_dic.items():
        for the_filter in filter_list:
            for entry in entries:
                name, _, entry_distance = entry.partition('$')
                if name == the_filter:
                    new_distance_dic[user_id].append(entry_distance)

    df3 = pd.DataFrame.from_dict(new_distance_dic, orient='index')
    # change column names
    df3.columns = new_column_names
    df3.to_csv('security_result.csv', sep=',', encoding='ISO-8859-1')
    

# Driver section. Every function reads and writes CSV files using relative
# paths. The calls below are disabled; re-enable them to regenerate the
# corresponding result files for tasks 1 and 2.
# Note: reformatDataWithTaskNumberAndTasktype(n, 'distance') reads
# filter_and_distance_<n>.csv, the file written by
# createFilterAndDistanceWithTaskNumber(n), so that call has to run first.
# levenshteinWithTaskNumber(1)
# levenshteinWithTaskNumber(2)
# timeWithTaskNumber(1)
# timeWithTaskNumber(2)
# createFilterAndDistanceWithTaskNumber(1)
# createFilterAndDistanceWithTaskNumber(2)
# reformatDataWithTaskNumberAndTasktype(1,'time')
# reformatDataWithTaskNumberAndTasktype(2,'time')
# reformatDataWithTaskNumberAndTasktype(1,'distance')
# reformatDataWithTaskNumberAndTasktype(2,'distance')
# Only the security evaluation is run at the moment.
processSecurityAuswertung()