In [1]:
# -*- coding: UTF-8 -*-
import pandas as pd
import numpy as np
In [2]:
# Read the raw survey responses (a UTF-8 encoded CSV file) into a DataFrame
data = pd.read_csv(
    filepath_or_buffer="data/lab-survey.csv",
    encoding="utf-8",
)
In [3]:
# Check data
#data[0:4] # Same rows as data.head(4); note that data.head() alone returns 5 rows
In [4]:
# Column selections: each list holds column labels of the loaded survey table
# (`data`). The order of the labels inside a list is the order in which the
# columns appear in the selected frame, so it is kept exactly as written.
#
# Columns to be exported together with the names. This selection is written
# out later in the notebook without being shuffled or re-indexed:
data_full_codes = [ "D1",
"D4[SQ001]","D4[SQ002]",
"D8", "D9", "D12", "D13", "D15",
"D16[SQ001]","D16[SQ002]","D16[SQ004]","D16[SQ012]","D16[SQ013]","D16[SQ003]","D16[SQ005]",
"D16[SQ006]","D16[SQ007]","D16[SQ008]","D16[SQ009]","D16[SQ010]","D16[SQ011]","D16[other]",
"D17", "D18", "D19"]
# Columns to be exported in an anonymized version. The rows of this selection
# are shuffled and re-numbered later in the notebook before being written out:
data_anonymized_codes = ["D2[SQ001]","D2[SQ002]","D2[SQ003]","D2[SQ004]","D2[SQ005]","D2[SQ006]","D2[SQ007]","D2[SQ008]",
"D2[SQ009]","D2[SQ010]","D2[SQ011]","D2[other]",
"D3","D3[other]",
"D5",
"D6","D6[other]",
"D7","D7[other]",
"D10[SQ001]","D10[SQ002]","D10[SQ003]","D10[SQ004]","D10[SQ005]","D10[SQ006]","D10[SQ007]",
"D10[SQ008]","D10[other]",
"D11[SQ001]","D11[SQ002]","D11[SQ003]","D11[SQ004]","D11[SQ005]","D11[other]",
"D14[SQ001]","D14[SQ002]","D14[SQ004]","D14[SQ010]","D14[SQ011]","D14[SQ003]","D14[SQ005]",
"D14[SQ006]","D14[SQ007]","D14[SQ008]","D14[SQ009]","D14[SQ012]","D14[other]",
"D20[SQ001]",
"D21[SQ001]","D21[SQ002]","D21[SQ003]","D21[SQ004]","D21[SQ005]","D21[SQ006]","D21[SQ007]",
"D21[SQ008]","D21[other]",
"D22", "D23", "D24", "D25",
"D26[SQ001]","D26[SQ002]","D26[SQ003]","D26[SQ004]","D26[SQ005]","D26[SQ006]","D26[SQ007]",
"D26[SQ008]","D26[other]",
"D27[SQ001_SQ001]","D27[SQ001_SQ002]","D27[SQ001_SQ003]","D27[SQ001_SQ004]","D27[SQ001_SQ005]",
"D27[SQ001_SQ006]","D27[SQ001_SQ007]","D27[SQ002_SQ001]","D27[SQ002_SQ002]","D27[SQ002_SQ003]",
"D27[SQ002_SQ004]","D27[SQ002_SQ005]","D27[SQ002_SQ006]","D27[SQ002_SQ007]",
"D28[SQ001]","D28[SQ002]","D28[SQ003]","D28[SQ004]","D28[SQ005]","D28[SQ006]","D28[SQ007]",
"D28[SQ008]","D28[SQ009]","D28[SQ010]","D28[SQ011]","D28[SQ012]","D28[SQ013]","D28[SQ016]",
"D28[SQ014]","D28[SQ015]",
"D29[SQ001_SQ002]","D29[SQ001_SQ001]","D29[SQ002_SQ002]","D29[SQ002_SQ001]","D29[SQ003_SQ002]",
"D29[SQ003_SQ001]","D29[SQ004_SQ002]","D29[SQ004_SQ001]","D29[SQ005_SQ002]","D29[SQ005_SQ001]",
"D29[SQ006_SQ002]","D29[SQ006_SQ001]","D29[SQ007_SQ002]","D29[SQ007_SQ001]","D29[SQ008_SQ002]",
"D29[SQ008_SQ001]","D29[SQ009_SQ002]","D29[SQ009_SQ001]","D29[SQ010_SQ002]","D29[SQ010_SQ001]",
"D30[SQ002]","D30[SQ003]","D30[SQ004]","D30[SQ005]","D30[SQ006]","D30[SQ007]","D30[SQ008]",
"D30[SQ009]","D30[SQ010]","D30[SQ011]","D30[SQ012]","D30[SQ013]","D30[SQ014]",
"D31[SQ001_SQ001]","D31[SQ001_SQ002]","D31[SQ002_SQ001]","D31[SQ002_SQ002]","D31[SQ003_SQ001]",
"D31[SQ003_SQ002]","D31[SQ004_SQ001]","D31[SQ004_SQ002]","D31[SQ005_SQ001]","D31[SQ005_SQ002]",
"D31[SQ006_SQ001]","D31[SQ006_SQ002]","D31[SQ007_SQ001]","D31[SQ007_SQ002]","D31[SQ008_SQ001]",
"D31[SQ008_SQ002]","D31[SQ009_SQ001]","D31[SQ009_SQ002]","D31[SQ010_SQ001]","D31[SQ010_SQ002]",
"D32[SQ001]","D32[SQ002]","D32[SQ003]","D32[SQ004]","D32[SQ005]",
"D33",
"D34[SQ001]","D34[SQ002]",
"D35", "D36", "D37",
"D38[SQ001]","D38[SQ002]","D38[SQ003]","D38[SQ004]","D38[SQ005]","D38[SQ006]","D38[SQ007]",
"D39[SQ001]","D39[SQ002]","D39[SQ003]","D39[SQ004]",
"D40[SQ001]","D40[SQ002]","D40[SQ003]","D40[SQ004]","D40[SQ005]","D40[SQ006]","D40[SQ007]",
"D41[SQ001]","D41[SQ002]","D41[SQ003]","D41[SQ004]","D41[SQ005]","D41[SQ006]","D41[SQ007]",
"D41[SQ008]","D41[SQ009]","D41[SQ010]",
]
# Pull the selected columns out of the survey table.
# Indexing with a list of labels keeps only those columns, in list order.
data_full = data[data_full_codes]
data_anonymized = data[data_anonymized_codes]
# "D42" is selected on its own with a single label (not a list); it is shuffled
# and exported separately from the other anonymized columns later on.
# NOTE(review): the variable name suggests D42 holds the business-model answers - confirm against the survey.
data_business_models = data["D42"]
In [5]:
# Put the rows of both anonymized selections into a random order, so that a
# row's position no longer matches its position in the original survey file.
# Technique from: http://stackoverflow.com/a/15772330/2237113
# Each selection gets its own, independently drawn permutation of its index
# labels; no random seed is set in the cells shown here, so the order changes
# on every run.
anonymized_row_order = np.random.permutation(data_anonymized.index)
sorted_data_anonymized = data_anonymized.reindex(anonymized_row_order)

business_models_row_order = np.random.permutation(data_business_models.index)
sorted_data_business_models = data_business_models.reindex(business_models_row_order)
In [6]:
# Renumber the index, for more anonymization: the shuffled rows still carry
# their original row labels, so replace them with fresh consecutive numbers
# (0, 1, 2, ...) in the current, shuffled order.
# This covers all the anonymized data except the business models data.
# reset_index(drop=True) discards the old labels instead of keeping them as an
# extra column, which is what the previous label -> position mapping achieved.
sorted_data_anonymized_final = sorted_data_anonymized.reset_index(drop=True)
In [7]:
# Renumber the index, for more anonymization, for the business models data:
# the shuffled rows still carry their original row labels, so replace them
# with fresh consecutive numbers (0, 1, 2, ...) in the current, shuffled order.
# reset_index(drop=True) discards the old labels rather than turning them into
# a column, so the result keeps the same kind of object as the input.
sorted_data_business_models_final = sorted_data_business_models.reset_index(drop=True)
In [8]:
# Debug
#data_full
In [9]:
# Debug
#data_anonymized
In [10]:
# Write the three resulting datasets to UTF-8 encoded CSV files.
# The pairs are processed in the listed order: the named data first, then the
# shuffled and renumbered anonymized data, then the business models data.
exports = (
    (data_full, 'data/italian-labs_final_data_with_names.csv'),
    (sorted_data_anonymized_final, 'data/italian-labs_final_data_anonymized.csv'),
    (sorted_data_business_models_final, 'data/italian-labs_final_data_business_models_anonymized.csv'),
)
for dataset, csv_path in exports:
    dataset.to_csv(csv_path, encoding='utf-8')