000 - Export anonymized data


In [1]:
# -*- coding: UTF-8 -*-

import pandas as pd
import numpy as np

In [2]:
# Read the raw results file (UTF-8 encoded CSV) into a DataFrame
data = pd.read_csv(
    "data/results-makers-40.csv",
    encoding="utf-8",
)

In [3]:
# Check data
#data # Displays the DataFrame; use data.head() to show only the first rows

In [4]:
# Collect the column labels of the loaded data into a plain Python list,
# so the set of columns to export can be narrowed down before exporting
columns = [label for label in data.columns.values]

In [5]:
# Columns we do not want to export
del_columns = [u'Unnamed: 0',
               u'id',
               u'submitdate',
               u'lastpage',
               u'startlanguage',
               u'startdate',
               u'datestamp',
               u'ipaddr',
               u'Q002'
               ]

# Fail loudly if an expected column is absent from the loaded data: silently
# skipping a misspelled or renamed name could let it slip into the export.
# (The previous list.index() lookup raised ValueError in the same situation.)
missing_columns = [name for name in del_columns if name not in data.columns]
if missing_columns:
    raise ValueError("Columns to delete not found in data: %r" % (missing_columns,))

# Build a filtered list instead of deleting from `columns` in place, so that
# re-running this cell does not raise once the names have already been removed.
columns = [name for name in columns if name not in del_columns]

# Debug
#print(columns)

In [6]:
# Put the rows in a random order so the export does not keep the original row order.
# Technique taken from: http://stackoverflow.com/a/15772330/2237113
data_export = data[columns]

# Draw a random permutation of the existing row labels, then reorder by it.
# NOTE: despite its name, `sorted_data_export` is shuffled, not sorted.
shuffled_labels = np.random.permutation(data_export.index)
sorted_data_export = data_export.reindex(shuffled_labels)

# Debug
#sorted_data_export

In [7]:
# Renumber the index, for more anonymization... for all the anonymized data
# except business models data.
# After the shuffle the index still carries the original row labels; replace
# them with fresh consecutive integers (0, 1, 2, ...) in the current row order.
# drop=True discards the old labels instead of keeping them as a new column,
# and this also works correctly if the old index contained repeated labels.
sorted_data_anonymized_final = sorted_data_export.reset_index(drop=True)

#Debug
#sorted_data_anonymized_final

In [8]:
# Write the anonymized dataset to disk as a UTF-8 encoded CSV file
sorted_data_anonymized_final.to_csv(
    'data/makersinquiry-italy-2014.csv',
    encoding='utf-8',
)