In [1]:
# -*- coding: UTF-8 -*-
# Render our plots inline
%matplotlib inline
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil
# Notebook-wide display configuration for pandas and matplotlib.
# NOTE(review): 'display.mpl_style' looks like an option that newer pandas releases no longer
# accept — confirm against the pandas version this notebook is pinned to.
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier, overridden by seaborn
pd.set_option('display.max_columns', None) # Display all the columns
# NOTE(review): 'font.family' is assigned again a few lines below, so this generic value is
# presumably replaced by 'Source Sans Pro' — confirm whether this line is still needed.
plt.rcParams['font.family'] = 'sans-serif' # Sans Serif fonts for all the graphs
# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html
# Use Source Sans Pro as the font family for every figure
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})
In [2]:
# Read the survey answers (UTF-8 encoded CSV) into the DataFrame queried by the cells below
survey_csv_path = "data/lab-survey.csv"
data = pd.read_csv(survey_csv_path, encoding="utf-8")
In [3]:
# Check data
#data[0:4] # Equals to data.head()
In [4]:
# Question D14: one survey column per predefined answer, D14[SQ001] .. D14[SQ012],
# plus the free-text column D14[other].
initiative_columns = [
    "D14[SQ001]", "D14[SQ002]", "D14[SQ003]", "D14[SQ004]",
    "D14[SQ005]", "D14[SQ006]", "D14[SQ007]", "D14[SQ008]",
    "D14[SQ009]", "D14[SQ010]", "D14[SQ011]", "D14[SQ012]",
]
# Human-readable label of each column; position i labels initiative_columns[i].
initiative_options = [
    'Singolo individuo privato',
    'Gruppo di individui privati',
    'Scuola primaria (scuola elementare)',
    'Scuola secondaria di primo grado (scuola media)',
    'Scuola secondaria di secondo grado (scuola superiore)',
    # NOTE(review): the usual Italian spelling is "Università"; the label is kept as-is
    # because it is emitted verbatim in the plot and in the text report.
    u'Universitá',
    'Museo',
    'Centro di ricerca',
    "Incubatore o acceleratore d'impresa",
    'Coworking',
    'Impresa privata',
    'Fondazione',
]
# replace() without inplace returns a new, independent DataFrame. The previous
# `data[...]` + `replace(..., inplace=True)` mutated a slice of `data`, which raises
# SettingWithCopyWarning here and again when a column is later added to `initiative`.
initiative = data[initiative_columns].replace(u'Sì', 'Si')  # Get rid of accented characters
# Free-text answers, lower-cased so that differently capitalised entries are counted together
initiative_other = data['D14[other]'].str.lower().value_counts()
In [5]:
#places[0:4]
In [6]:
# Enumerate every non-empty subset of the main answer columns
# See http://stackoverflow.com/questions/17176887/python-get-all-permutation-of-a-list-w-o-repetitions
import itertools

# Keyed by the tuple of column names; each entry stores the columns, their labels,
# a per-column True/False mask and the labels joined into a single string.
all_combinations = {}
all_combinations_columns = []
for subset_size in range(1, len(initiative_columns) + 1):
    for column_subset in itertools.combinations(initiative_columns, subset_size):
        positions = [initiative_columns.index(column) for column in column_subset]
        labels = [initiative_options[position] for position in positions]
        # Mask with one flag per main column: True where the column belongs to the subset
        mask = [False] * len(initiative_columns)
        for position in positions:
            mask[position] = True
        all_combinations[column_subset] = {
            "col_list": list(column_subset),
            "list": labels,
            "bool_list": mask,
            "str": ", ".join(labels),
        }
In [7]:
# Find, for every row, the combination whose True/False mask equals the row's "Si" answers.
# Exactly one value is produced per row, in row order: a row that selected none of the
# options matches no combination and gets NaN, so the list always has the same length as
# `initiative`. (Skipping such rows would shift every following label onto the wrong row
# once the list is attached to the frame.)
combination_by_mask = {
    tuple(combination["bool_list"]): combination["str"]
    for combination in all_combinations.values()
}
# Only the answer columns are inspected, so re-running this cell after extra columns
# (e.g. "Combination") have been added to `initiative` still yields masks of the right width.
answered_yes = initiative[initiative_columns].isin(["Si"])
str_values = [
    combination_by_mask.get(tuple(row_mask), np.nan)
    for row_mask in answered_yes.values.tolist()
]
In [8]:
# Attach the per-row combination strings to the frame as the "Combination" column
combination_labels = pd.Series(str_values)
initiative["Combination"] = combination_labels
In [9]:
# Frequency of each combination: absolute counts, and the same counts as percentages
combination_column = initiative["Combination"]
resulting_combinations = combination_column.value_counts()
resulting_combinations_percentage = 100 * combination_column.value_counts(normalize=True)
In [10]:
# Bar chart of the first ten entries of the combination counts
top_combinations = resulting_combinations[:10]
ax = top_combinations.plot(kind='bar', figsize=(20, 10), rot=90)
ax.set_title(u"Di chi è stata l’iniziativa che ha portato all’apertura del laboratorio? Combinazioni", fontsize=18, y=1.02)
ax.set_ylabel("Lab", fontsize=16)
ax.set_xlabel("Combinazioni", fontsize=16)
# Export the same figure in each of the three formats
for target_path in ("svg/Q014-Combinazioni.svg",
                    "png/Q014-Combinazioni.png",
                    "pdf/Q014-Combinazioni.pdf"):
    plt.savefig(target_path)
In [11]:
%%capture output
# Save the output as a variable that can be saved to a file
# Data of the combinations
print "Data:"
print resulting_combinations
print
# Data of the combinations: percentage
print "Data %:"
print resulting_combinations_percentage
In [12]:
# Save+show the output to a text file
# `output` is the object filled by the `%%capture output` cell; %save is given the expression
# str(output) and writes the result to Q014-Combinazioni.py in the working directory.
# NOTE(review): %save is meant for saving code, so it may add its own header line to the file
# and may ask for confirmation if the target already exists — confirm against the IPython docs.
%save Q014-Combinazioni.py str(output)
# Move the saved file into the text/ folder, renaming it to a .txt report
shutil.move("Q014-Combinazioni.py", "text/Q014-Combinazioni.txt")