In [1]:
# -*- coding: UTF-8 -*-
# Render our plots inline
%matplotlib inline
import shutil
from collections import OrderedDict

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
# Make the graphs a bit prettier (overridden by seaborn).
# The 'display.mpl_style' option does not exist in recent pandas releases, where
# setting it raises OptionError (a KeyError subclass). Skip it there instead of
# aborting the whole notebook on its first cell.
try:
    pd.set_option('display.mpl_style', 'default')
except KeyError:
    pass
pd.set_option('display.max_columns', None) # Display all the columns
# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html
# Font for all the graphs. A single assignment is enough: an earlier
# 'sans-serif' setting of the same key would be overwritten by this one.
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})
In [2]:
# Read the survey answers from the UTF-8 encoded CSV file into a DataFrame
survey_csv = "data/lab-survey.csv"
data = pd.read_csv(survey_csv, encoding="utf-8")
In [3]:
# Check data
#data[0:4] # Equivalent to data.head()
In [4]:
# Subquestions of question D30: each CSV column code is paired with the
# (Italian) label used in the text report, the chart axes and the file names.
d30_items = [
    ("D30[SQ002]", u"Zona falegnameria"),
    ("D30[SQ003]", u"Fusione di metalli"),
    ("D30[SQ004]", u"Lavorazioni ceramica"),
    ("D30[SQ005]", u"Lavorazioni vetro"),
    ("D30[SQ006]", u"Lavorazioni tessile"),
    ("D30[SQ007]", u"Fresatrice a controllo manuale"),
    ("D30[SQ008]", u"Tornio a controllo manuale"),
    ("D30[SQ009]", u"Trapano a colonna"),
    ("D30[SQ010]", u"Termoformatura"),
    ("D30[SQ011]", u"Produzione di circuiti per incisione"),
    ("D30[SQ012]", u"Macchina da taglio e cucito"),
    ("D30[SQ013]", u"Materiali per stampi in silicone e colatura resine"),
    ("D30[SQ014]", u"Attrezzature per Biolab"),
]
# Split the pairs back into the two parallel lists used by the following cells
subquestions = [item[0] for item in d30_items]
subquestions_value = [item[1] for item in d30_items]
In [5]:
# Count the answers given to each subquestion.
# `space` maps every subquestion code to a Series of answer -> number of labs,
# sorted by count. Missing answers (NaN) are folded into the 0 bucket.
# An OrderedDict keeps the entries in the order of `subquestions`, so that
# position k in `space` matches label k in `subquestions_value`.
space = OrderedDict()
for code in subquestions:
    answer_counts = data[code].value_counts(dropna=False).to_dict()
    merged = {}
    zero_value = 0.0
    nan_value = 0.0
    # A dedicated name for the answer: reusing the outer loop variable here
    # would replace the subquestion code used as the key of `space`.
    for answer, count in answer_counts.items():
        if pd.isnull(answer):  # unlike np.isnan, also safe for non-numeric answers
            nan_value = count
        elif answer == 0:
            zero_value = count
        else:
            merged[answer] = count
    merged[0.0] = zero_value + nan_value
    # sort_values() replaces Series.order(), deprecated since pandas 0.17
    space[code] = pd.Series(merged).sort_values()
In [6]:
%%capture output
# Save the output as a variable that can be saved to a file
for k,i in enumerate(space):
print ""
print subquestions_value[k].encode('utf-8')
print
print "Data:"
print space[i]
print ""
print "Data %:"
print space[i] / space[i].sum() * 100
print ""
print "Data: statistics:"
print space[i].describe()
In [7]:
# Save+show the output to a text file
# `output` is the variable filled by the `%%capture output` cell; its text is
# written to Q030-Dotazioni.py in the working directory.
# NOTE(review): the .py name is presumably imposed by %save - confirm.
%save Q030-Dotazioni.py str(output)
# Move the saved file to text/Q030-Dotazioni.txt (the text/ folder must already exist)
shutil.move("Q030-Dotazioni.py", "text/Q030-Dotazioni.txt")
In [8]:
# Swap nan for a more understandable word
# `space2` has the same keys, in the same order, as `space`; in every Series a
# NaN answer (if any) is relabelled "Nessuna risposta" and the counts are re-sorted.
space2 = OrderedDict()
for question in space:
    relabelled = {}
    # A dedicated name for the answer: reusing the outer loop variable here
    # would replace the key under which the Series is stored in `space2`.
    for answer, count in space[question].to_dict().items():
        # numpy.float64 is a subclass of float, so one check covers both types
        if isinstance(answer, float) and np.isnan(answer):
            relabelled["Nessuna risposta"] = count
        else:
            relabelled[answer] = count
    # sort_values() replaces Series.order(), deprecated since pandas 0.17
    space2[question] = pd.Series(relabelled).sort_values()
In [9]:
# Plot the data 01: one bar chart per subquestion, with the bars in the order
# stored in `space2`, saved as svg, png and pdf
for k, question in enumerate(space2):
    counts = space2[question]
    label = subquestions_value[k]
    plt.figure(figsize=(8,6))
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Quali altre dotazioni avete nel laboratorio?", fontsize=18, y=1.02)
    # One colour per bar: the palette is sized by the number of bars of this
    # chart, not by the number of subquestions
    my_colors = seaborn.color_palette("husl", len(counts)) # Set color palette
    counts.plot(kind="bar",color=my_colors)
    # The figure is left open on purpose so that the inline backend displays it
    plt.savefig(u"svg/Q030-"+label+"01.svg")
    plt.savefig(u"png/Q030-"+label+"01.png")
    plt.savefig(u"pdf/Q030-"+label+"01.pdf")
In [10]:
# Plot the data 02: one bar chart per subquestion, bars in natural index order
for position, question in enumerate(space2):
    label = subquestions_value[position]
    # Reorder value_counts by index natural order
    by_answer = space2[question].sort_index()
    # Set color palette: one colour per bar
    palette = seaborn.color_palette("husl", len(by_answer))
    fig, ax = plt.subplots(figsize=(8,6))
    ax.set_title(u"Quali altre dotazioni avete nel laboratorio?", fontsize=18, y=1.02)
    ax.set_xlabel(label, fontsize=16)
    ax.set_ylabel('Lab', fontsize=16)
    # Plot the data
    by_answer.plot(kind='bar', color=palette, ax=ax)
    # Same picture in the three output formats, each in its own folder
    for extension in ("svg", "png", "pdf"):
        fig.savefig(extension + u"/Q030-" + label + "02." + extension)
In [11]:
# Check histogram: distribution of the counts stored for each subquestion
for position, question in enumerate(space2):
    label = subquestions_value[position]
    file_stem = u"Q030-" + label + "03"
    plt.figure(figsize=(8,6))
    plt.ylabel('Lab', fontsize=16)
    plt.xlabel(label, fontsize=16)
    plt.title(u"Quali altre dotazioni avete nel laboratorio?", fontsize=18, y=1.02)
    space2[question].hist(bins=60)
    # Same picture in the three output formats, each in its own folder
    for extension in ("svg", "png", "pdf"):
        plt.savefig(extension + u"/" + file_stem + "." + extension)
In [11]: