Q030 - Quali altre dotazioni avete nel laboratorio?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier, overridden by seaborn
pd.set_option('display.max_columns', None) # Display all the columns
plt.rcParams['font.family'] = 'sans-serif' # Sans Serif fonts for all the graphs

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Change the font
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Load csv file first
data = pd.read_csv("data/lab-survey.csv", encoding="utf-8")

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
# For each subquestion, plot the data
subquestions = ["D30[SQ002]","D30[SQ003]","D30[SQ004]","D30[SQ005]",
                "D30[SQ006]","D30[SQ007]","D30[SQ008]","D30[SQ009]",
                "D30[SQ010]","D30[SQ011]","D30[SQ012]","D30[SQ013]",
                "D30[SQ014]"]
subquestions_value = [u"Zona falegnameria",
                      u"Fusione di metalli", 
                      u"Lavorazioni ceramica", 
                      u"Lavorazioni vetro", 
                      u"Lavorazioni tessile", 
                      u"Fresatrice a controllo manuale", 
                      u"Tornio a controllo manuale", 
                      u"Trapano a colonna", 
                      u"Termoformatura", 
                      u"Produzione di circuiti per incisione", 
                      u"Macchina da taglio e cucito", 
                      u"Materiali per stampi in silicone e colatura resine", 
                      u"Attrezzature per Biolab"]

In [5]:
space = {}
for k,i in enumerate(subquestions):
    current_series = data[i].value_counts(dropna=False)
    old_dict = current_series.to_dict()
    new_dict = {}
    zero_value = 0.0
    nan_value = 0.0
    for i in old_dict.keys():
        if np.isnan(i):
            nan_value = old_dict[i]
        elif i == 0 or i == 0.0:
            zero_value = old_dict[i]
        else:
            new_dict[i] = old_dict[i]
    new_dict[0.0] = zero_value + nan_value
            
    gradou = pd.Series(new_dict)
    space[i] = gradou.order()

In [6]:
%%capture output

# Save the output as a variable that can be saved to a file
for k,i in enumerate(space):
    print ""
    print subquestions_value[k].encode('utf-8')
    print
    print "Data:"
    print space[i]
    print ""
    print "Data %:"
    print space[i] / space[i].sum() * 100
    print ""
    print "Data: statistics:"
    print space[i].describe()

In [7]:
# Save+show the output to a text file
%save Q030-Dotazioni.py str(output)
shutil.move("Q030-Dotazioni.py", "text/Q030-Dotazioni.txt")


The following commands were written to file `Q030-Dotazioni.py`:

Zona falegnameria

Data:
1    12
0    58
dtype: int64

Data %:
1    17.142857
0    82.857143
dtype: float64

Data: statistics:
count     2.000000
mean     35.000000
std      32.526912
min      12.000000
25%      23.500000
50%      35.000000
75%      46.500000
max      58.000000
dtype: float64

Fusione di metalli

Data:
1     2
0    68
dtype: int64

Data %:
1     2.857143
0    97.142857
dtype: float64

Data: statistics:
count     2.000000
mean     35.000000
std      46.669048
min       2.000000
25%      18.500000
50%      35.000000
75%      51.500000
max      68.000000
dtype: float64

Lavorazioni ceramica

Data:
2     4
0    33
1    33
dtype: float64

Data %:
2     5.714286
0    47.142857
1    47.142857
dtype: float64

Data: statistics:
count     3.000000
mean     23.333333
std      16.743158
min       4.000000
25%      18.500000
50%      33.000000
75%      33.000000
max      33.000000
dtype: float64

Lavorazioni vetro

Data:
2     1
3     1
4     1
1    32
0    35
dtype: float64

Data %:
2     1.428571
3     1.428571
4     1.428571
1    45.714286
0    50.000000
dtype: float64

Data: statistics:
count     5.000000
mean     14.000000
std      17.832555
min       1.000000
25%       1.000000
50%       1.000000
75%      32.000000
max      35.000000
dtype: float64

Lavorazioni tessile

Data:
2     1
8     1
1    24
0    44
dtype: int64

Data %:
2     1.428571
8     1.428571
1    34.285714
0    62.857143
dtype: float64

Data: statistics:
count     4.000000
mean     17.500000
std      20.728402
min       1.000000
25%       1.000000
50%      12.500000
75%      29.000000
max      44.000000
dtype: float64

Fresatrice a controllo manuale

Data:
20     1
0     69
dtype: int64

Data %:
20     1.428571
0     98.571429
dtype: float64

Data: statistics:
count     2.000000
mean     35.000000
std      48.083261
min       1.000000
25%      18.000000
50%      35.000000
75%      52.000000
max      69.000000
dtype: float64


In [8]:
# Swap nan for a more understandable word
space2 = {}
for i in space:
    old_dict = space[i].to_dict()
    new_dict = {}
    for i in old_dict:
        if isinstance(i, numpy.float64) and np.isnan(i):
            new_dict["Nessuna risposta"] = old_dict[i]
        elif type(i) is float and np.isnan(i):
            new_dict["Nessuna risposta"] = old_dict[i]
        else:
            new_dict[i] = old_dict[i]

    gradou = pd.Series(new_dict)
    space2[i] = gradou.order()

In [9]:
for k,i in enumerate(space2):
    # Plot the data 01
    plt.figure(figsize=(8,6))
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Quali altre dotazioni avete nel laboratorio?", fontsize=18, y=1.02)
    my_colors = seaborn.color_palette("husl", len(space)) # Set color palette
    space2[i].plot(kind="bar",color=my_colors)
    plt.savefig(u"svg/Q030-"+subquestions_value[k]+"01.svg")
    plt.savefig(u"png/Q030-"+subquestions_value[k]+"01.png")
    plt.savefig(u"pdf/Q030-"+subquestions_value[k]+"01.pdf")



In [10]:
# Plot the data 02
for k,i in enumerate(space2):
    # Reorder value_counts by index natural order
    space1 = space2[i].sort_index()

    plt.figure(figsize=(8,6))
    plt.title(u"Quali altre dotazioni avete nel laboratorio?", fontsize=18, y=1.02)
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)

    # Plot the data
    my_colors = seaborn.color_palette("husl", len(space1)) # Set color palette
    space1.plot(kind='bar',color=my_colors)
    plt.savefig(u"svg/Q030-"+subquestions_value[k]+"02.svg")
    plt.savefig(u"png/Q030-"+subquestions_value[k]+"02.png")
    plt.savefig(u"pdf/Q030-"+subquestions_value[k]+"02.pdf")



In [11]:
for k,i in enumerate(space2):
    # Check histogram
    plt.figure(figsize=(8,6))
    plt.title(u"Quali altre dotazioni avete nel laboratorio?", fontsize=18, y=1.02)
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    space2[i].hist(bins=60)
    plt.savefig(u"svg/Q030-"+subquestions_value[k]+"03.svg")
    plt.savefig(u"png/Q030-"+subquestions_value[k]+"03.png")
    plt.savefig(u"pdf/Q030-"+subquestions_value[k]+"03.pdf")



In [11]: