Q034 - Quanti sono i tirocinanti che lavorano alla gestione del laboratorio?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# pandas display options: matplotlib style preset (overridden by seaborn) and
# no limit on the number of columns shown for a DataFrame.
# NOTE(review): 'display.mpl_style' is deprecated in newer pandas releases --
# confirm that the pandas version used for this notebook still accepts it.
pd.set_option('display.mpl_style', 'default')
pd.set_option('display.max_columns', None)

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Font for all the graphs. plt.rcParams and matplotlib.rcParams are the same
# object, so the family is set exactly once.
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Read the survey answers (UTF-8 encoded CSV) into a DataFrame
survey_csv = "data/lab-survey.csv"
data = pd.read_csv(survey_csv, encoding="utf-8")

In [3]:
# Check data: uncomment the next line to preview the first four rows
#data[0:4] # Same rows as data.head(4)

In [4]:
# Columns of the survey analysed in this notebook, and the label used for each
# of them in axis labels, titles and output file names.
# The two lists are parallel: label n belongs to column n.
subquestions = [
    "D34[SQ001]",
    "D34[SQ002]",
]
subquestions_value = [
    "Tirocinanti non retribuiti",
    "Tirocinanti retribuiti",
]

In [5]:
%%capture output

# Save the output as a variable that can be saved to a file
space = {}
for k,i in enumerate(subquestions):
    space[k] = data[i].value_counts(dropna=False)
    print "Data:"
    print space[k]
    print ""
    print "Data %:"
    print data[i].value_counts(normalize=True, dropna=False) * 100
    print ""
    print "Data: statistics:"
    print data[i].describe()

In [6]:
# Save+show the output to a text file:
# %save writes the text captured in `output` to Q034-Tirocinanti.py and echoes
# it below the cell; the file is then moved into text/ with a .txt extension.
%save Q034-Tirocinanti.py str(output)
shutil.move("Q034-Tirocinanti.py", "text/Q034-Tirocinanti.txt")


The following commands were written to file `Q034-Tirocinanti.py`:
Data:
NaN    47
 0      9
 1      7
 3      3
 2      3
 10     1
dtype: int64

Data %:
NaN    67.142857
 0     12.857143
 1     10.000000
 3      4.285714
 2      4.285714
 10     1.428571
dtype: float64

Data: statistics:
count    23.000000
mean      1.391304
std       2.147708
min       0.000000
25%       0.000000
50%       1.000000
75%       2.000000
max      10.000000
Name: D34[SQ001], dtype: float64
Data:
NaN    55
 0     12
 1      3
dtype: int64

Data %:
NaN    78.571429
 0     17.142857
 1      4.285714
dtype: float64

Data: statistics:
count    15.000000
mean      0.200000
std       0.414039
min       0.000000
25%       0.000000
50%       0.000000
75%       0.000000
max       1.000000
Name: D34[SQ002], dtype: float64


In [7]:
# Swap nan for a more understandable word ("Nessuna risposta") in each
# frequency table, then sort the table by ascending count.
space2 = {}
for i in space:
    counts = space[i].to_dict()
    relabelled = {}
    for answer, count in counts.items():
        # np.float64 is a subclass of the built-in float, so one isinstance
        # check covers both kinds of key; it also keeps np.isnan from being
        # called on non-numeric keys.
        if isinstance(answer, float) and np.isnan(answer):
            relabelled["Nessuna risposta"] = count
        else:
            relabelled[answer] = count

    gradou = pd.Series(relabelled)
    # Series.order() only exists in old pandas releases and sort_values() only
    # in newer ones; use whichever this pandas provides.
    if hasattr(gradou, "sort_values"):
        space2[i] = gradou.sort_values()
    else:
        space2[i] = gradou.order()

In [8]:
# Plot the data 01: one bar chart per subquestion, bars ordered as in space2
for k, i in enumerate(space2):
    counts = space2[i]
    label = subquestions_value[k]

    fig, ax = plt.subplots(figsize=(8,6))
    ax.set_xlabel(label, fontsize=16)
    ax.set_ylabel('Lab', fontsize=16)
    ax.set_title(u"Quanti sono i tirocinanti che lavorano alla gestione del laboratorio?", fontsize=18, y=1.02)

    # One color per bar
    my_colors = seaborn.color_palette("husl", len(counts))
    counts.plot(kind="bar", color=my_colors, ax=ax)

    # Export the same figure as svg, png and pdf, each in its own folder
    for ext in ("svg", "png", "pdf"):
        fig.savefig(ext + u"/Q034-" + label + "01." + ext)



In [9]:
# Plot the data 02: same bar charts, with the bars in index order
for k, i in enumerate(space2):
    label = subquestions_value[k]

    # Reorder value_counts by index natural order
    by_answer = space2[i].sort_index()

    fig, ax = plt.subplots(figsize=(8,6))
    ax.set_title(u"Quanti sono i tirocinanti che lavorano alla gestione del laboratorio? "+label, fontsize=18, y=1.02)
    ax.set_xlabel(label, fontsize=16)
    ax.set_ylabel('Lab', fontsize=16)

    # One color per bar
    my_colors = seaborn.color_palette("husl", len(by_answer))
    by_answer.plot(kind='bar', color=my_colors, ax=ax)

    # Export the same figure as svg, png and pdf, each in its own folder
    for ext in ("svg", "png", "pdf"):
        fig.savefig(ext + u"/Q034-" + label + "02." + ext)



In [10]:
# Check histogram: distribution of the raw answers of each subquestion.
for k,i in enumerate(space2):
    plt.figure(figsize=(8,6))
    plt.title(u"Quanti sono i tirocinanti che lavorano alla gestione del laboratorio? "+subquestions_value[k], fontsize=18, y=1.02)
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    # The histogram is drawn from the survey column itself, not from the
    # frequency table in space2: the x axis is the number of interns and the
    # y axis the number of labs, as the axis labels say. The keys of space2
    # are positions in `subquestions`, so subquestions[i] is the matching
    # column. Series.hist leaves out missing answers.
    data[subquestions[i]].hist(bins=60)
    plt.savefig(u"svg/Q034-"+subquestions_value[k]+"03.svg")
    plt.savefig(u"png/Q034-"+subquestions_value[k]+"03.png")
    plt.savefig(u"pdf/Q034-"+subquestions_value[k]+"03.pdf")



In [10]: