Q020 - In che percentuale la gestione del laboratorio è dipendente da organizzazioni terze?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# pandas display options
for opzione, valore in (
    ('display.mpl_style', 'default'),  # Make the graphs a bit prettier, overridden by seaborn
    ('display.max_columns', None),     # Display all the columns
):
    pd.set_option(opzione, valore)

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Fonts: generic sans-serif family first, then the specific font used by all the graphs
plt.rcParams.update({'font.family': 'sans-serif'})
matplotlib.rcParams['font.family'] = 'Source Sans Pro'

In [2]:
# Read the survey answers from the CSV file into a DataFrame
data = pd.read_csv(
    "data/lab-survey.csv",
    encoding="utf-8",
)

In [3]:
# Check data
#data[0:4] # Same rows as data.head(4)

In [4]:
%%capture output

# Save the output as a variable that can be saved to a file
# Get the distribution of way of living
grado = data["D20[SQ001]"].value_counts(dropna=False)
print "Data:"
print grado
print ""
print "Data %:"
print data["D20[SQ001]"].value_counts(normalize=True,dropna=False) * 100

In [5]:
# Write the captured output to Q020-GradoDipendenza01.py with %save (which also echoes it),
# then move that file into the text/ folder with a .txt extension
%save Q020-GradoDipendenza01.py str(output)
shutil.move("Q020-GradoDipendenza01.py", "text/Q020-GradoDipendenza01.txt")


The following commands were written to file `Q020-GradoDipendenza01.py`:
Data:
0%      39
NaN     12
10%      5
50%      4
30%      2
20%      2
60%      2
80%      1
100%     1
70%      1
40%      1
dtype: int64

Data %:
0%      55.714286
NaN     17.142857
10%      7.142857
50%      5.714286
30%      2.857143
20%      2.857143
60%      2.857143
80%      1.428571
100%     1.428571
70%      1.428571
40%      1.428571
dtype: float64


In [6]:
# Relabel the counts: the NaN key (unanswered question) gets a readable label,
# every other key is capitalized
conteggi = grado.to_dict()
rinominati = {}
for chiave, conteggio in conteggi.items():
    senza_risposta = type(chiave) is float and np.isnan(chiave)
    if senza_risposta:
        rinominati["Nessuna risposta"] = conteggio
    else:
        rinominati[chiave.capitalize()] = conteggio

# Rebuild the series from the relabelled counts and sort it by value
gradou = pd.Series(rinominati)
grado = gradou.order()

In [7]:
# Plot the answer counts as a bar chart and export it as SVG, PNG and PDF
plt.figure(figsize=(8,6))
plt.xlabel(u'Dipendenza da altri', fontsize=16)
plt.ylabel('Lab', fontsize=16)
# The title is the full text of the question (the leading "I" was missing)
plt.title(u"In che percentuale la gestione del laboratorio è dipendente da organizzazioni terze?", fontsize=18, y=1.02)
my_colors = seaborn.color_palette("husl", len(grado)) # One color of the palette per bar
grado.plot(kind="bar",color=my_colors)
plt.savefig("svg/Q020-GradoDipendenza01.svg")
plt.savefig("png/Q020-GradoDipendenza01.png")
plt.savefig("pdf/Q020-GradoDipendenza01.pdf")



In [8]:
%%capture output

# Save the output as a variable that can be saved to a file
# Order of the choices
grado_order = [NaN,"0%","10%","20%","30%","40%","50%","60%","70%","80%","90%","100%"]

grado2 = grado.reindex(grado_order)

# Get the distribution of way of living, reindexed
print "Data:"
print grado2
print ""
print "Data %:"
grado2_normalized = data["D3"].value_counts(normalize=True, dropna=False) * 100
print grado2_normalized.reindex(grado_order)

In [9]:
# Write the captured output to Q020-GradoDipendenza02.py with %save (which also echoes it),
# then move that file into the text/ folder with a .txt extension
%save Q020-GradoDipendenza02.py str(output)
shutil.move("Q020-GradoDipendenza02.py", "text/Q020-GradoDipendenza02.txt")


The following commands were written to file `Q020-GradoDipendenza02.py`:
Data:
NaN    NaN
0%      39
10%      5
20%      2
30%      2
40%      1
50%      4
60%      2
70%      1
80%      1
90%    NaN
100%     1
dtype: float64

Data %:
NaN      0
0%     NaN
10%    NaN
20%    NaN
30%    NaN
40%    NaN
50%    NaN
60%    NaN
70%    NaN
80%    NaN
90%    NaN
100%   NaN
dtype: float64


In [10]:
# Rebuild grado2 in the display order of grado_order, with readable labels.
# grado already uses "Nessuna risposta" instead of NaN and capitalized keys, so the
# labels of grado_order are converted the same way before reindexing
ordered_labels = []
for i in grado_order:
    if type(i) is float and np.isnan(i):
        ordered_labels.append("Nessuna risposta")
    else:
        ordered_labels.append(i.capitalize())

# Keep the order of the choices: sorting by count here would make the second
# chart a copy of the first one. Choices that nobody picked get a count of 0
grado2 = grado.reindex(ordered_labels, fill_value=0)

In [11]:
# Draw grado2 as a bar chart and export the figure in three formats
plt.figure(figsize=(8, 6))
plt.xlabel(u'Dipendenza da altri', fontsize=16)
plt.ylabel('Lab', fontsize=16)
plt.title(
    u"In che percentuale la gestione del laboratorio è dipendente da organizzazioni terze?",
    fontsize=18,
    y=1.02,
)
my_colors = seaborn.color_palette("husl", len(grado2))  # One color of the palette per bar
grado2.plot(kind="bar", color=my_colors)
for percorso in (
    "svg/Q020-GradoDipendenza02.svg",
    "png/Q020-GradoDipendenza02.png",
    "pdf/Q020-GradoDipendenza02.pdf",
):
    plt.savefig(percorso)



In [11]: