Q033 - Quanti sono i volontari che lavorano alla gestione del laboratorio?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# Pandas display options:
#   display.mpl_style   -> make the graphs a bit prettier (overridden by seaborn)
#   display.max_columns -> None, i.e. display all the columns
for option_name, option_value in (('display.mpl_style', 'default'),
                                  ('display.max_columns', None)):
    pd.set_option(option_name, option_value)

# Sans Serif fonts for all the graphs
plt.rcParams['font.family'] = 'sans-serif'

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Change the font (this replaces the generic family set just above)
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Read the survey answers from the UTF-8 encoded CSV file into `data`
data = pd.read_csv(
    "data/lab-survey.csv",
    encoding="utf-8"
)

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
%%capture output

# Save the output as a variable that can be saved to a file
# Get the distribution of data
space = data["D33"].value_counts(dropna=False)
print "Data:"
print space
print ""
print "Data %:"
print data["D33"].value_counts(normalize=True, dropna=False) * 100
print ""
print "Data: statistics:"
print data["D33"].describe()

In [5]:
# Save+show the output to a text file
# %save writes str(output) (the text captured by the %%capture cell) to a .py
# file and echoes what it wrote; shutil.move then relocates that file to the
# text/ folder with a .txt extension.
%save Q033-NumeroVolontari.py str(output)
shutil.move("Q033-NumeroVolontari.py", "text/Q033-NumeroVolontari.txt")


The following commands were written to file `Q033-NumeroVolontari.py`:
Data:
NaN     12
 10      9
 4       9
 5       8
 2       8
 6       5
 3       5
 20      3
 1       3
 8       2
 25      1
 15      1
 13      1
 12      1
 11      1
 130     1
dtype: int64

Data %:
NaN     17.142857
 10     12.857143
 4      12.857143
 5      11.428571
 2      11.428571
 6       7.142857
 3       7.142857
 20      4.285714
 1       4.285714
 8       2.857143
 25      1.428571
 15      1.428571
 13      1.428571
 12      1.428571
 11      1.428571
 130     1.428571
dtype: float64

Data: statistics:
count     58.000000
mean       8.827586
std       17.012006
min        1.000000
25%        3.000000
50%        5.000000
75%       10.000000
max      130.000000
Name: D33, dtype: float64


In [8]:
# Swap nan for a more understandable word
def _is_missing(key):
    """Return True when an index label of the counts is NaN (no answer given)."""
    # np.float64 subclasses Python's float, so one isinstance check covers both
    # kinds of key; the type test also keeps np.isnan away from string labels.
    return isinstance(key, float) and np.isnan(key)

old_dict = space.to_dict()
new_dict = {}
for key, count in old_dict.items():
    # The NaN key becomes a readable label, every other key is kept as it is
    new_dict["Nessuna risposta" if _is_missing(key) else key] = count

spaceu = pd.Series(new_dict)

# Sort the counts in ascending order. Series.order() was removed from pandas
# in favour of sort_values(); fall back to order() only on old versions.
if hasattr(spaceu, "sort_values"):
    space = spaceu.sort_values()
else:
    space = spaceu.order()

In [9]:
# Plot the data 01: bar chart of `space` in its current order
my_colors = seaborn.color_palette("husl", len(space)) # One husl color per bar

plt.figure(figsize=(8,6))
plt.title(u"Quanti sono i volontari che lavorano alla gestione del laboratorio?", fontsize=18, y=1.02)
plt.ylabel('Lab', fontsize=16)
plt.xlabel(u'Numero volontari', fontsize=16)
space.plot(kind="bar",color=my_colors)

# Export the same figure as svg, png and pdf
for target in (u"svg/Q033-NumeroVolontari01.svg",
               u"png/Q033-NumeroVolontari01.png",
               u"pdf/Q033-NumeroVolontari01.pdf"):
    plt.savefig(target)



In [10]:
# Plot the data 02: same bar chart, with the bars ordered by index label

# Reorder the counts by their index (the number of volunteers) instead of by count
space1 = space.sort_index()
my_colors = seaborn.color_palette("husl", len(space1)) # One husl color per bar

plt.figure(figsize=(8,6))
plt.ylabel('Lab', fontsize=16)
plt.xlabel(u'Numero volontari', fontsize=16)
plt.title(u'Quanti sono i volontari che lavorano alla gestione del laboratorio?', fontsize=18, y=1.02)

# Draw the bars, then export the figure as svg, png and pdf
space1.plot(kind='bar',color=my_colors)
for target in (u"svg/Q033-NumeroVolontari02.svg",
               u"png/Q033-NumeroVolontari02.png",
               u"pdf/Q033-NumeroVolontari02.pdf"):
    plt.savefig(target)



In [11]:
# Check histogram: distribution of the number of volunteers reported by each lab
plt.figure(figsize=(8,6))
plt.title(u'Quanti sono i volontari che lavorano alla gestione del laboratorio?', fontsize=18, y=1.02)
plt.xlabel(u'Numero volontari', fontsize=16)
plt.ylabel('Lab', fontsize=16)

# Histogram the raw answers, not `space`: `space` is the frequency table (it
# also holds the count of missing answers), so its values are numbers of labs
# and would not match the "Numero volontari" x axis. Missing answers are dropped.
data["D33"].dropna().hist(bins=60)

# Use the 03 suffix so this figure does not overwrite the files of plot 02
plt.savefig(u"svg/Q033-NumeroVolontari03.svg")
plt.savefig(u"png/Q033-NumeroVolontari03.png")
plt.savefig(u"pdf/Q033-NumeroVolontari03.pdf")



In [ ]: