Q039 - Qual è stato il bilancio annuale del laboratorio?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# Global display and plotting configuration for the whole notebook.
# The statements below write to shared option registries, so their order matters.
# NOTE(review): 'display.mpl_style' appears to exist only in older pandas releases — confirm the pinned version
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier, overridden by seaborn
pd.set_option('display.max_columns', None) # Display all the columns
plt.rcParams['font.family'] = 'sans-serif' # Sans Serif fonts for all the graphs

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Change the font
# This later write to the same 'font.family' key replaces the 'sans-serif' value set above.
# NOTE(review): presumably requires the "Source Sans Pro" font to be installed — confirm
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Load csv file first
# Read the survey answers (UTF-8 encoded CSV, path relative to the notebook)
# into `data`, which the following cells index by question column.
data = pd.read_csv("data/lab-survey.csv", encoding="utf-8")

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
# Columns of the survey that hold the answers to question D39, one per year
subquestions = [
    "D39[SQ001]",
    "D39[SQ002]",
    "D39[SQ003]",
    "D39[SQ004]",
]

# Label of each column above, in the same order (used in reports and charts)
subquestions_value = [u"2011", u"2012", u"2013", u"2014"]

In [5]:
%%capture output

# Save the output as a variable that can be saved to a file
space = {}
for k,i in enumerate(subquestions):
    space[k] = data[i].value_counts(dropna=False)
    print ""
    print "Data:",subquestions_value[k]
    print space
    print ""
    print "Data %:"
    print data[i].value_counts(normalize=True, dropna=False) * 100
    print ""
    print "Data: statistics:"
    print data[i].describe()

In [6]:
# Save+show the output to a text file
# %save writes the text captured in `output` to Q039-Bilancio.py and echoes it
# below the cell; the file is then moved into text/ with a .txt extension.
# NOTE(review): assumes the text/ directory already exists — confirm
%save Q039-Bilancio.py str(output)
shutil.move("Q039-Bilancio.py", "text/Q039-Bilancio.txt")


The following commands were written to file `Q039-Bilancio.py`:

Data: 2011
{0: NaN    67
 0      3
dtype: int64}

Data %:
NaN    95.714286
 0      4.285714
dtype: float64

Data: statistics:
count    3
mean     0
std      0
min      0
25%      0
50%      0
75%      0
max      0
Name: D39[SQ001], dtype: float64

Data: 2012
{0: NaN    67
 0      3
dtype: int64, 1: NaN       62
 0         3
 12000     1
 20000     1
 2000      1
 1090      1
 5000      1
dtype: int64}

Data %:
NaN       88.571429
 0         4.285714
 12000     1.428571
 20000     1.428571
 2000      1.428571
 1090      1.428571
 5000      1.428571
dtype: float64

Data: statistics:
count        8.0000
mean      5011.2500
std       7302.5151
min          0.0000
25%          0.0000
50%       1545.0000
75%       6750.0000
max      20000.0000
Name: D39[SQ002], dtype: float64

Data: 2013
{0: NaN    67
 0      3
dtype: int64, 1: NaN       62
 0         3
 12000     1
 20000     1
 2000      1
 1090      1
 5000      1
dtype: int64, 2: NaN       57
 0         3
 15000     2
 10000     2
 14000     1
 3500      1
 1000      1
-2000      1
 50000     1
 16        1
dtype: int64}

Data %:
NaN       81.428571
 0         4.285714
 15000     2.857143
 10000     2.857143
 14000     1.428571
 3500      1.428571
 1000      1.428571
-2000      1.428571
 50000     1.428571
 16        1.428571
dtype: float64

Data: statistics:
count       13.000000
mean      8962.769231
std      13901.271633
min      -2000.000000
25%          0.000000
50%       3500.000000
75%      14000.000000
max      50000.000000
Name: D39[SQ003], dtype: float64

Data: 2014
{0: NaN    67
 0      3
dtype: int64, 1: NaN       62
 0         3
 12000     1
 20000     1
 2000      1
 1090      1
 5000      1
dtype: int64, 2: NaN       57
 0         3
 15000     2
 10000     2
 14000     1
 3500      1
 1000      1
-2000      1
 50000     1
 16        1
dtype: int64, 3: NaN       56
 0         4
 15000     2
 10000     2
 6000      1
 12        1
 300       1
 1000      1
 65000     1
 2500      1
dtype: int64}

Data %:
NaN       80.000000
 0         5.714286
 15000     2.857143
 10000     2.857143
 6000      1.428571
 12        1.428571
 300       1.428571
 1000      1.428571
 65000     1.428571
 2500      1.428571
dtype: float64

Data: statistics:
count       14.000000
mean      8915.142857
std      17109.653896
min          0.000000
25%          3.000000
50%       1750.000000
75%      10000.000000
max      65000.000000
Name: D39[SQ004], dtype: float64


In [7]:
# Swap nan for a more understandable word
# For every year in `space`, rebuild the answer counts with the missing-answer
# key (NaN) relabelled as "Nessuna risposta", then sort the counts in
# ascending order. The result is stored in `space2` under the same keys.
space2 = {}
for i in space:
    old_dict = space[i].to_dict()
    new_dict = {}
    for k in old_dict:
        # np.float64 is a subclass of the built-in float, so one isinstance
        # check covers both numpy and plain Python NaN keys. The module is
        # imported as `np`; the bare name `numpy` is not defined here.
        if isinstance(k, float) and np.isnan(k):
            new_dict["Nessuna risposta"] = old_dict[k]
        else:
            new_dict[k] = old_dict[k]

    gradou = pd.Series(new_dict)
    # Series.order() was replaced by Series.sort_values() in newer pandas;
    # use whichever this pandas version provides (both sort by value, ascending)
    if hasattr(gradou, "sort_values"):
        space2[i] = gradou.sort_values()
    else:
        space2[i] = gradou.order()

In [8]:
# Plot the data 01: one bar chart per year, bars in ascending order of count.
# Keys of `space2` are the positions of the years, so iterating over the
# sorted keys keeps every chart paired with its own year label regardless of
# the dict iteration order.
for k,i in enumerate(sorted(space2)):
    counts = space2[i]

    plt.figure(figsize=(8,6))
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Qual è stato il bilancio annuale del laboratorio? €", fontsize=18, y=1.02)
    # One colour per bar: the palette must be sized by the number of answers
    # of this year, not by the number of years
    my_colors = seaborn.color_palette("husl", len(counts)) # Set color palette
    counts.plot(kind="bar",color=my_colors)
    # Export the chart in every format used by the report
    plt.savefig(u"svg/Q039-"+subquestions_value[k]+"01.svg")
    plt.savefig(u"png/Q039-"+subquestions_value[k]+"01.png")
    plt.savefig(u"pdf/Q039-"+subquestions_value[k]+"01.pdf")



In [9]:
# Plot the data 02: one bar chart per year, bars arranged by answer instead of by count
for position, key in enumerate(space2):
    year = subquestions_value[position]

    # Put the answers in the natural order of the index and pick one colour per bar
    space1 = space2[key].sort_index()
    my_colors = seaborn.color_palette("husl", len(space1))

    # New figure labelled with the year, the unit of the bars and the question
    plt.figure(figsize=(8,6))
    plt.xlabel(year, fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Qual è stato il bilancio annuale del laboratorio? €", fontsize=18, y=1.02)

    # Draw the bars, then export the chart in every format
    space1.plot(kind='bar',color=my_colors)
    for path_start, path_end in ((u"svg/Q039-", "02.svg"),
                                 (u"png/Q039-", "02.png"),
                                 (u"pdf/Q039-", "02.pdf")):
        plt.savefig(path_start+year+path_end)



In [10]:
# Check histogram: distribution of the budgets reported for each year.
# The histogram is built from the answers themselves (missing answers
# dropped). The previous version used `space2`, i.e. the frequency counts
# (including the "Nessuna risposta" count), which shows how often each count
# occurs rather than how the budgets are distributed.
for k, column in enumerate(subquestions):
    plt.figure(figsize=(8,6))
    plt.title(u"Qual è stato il bilancio annuale del laboratorio? € "+subquestions_value[k], fontsize=18, y=1.02)
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    data[column].dropna().hist(bins=60)
    # Export the chart in every format used by the report
    plt.savefig(u"svg/Q039-"+subquestions_value[k]+"03.svg")
    plt.savefig(u"png/Q039-"+subquestions_value[k]+"03.png")
    plt.savefig(u"pdf/Q039-"+subquestions_value[k]+"03.pdf")



In [10]: