Q038 - Come è stato suddiviso il budget di partenza?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# Pandas options: show every column of a frame and use the matplotlib-flavoured
# plot style (seaborn overrides that style anyway).
# NOTE(review): 'display.mpl_style' is presumably tied to the pandas release this
# notebook was written for -- confirm it exists in the installed version.
pd.set_option('display.max_columns', None)
pd.set_option('display.mpl_style', 'default')

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Fonts for all the graphs: start from the generic sans-serif family, then
# switch to Source Sans Pro.
plt.rcParams.update({'font.family': 'sans-serif'})
matplotlib.rcParams['font.family'] = 'Source Sans Pro'

In [2]:
# Read the survey answers (UTF-8 encoded CSV) into a DataFrame; every later
# cell works on `data`.
survey_path = "data/lab-survey.csv"
data = pd.read_csv(survey_path, encoding="utf-8")

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
# Column names of the seven D38 sub-questions (D38[SQ001] ... D38[SQ007]) and,
# position by position, the label each one is reported under.
subquestions = ["D38[SQ%03d]" % number for number in range(1, 8)]
subquestions_value = [
    u"Stipendi",
    u"Spazi",
    u"Materiali di consumo",
    u"Macchine e strumenti",
    u"Trasporti",
    u"Comunicazione",
    u"Consulenze",
]

In [5]:
%%capture output

# Save the output as a variable that can be saved to a file
space = {}
for k,i in enumerate(subquestions):
    space[k] = data[i].value_counts(dropna=False)
    print ""
    print "Data:",subquestions_value[k]
    print space[k]
    print ""
    print "Data %:"
    print data[i].value_counts(normalize=True, dropna=False) * 100
    print ""
    print "Data: statistics:"
    print data[i].describe()

In [6]:
# Save+show the output to a text file
# %save writes str(output) -- the text captured by the %%capture cell -- to
# Q038-Budget.py and echoes what it wrote below the cell.
%save Q038-Budget.py str(output)
# Move the saved file into the text/ folder, renaming it to .txt
# NOTE(review): presumably the text/ folder has to exist already -- confirm.
shutil.move("Q038-Budget.py", "text/Q038-Budget.txt")


The following commands were written to file `Q038-Budget.py`:

Data: Stipendi
NaN    48
 0     13
 30     5
 10     2
 70     1
 12     1
dtype: int64

Data %:
NaN    68.571429
 0     18.571429
 30     7.142857
 10     2.857143
 70     1.428571
 12     1.428571
dtype: float64

Data: statistics:
count    22.000000
mean     11.454545
std      18.070569
min       0.000000
25%       0.000000
50%       0.000000
75%      25.500000
max      70.000000
Name: D38[SQ001], dtype: float64

Data: Spazi
NaN    36
 0      8
 30     5
 50     5
 60     2
 25     2
 20     2
 15     2
 10     2
 80     1
 12     1
 75     1
 40     1
 70     1
 5      1
dtype: int64

Data %:
NaN    51.428571
 0     11.428571
 30     7.142857
 50     7.142857
 60     2.857143
 25     2.857143
 20     2.857143
 15     2.857143
 10     2.857143
 80     1.428571
 12     1.428571
 75     1.428571
 40     1.428571
 70     1.428571
 5      1.428571
dtype: float64

Data: statistics:
count    34.000000
mean     27.705882
std      24.305542
min       0.000000
25%       6.250000
50%      25.000000
75%      50.000000
max      80.000000
Name: D38[SQ002], dtype: float64

Data: Materiali di consumo
NaN     31
 10     15
 20      7
 5       5
 15      4
 30      2
 100     2
 60      1
 40      1
 3       1
 2       1
dtype: int64

Data %:
NaN     44.285714
 10     21.428571
 20     10.000000
 5       7.142857
 15      5.714286
 30      2.857143
 100     2.857143
 60      1.428571
 40      1.428571
 3       1.428571
 2       1.428571
dtype: float64

Data: statistics:
count     39.000000
mean      18.974359
std       21.943093
min        2.000000
25%       10.000000
50%       10.000000
75%       20.000000
max      100.000000
Name: D38[SQ003], dtype: float64

Data: Macchine e strumenti
NaN     28
 60      5
 70      5
 30      4
 50      3
 45      3
 25      3
 75      3
 40      3
 90      3
 35      3
 20      2
 15      1
 18      1
 100     1
 0       1
 12      1
dtype: int64

Data %:
NaN     40.000000
 60      7.142857
 70      7.142857
 30      5.714286
 50      4.285714
 45      4.285714
 25      4.285714
 75      4.285714
 40      4.285714
 90      4.285714
 35      4.285714
 20      2.857143
 15      1.428571
 18      1.428571
 100     1.428571
 0       1.428571
 12      1.428571
dtype: float64

Data: statistics:
count     42.000000
mean      48.452381
std       24.240867
min        0.000000
25%       30.000000
50%       45.000000
75%       70.000000
max      100.000000
Name: D38[SQ004], dtype: float64

Data: Trasporti
NaN    53
 0      7
 5      5
 45     1
 10     1
 20     1
 3      1
 2      1
dtype: int64

Data %:
NaN    75.714286
 0     10.000000
 5      7.142857
 45     1.428571
 10     1.428571
 20     1.428571
 3      1.428571
 2      1.428571
dtype: float64

Data: statistics:
count    17.000000
mean      6.176471
std      11.220714
min       0.000000
25%       0.000000
50%       3.000000
75%       5.000000
max      45.000000
Name: D38[SQ005], dtype: float64

Data: Comunicazione
NaN    44
 5     10
 10     6
 0      3
 20     2
 3      2
 2      2
 80     1
dtype: int64

Data %:
NaN    62.857143
 5     14.285714
 10     8.571429
 0      4.285714
 20     2.857143
 3      2.857143
 2      2.857143
 80     1.428571
dtype: float64

Data: statistics:
count    26.000000
mean      9.230769
std      15.310931
min       0.000000
25%       3.500000
50%       5.000000
75%      10.000000
max      80.000000
Name: D38[SQ006], dtype: float64

Data: Consulenze
NaN     47
 5      10
 0       6
 10      3
 100     1
 20      1
 50      1
 1       1
dtype: int64

Data %:
NaN     67.142857
 5      14.285714
 0       8.571429
 10      4.285714
 100     1.428571
 20      1.428571
 50      1.428571
 1       1.428571
dtype: float64

Data: statistics:
count     23.000000
mean      10.913043
std       22.065838
min        0.000000
25%        0.500000
50%        5.000000
75%        7.500000
max      100.000000
Name: D38[SQ007], dtype: float64


In [17]:
# Swap nan for a more understandable word
# For every sub-question in `space`, rebuild the value counts with the NaN
# label replaced by "Nessuna risposta", then sort them by frequency.
space2 = {}
for key, counts in space.items():
    relabelled = {}
    for answer, frequency in counts.to_dict().items():
        # The missing-answer label can be a plain float or a numpy float.
        # numpy is only imported under the alias `np`, so the check must go
        # through `np` (a bare `numpy.` is a NameError on a fresh kernel).
        if isinstance(answer, (float, np.floating)) and np.isnan(answer):
            relabelled["Nessuna risposta"] = frequency
        else:
            relabelled[answer] = frequency

    # Ascending sort on the counts.
    # NOTE(review): Series.order() belongs to the old pandas API this notebook
    # targets; newer releases call it sort_values() -- confirm the version.
    space2[key] = pd.Series(relabelled).order()


7 7

In [8]:
# Plot the data 01: one bar chart per sub-question, bars in the order stored
# in space2, saved as svg/png/pdf.
for i in sorted(space2):
    # The keys of space2 are the positions of the sub-questions, so the key
    # itself (not the position reached while iterating the dict) picks the label.
    label = subquestions_value[i]
    counts = space2[i]

    plt.figure(figsize=(8,6))
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Come è stato suddiviso il budget di partenza? %", fontsize=18, y=1.02)
    # One colour per bar: the palette is sized on this series, not on the
    # number of sub-questions
    my_colors = seaborn.color_palette("husl", len(counts)) # Set color palette
    counts.plot(kind="bar",color=my_colors)
    # Same figure exported once per format, each into its own folder
    for extension in ("svg", "png", "pdf"):
        plt.savefig(u"%s/Q038-%s01.%s" % (extension, label, extension))



In [9]:
# Plot the data 02: one bar chart per sub-question with the bars following
# the order of the answers, saved as svg/png/pdf.
for i in sorted(space2):
    # The keys of space2 are the positions of the sub-questions, so the key
    # itself (not the position reached while iterating the dict) picks the label.
    label = subquestions_value[i]

    # Reorder value_counts by index natural order
    by_answer = space2[i].sort_index()

    plt.figure(figsize=(8,6))
    plt.title(u"Come è stato suddiviso il budget di partenza? %", fontsize=18, y=1.02)
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)

    # Plot the data, one colour per bar
    my_colors = seaborn.color_palette("husl", len(by_answer)) # Set color palette
    by_answer.plot(kind='bar',color=my_colors)
    # Same figure exported once per format, each into its own folder
    for extension in ("svg", "png", "pdf"):
        plt.savefig(u"%s/Q038-%s02.%s" % (extension, label, extension))



In [10]:
# Check histogram: distribution of the answers given to each sub-question,
# saved as svg/png/pdf.
for i in sorted(space2):
    # The keys of space2 are the positions of the sub-questions, so the key
    # selects both the label and the survey column.
    label = subquestions_value[i]
    # space2 only holds how many times each answer occurs; a histogram of
    # those counts says nothing about the answers themselves, so the survey
    # column is plotted instead (missing answers left out).
    answers = data[subquestions[i]].dropna()

    plt.figure(figsize=(8,6))
    plt.title(u"Come è stato suddiviso il budget di partenza? % "+label, fontsize=18, y=1.02)
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    answers.hist(bins=60)
    # Same figure exported once per format, each into its own folder
    for extension in ("svg", "png", "pdf"):
        plt.savefig(u"%s/Q038-%s03.%s" % (extension, label, extension))



In [10]: