Q038 - Come viene suddiviso il bilancio annuale?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# Notebook-wide display and plotting configuration.
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier, overridden by seaborn
pd.set_option('display.max_columns', None) # Display all the columns (None = no limit)
plt.rcParams['font.family'] = 'sans-serif' # Sans Serif fonts for all the graphs

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Change the font. This writes the same 'font.family' key that was assigned a
# few lines above, so 'Source Sans Pro' is the value left in effect.
# NOTE(review): presumably this font has to be installed on the machine that
# renders the figures -- confirm.
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Read the survey answers into a DataFrame; every later cell works on `data`.
data = pd.read_csv(
    "data/lab-survey.csv",
    encoding="utf-8",
)

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
# Columns of the survey that hold the answers to each part of the budget
# question, and -- paired with them by position -- the label used for that
# part in the printed report, on the plot axes and in the output file names.
subquestions = [
    "D40[SQ001]",
    "D40[SQ002]",
    "D40[SQ003]",
    "D40[SQ004]",
    "D40[SQ005]",
    "D40[SQ006]",
    "D40[SQ007]",
]
subquestions_value = [
    u"Stipendi",
    u"Affitto e mantenimento sede",
    u"Materiali di consumo",
    u"Macchine e strumenti",
    u"Trasporti",
    u"Comunicazione",
    u"Consulenze",
]

In [5]:
%%capture output

# Save the output as a variable that can be saved to a file
space = {}
for k,i in enumerate(subquestions):
    space[k] = data[i].value_counts(dropna=False)
    print ""
    print "Data:",subquestions_value[k]
    print space[k]
    print ""
    print "Data %:"
    print data[i].value_counts(normalize=True, dropna=False) * 100
    print ""
    print "Data: statistics:"
    print data[i].describe()

In [6]:
# Save+show the output to a text file.
# %save writes the captured report to a .py file (and echoes what it wrote
# below the cell); that file is then moved to text/ with a .txt extension.
# NOTE(review): assumes the text/ directory already exists -- confirm.
%save Q040-BilancioSuddiviso.py str(output)
shutil.move("Q040-BilancioSuddiviso.py", "text/Q040-BilancioSuddiviso.txt")


The following commands were written to file `Q040-BilancioSuddiviso.py`:

Data: Stipendi
NaN    53
 0      6
 20     2
 50     2
 15     1
 30     1
 45     1
 60     1
 10     1
 25     1
 40     1
dtype: int64

Data %:
NaN    75.714286
 0      8.571429
 20     2.857143
 50     2.857143
 15     1.428571
 30     1.428571
 45     1.428571
 60     1.428571
 10     1.428571
 25     1.428571
 40     1.428571
dtype: float64

Data: statistics:
count    17.000000
mean     21.470588
std      20.973548
min       0.000000
25%       0.000000
50%      20.000000
75%      40.000000
max      60.000000
Name: D40[SQ001], dtype: float64

Data: Affitto e mantenimento sede
NaN     47
 30      5
 40      4
 50      4
 10      2
 20      2
 70      2
 60      1
 75      1
 100     1
 0       1
dtype: int64

Data %:
NaN     67.142857
 30      7.142857
 40      5.714286
 50      5.714286
 10      2.857143
 20      2.857143
 70      2.857143
 60      1.428571
 75      1.428571
 100     1.428571
 0       1.428571
dtype: float64

Data: statistics:
count     23.000000
mean      41.086957
std       23.449972
min        0.000000
25%       30.000000
50%       40.000000
75%       50.000000
max      100.000000
Name: D40[SQ002], dtype: float64

Data: Materiali di consumo
NaN     41
 30      5
 20      5
 15      5
 10      3
 5       3
 90      1
 25      1
 18      1
 50      1
 40      1
 7       1
 100     1
 35      1
dtype: int64

Data %:
NaN     58.571429
 30      7.142857
 20      7.142857
 15      7.142857
 10      4.285714
 5       4.285714
 90      1.428571
 25      1.428571
 18      1.428571
 50      1.428571
 40      1.428571
 7       1.428571
 100     1.428571
 35      1.428571
dtype: float64

Data: statistics:
count     29.000000
mean      25.344828
std       22.196003
min        5.000000
25%       15.000000
50%       20.000000
75%       30.000000
max      100.000000
Name: D40[SQ003], dtype: float64

Data: Macchine e strumenti
NaN    44
 20     6
 10     5
 15     3
 30     3
 50     3
 25     2
 40     1
 5      1
 70     1
 85     1
dtype: int64

Data %:
NaN    62.857143
 20     8.571429
 10     7.142857
 15     4.285714
 30     4.285714
 50     4.285714
 25     2.857143
 40     1.428571
 5      1.428571
 70     1.428571
 85     1.428571
dtype: float64

Data: statistics:
count    26.000000
mean     27.115385
std      19.655690
min       5.000000
25%      15.000000
50%      20.000000
75%      30.000000
max      85.000000
Name: D40[SQ004], dtype: float64

Data: Trasporti
NaN    63
 0      3
 2      2
 5      1
 3      1
dtype: int64

Data %:
NaN    90.000000
 0      4.285714
 2      2.857143
 5      1.428571
 3      1.428571
dtype: float64

Data: statistics:
count    7.000000
mean     1.714286
std      1.889822
min      0.000000
25%      0.000000
50%      2.000000
75%      2.500000
max      5.000000
Name: D40[SQ005], dtype: float64

Data: Comunicazione
NaN    57
 5      5
 10     4
 30     1
 25     1
 2      1
 1      1
dtype: int64

Data %:
NaN    81.428571
 5      7.142857
 10     5.714286
 30     1.428571
 25     1.428571
 2      1.428571
 1      1.428571
dtype: float64

Data: statistics:
count    13.000000
mean      9.461538
std       8.617960
min       1.000000
25%       5.000000
50%       5.000000
75%      10.000000
max      30.000000
Name: D40[SQ006], dtype: float64

Data: Consulenze
NaN    60
 5      2
 20     2
 25     2
 0      2
 7      1
 8      1
dtype: int64

Data %:
NaN    85.714286
 5      2.857143
 20     2.857143
 25     2.857143
 0      2.857143
 7      1.428571
 8      1.428571
dtype: float64

Data: statistics:
count    10.000000
mean     11.500000
std       9.947082
min       0.000000
25%       5.000000
50%       7.500000
75%      20.000000
max      25.000000
Name: D40[SQ007], dtype: float64


In [7]:
# Swap nan for a more understandable word.
#
# For every sub-question, rebuild the value counts with the NaN index entry
# (counted because value_counts was called with dropna=False) relabelled as
# "Nessuna risposta", then sort the counts in ascending order for plotting.
space2 = {}
for i in space:
    old_dict = space[i].to_dict()
    new_dict = {}
    for k in old_dict:
        # The module is imported as `np`; the bare name `numpy` is not bound
        # in this notebook. np.float64 subclasses Python's float, so a single
        # isinstance check covers both kinds of NaN key, and checking the type
        # first keeps np.isnan away from non-numeric keys.
        if isinstance(k, float) and np.isnan(k):
            new_dict["Nessuna risposta"] = old_dict[k]
        else:
            new_dict[k] = old_dict[k]

    gradou = pd.Series(new_dict)
    # Series.order() was replaced by Series.sort_values() in later pandas
    # releases; use whichever one this installation provides.
    if hasattr(gradou, "sort_values"):
        space2[i] = gradou.sort_values()
    else:
        space2[i] = gradou.order()

In [8]:
# Plot the data 01: one bar chart per sub-question, using the counts as
# stored in space2, saved as SVG, PNG and PDF.
for i in sorted(space2):
    # space2 is keyed by the sub-question's position in `subquestions`, so the
    # key itself selects the matching label; this does not rely on the order
    # in which the dict happens to be iterated.
    label = subquestions_value[i]
    counts = space2[i]

    plt.figure(figsize=(8,6))
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Come viene suddiviso il bilancio annuale? %", fontsize=18, y=1.02)
    # One colour per bar: the palette is sized by the number of bars in this
    # chart (not by the number of sub-questions), so colours do not repeat.
    my_colors = seaborn.color_palette("husl", len(counts)) # Set color palette
    counts.plot(kind="bar",color=my_colors)
    plt.savefig(u"svg/Q040-"+label+"01.svg")
    plt.savefig(u"png/Q040-"+label+"01.png")
    plt.savefig(u"pdf/Q040-"+label+"01.pdf")



In [9]:
# Plot the data 02
# One bar chart per sub-question with the bars arranged by index order
# (sort_index) rather than in the order stored in space2.
for position, key in enumerate(space2):
    label = subquestions_value[position]
    by_answer = space2[key].sort_index()

    plt.figure(figsize=(8,6))
    plt.title(u"Come viene suddiviso il bilancio annuale? %", fontsize=18, y=1.02)
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)

    # One palette entry per bar
    palette = seaborn.color_palette("husl", len(by_answer))
    by_answer.plot(kind='bar',color=palette)

    # Same figure written in the three output formats, in the original order
    for prefix, suffix in ((u"svg/Q040-", "02.svg"),
                           (u"png/Q040-", "02.png"),
                           (u"pdf/Q040-", "02.pdf")):
        plt.savefig(prefix+label+suffix)



In [10]:
# Check histogram: distribution of the answers given for each sub-question,
# saved as SVG, PNG and PDF.
for k, column in enumerate(subquestions):
    label = subquestions_value[k]

    plt.figure(figsize=(8,6))
    plt.title(u"Come viene suddiviso il bilancio annuale? % "+label, fontsize=18, y=1.02)
    plt.xlabel(label, fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    # Bin the answers themselves. The tables in space2 hold how many times
    # each answer occurred (plus the "Nessuna risposta" count), so a histogram
    # over them would show a distribution of frequencies, which does not match
    # the axis labels set above.
    data[column].hist(bins=60)
    plt.savefig(u"svg/Q040-"+label+"03.svg")
    plt.savefig(u"png/Q040-"+label+"03.png")
    plt.savefig(u"pdf/Q040-"+label+"03.pdf")



In [10]: