Q041 - Da dove provengono le risorse per il budget annuale?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier, overridden by seaborn
pd.set_option('display.max_columns', None) # Display all the columns
plt.rcParams['font.family'] = 'sans-serif' # Sans Serif fonts for all the graphs

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Change the font
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Load csv file first
data = pd.read_csv("data/lab-survey.csv", encoding="utf-8")

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
# For each subquestion, plot the data
subquestions = ["D41[SQ001]","D41[SQ002]","D41[SQ003]","D41[SQ004]", 
                "D41[SQ005]","D41[SQ006]","D41[SQ007]","D41[SQ008]",
                "D41[SQ009]", "D41[SQ010]"]
subquestions_value = [u"Quote d'iscrizione",
                      u"Noleggio macchinari",
                      u"Servizi di prototipazione conto terzi",
                      u"Servizi di progettazione conto terzi",
                      u"Servizi di formazione conto terzi",
                      u"Corsi e workshop propri",
                      u"Autofinanziamento",
                      u"Fundraising",
                      u"Noleggio spazi",
                      u"Crowdfunding"]

In [5]:
%%capture output

# Save the output as a variable that can be saved to a file
space = {}
for k,i in enumerate(subquestions):
    space[k] = data[i].value_counts(dropna=False)
    print ""
    print "Data:",subquestions_value[k]
    print space
    print ""
    print "Data %:"
    print data[i].value_counts(normalize=True, dropna=False) * 100
    print ""
    print "Data: statistics:"
    print data[i].describe()

In [6]:
# Save+show the output to a text file
%save Q041-BudgetAnnuale.py str(output)
shutil.move("Q041-BudgetAnnuale.py", "text/Q041-BudgetAnnuale.txt")


The following commands were written to file `Q041-BudgetAnnuale.py`:

Data: Quote d'iscrizione
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64}

Data %:
NaN     74.285714
 10      5.714286
 30      2.857143
 40      2.857143
 20      2.857143
 100     2.857143
 2       2.857143
 80      2.857143
 60      1.428571
 5       1.428571
dtype: float64

Data: statistics:
count     18.000000
mean      36.055556
std       33.635542
min        2.000000
25%       10.000000
50%       25.000000
75%       55.000000
max      100.000000
Name: D41[SQ001], dtype: float64

Data: Noleggio macchinari
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64}

Data %:
NaN    81.428571
 10     5.714286
 20     4.285714
 0      2.857143
 15     1.428571
 30     1.428571
 5      1.428571
 35     1.428571
dtype: float64

Data: statistics:
count    13.000000
mean     14.230769
std      10.576340
min       0.000000
25%      10.000000
50%      10.000000
75%      20.000000
max      35.000000
Name: D41[SQ002], dtype: float64

Data: Servizi di prototipazione conto terzi
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64}

Data %:
NaN    64.285714
 20     7.142857
 5      5.714286
 10     4.285714
 25     2.857143
 50     2.857143
 15     2.857143
 30     1.428571
 60     1.428571
 55     1.428571
 80     1.428571
 40     1.428571
 35     1.428571
 0      1.428571
dtype: float64

Data: statistics:
count    25.000000
mean     25.200000
std      20.385861
min       0.000000
25%      10.000000
50%      20.000000
75%      35.000000
max      80.000000
Name: D41[SQ003], dtype: float64

Data: Servizi di progettazione conto terzi
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64}

Data %:
NaN    70.000000
 40     7.142857
 5      5.714286
 20     5.714286
 10     5.714286
 0      4.285714
 25     1.428571
dtype: float64

Data: statistics:
count    21.000000
mean     17.380952
std      14.800257
min       0.000000
25%       5.000000
50%      10.000000
75%      25.000000
max      40.000000
Name: D41[SQ004], dtype: float64

Data: Servizi di formazione conto terzi
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64, 4: NaN    55
 5      4
 10     4
 0      3
 20     2
 23     1
 60     1
dtype: int64}

Data %:
NaN    78.571429
 5      5.714286
 10     5.714286
 0      4.285714
 20     2.857143
 23     1.428571
 60     1.428571
dtype: float64

Data: statistics:
count    15.000000
mean     12.200000
std      15.110072
min       0.000000
25%       5.000000
50%      10.000000
75%      15.000000
max      60.000000
Name: D41[SQ005], dtype: float64

Data: Corsi e workshop propri
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64, 4: NaN    55
 5      4
 10     4
 0      3
 20     2
 23     1
 60     1
dtype: int64, 5: NaN    46
 30     4
 10     4
 20     4
 5      3
 50     3
 40     2
 12     1
 90     1
 60     1
 35     1
dtype: int64}

Data %:
NaN    65.714286
 30     5.714286
 10     5.714286
 20     5.714286
 5      4.285714
 50     4.285714
 40     2.857143
 12     1.428571
 90     1.428571
 60     1.428571
 35     1.428571
dtype: float64

Data: statistics:
count    24.000000
mean     28.416667
std      20.917886
min       5.000000
25%      10.000000
50%      25.000000
75%      40.000000
max      90.000000
Name: D41[SQ006], dtype: float64

Data: Autofinanziamento
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64, 4: NaN    55
 5      4
 10     4
 0      3
 20     2
 23     1
 60     1
dtype: int64, 5: NaN    46
 30     4
 10     4
 20     4
 5      3
 50     3
 40     2
 12     1
 90     1
 60     1
 35     1
dtype: int64, 6: NaN     47
 30      3
 20      3
 15      3
 50      2
 10      2
 100     2
 0       2
 60      1
 23      1
 80      1
 75      1
 40      1
 5       1
dtype: int64}

Data %:
NaN     67.142857
 30      4.285714
 20      4.285714
 15      4.285714
 50      2.857143
 10      2.857143
 100     2.857143
 0       2.857143
 60      1.428571
 23      1.428571
 80      1.428571
 75      1.428571
 40      1.428571
 5       1.428571
dtype: float64

Data: statistics:
count     23.000000
mean      34.695652
std       30.050616
min        0.000000
25%       15.000000
50%       23.000000
75%       50.000000
max      100.000000
Name: D41[SQ007], dtype: float64

Data: Fundraising
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64, 4: NaN    55
 5      4
 10     4
 0      3
 20     2
 23     1
 60     1
dtype: int64, 5: NaN    46
 30     4
 10     4
 20     4
 5      3
 50     3
 40     2
 12     1
 90     1
 60     1
 35     1
dtype: int64, 6: NaN     47
 30      3
 20      3
 15      3
 50      2
 10      2
 100     2
 0       2
 60      1
 23      1
 80      1
 75      1
 40      1
 5       1
dtype: int64, 7: NaN     60
 0       4
 20      2
 5       1
 100     1
 10      1
 15      1
dtype: int64}

Data %:
NaN     85.714286
 0       5.714286
 20      2.857143
 5       1.428571
 100     1.428571
 10      1.428571
 15      1.428571
dtype: float64

Data: statistics:
count     10.000000
mean      17.000000
std       30.294847
min        0.000000
25%        0.000000
50%        7.500000
75%       18.750000
max      100.000000
Name: D41[SQ008], dtype: float64

Data: Noleggio spazi
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64, 4: NaN    55
 5      4
 10     4
 0      3
 20     2
 23     1
 60     1
dtype: int64, 5: NaN    46
 30     4
 10     4
 20     4
 5      3
 50     3
 40     2
 12     1
 90     1
 60     1
 35     1
dtype: int64, 6: NaN     47
 30      3
 20      3
 15      3
 50      2
 10      2
 100     2
 0       2
 60      1
 23      1
 80      1
 75      1
 40      1
 5       1
dtype: int64, 7: NaN     60
 0       4
 20      2
 5       1
 100     1
 10      1
 15      1
dtype: int64, 8: NaN     57
 10      4
 0       4
 15      1
 80      1
 25      1
 40      1
 100     1
dtype: int64}

Data %:
NaN     81.428571
 10      5.714286
 0       5.714286
 15      1.428571
 80      1.428571
 25      1.428571
 40      1.428571
 100     1.428571
dtype: float64

Data: statistics:
count     13.000000
mean      23.076923
std       32.050641
min        0.000000
25%        0.000000
50%       10.000000
75%       25.000000
max      100.000000
Name: D41[SQ009], dtype: float64

Data: Crowdfunding
{0: NaN     52
 10      4
 30      2
 40      2
 20      2
 100     2
 2       2
 80      2
 60      1
 5       1
dtype: int64, 1: NaN    57
 10     4
 20     3
 0      2
 15     1
 30     1
 5      1
 35     1
dtype: int64, 2: NaN    45
 20     5
 5      4
 10     3
 25     2
 50     2
 15     2
 30     1
 60     1
 55     1
 80     1
 40     1
 35     1
 0      1
dtype: int64, 3: NaN    49
 40     5
 5      4
 20     4
 10     4
 0      3
 25     1
dtype: int64, 4: NaN    55
 5      4
 10     4
 0      3
 20     2
 23     1
 60     1
dtype: int64, 5: NaN    46
 30     4
 10     4
 20     4
 5      3
 50     3
 40     2
 12     1
 90     1
 60     1
 35     1
dtype: int64, 6: NaN     47
 30      3
 20      3
 15      3
 50      2
 10      2
 100     2
 0       2
 60      1
 23      1
 80      1
 75      1
 40      1
 5       1
dtype: int64, 7: NaN     60
 0       4
 20      2
 5       1
 100     1
 10      1
 15      1
dtype: int64, 8: NaN     57
 10      4
 0       4
 15      1
 80      1
 25      1
 40      1
 100     1
dtype: int64, 9: NaN     64
 0       5
 100     1
dtype: int64}

Data %:
NaN     91.428571
 0       7.142857
 100     1.428571
dtype: float64

Data: statistics:
count      6.000000
mean      16.666667
std       40.824829
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max      100.000000
Name: D41[SQ010], dtype: float64


In [7]:
# Swap nan for a more understandable word
space2 = {}
for i in space:
    old_dict = space[i].to_dict()
    new_dict = {}
    for k in old_dict:
        if isinstance(k, numpy.float64) and np.isnan(k):
            new_dict["Nessuna risposta"] = old_dict[k]
        elif type(k) is float and np.isnan(k):
            new_dict["Nessuna risposta"] = old_dict[k]
        else:
            new_dict[k] = old_dict[k]

    gradou = pd.Series(new_dict)
    space2[i] = gradou.order()

In [8]:
for k,i in enumerate(space2):
    # Plot the data 01
    plt.figure(figsize=(8,6))
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    plt.title(u"Da dove provengono le risorse per il budget annuale? %", fontsize=18, y=1.02)
    my_colors = seaborn.color_palette("husl", len(space)) # Set color palette
    space2[i].plot(kind="bar",color=my_colors)
    plt.savefig(u"svg/Q041-"+subquestions_value[k]+"01.svg")
    plt.savefig(u"png/Q041-"+subquestions_value[k]+"01.png")
    plt.savefig(u"pdf/Q041-"+subquestions_value[k]+"01.pdf")



In [9]:
# Plot the data 02
for k,i in enumerate(space2):
    # Reorder value_counts by index natural order
    space1 = space2[i].sort_index()

    plt.figure(figsize=(8,6))
    plt.title(u"Da dove provengono le risorse per il budget annuale? %", fontsize=18, y=1.02)
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)

    # Plot the data
    my_colors = seaborn.color_palette("husl", len(space1)) # Set color palette
    space1.plot(kind='bar',color=my_colors)
    plt.savefig(u"svg/Q041-"+subquestions_value[k]+"02.svg")
    plt.savefig(u"png/Q041-"+subquestions_value[k]+"02.png")
    plt.savefig(u"pdf/Q041-"+subquestions_value[k]+"02.pdf")



In [10]:
for k,i in enumerate(space2):
    # Check histogram
    plt.figure(figsize=(8,6))
    plt.title(u"Da dove provengono le risorse per il budget annuale? % "+subquestions_value[k], fontsize=18, y=1.02)
    plt.xlabel(subquestions_value[k], fontsize=16)
    plt.ylabel('Lab', fontsize=16)
    space2[i].hist(bins=60)
    plt.savefig(u"svg/Q041-"+subquestions_value[k]+"03.svg")
    plt.savefig(u"png/Q041-"+subquestions_value[k]+"03.png")
    plt.savefig(u"pdf/Q041-"+subquestions_value[k]+"03.pdf")



In [10]: