Q037 - Qual è stato il budget del laboratorio alla sua partenza?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# Notebook-wide display configuration: pandas options first, then matplotlib fonts.
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier, overridden by seaborn
pd.set_option('display.max_columns', None) # Display all the columns
plt.rcParams['font.family'] = 'sans-serif' # Sans Serif fonts for all the graphs

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Change the font to Source Sans Pro.
# NOTE(review): this writes the same 'font.family' key as the plt.rcParams line
# above, so the 'sans-serif' value is presumably replaced here -- confirm that
# both names refer to the same rcParams object and drop the redundant setting.
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Load the survey answers from the CSV file (decoded as UTF-8) into `data`;
# every later cell reads its column from this object.
data = pd.read_csv("data/lab-survey.csv", encoding="utf-8")

In [3]:
# Check data
#data[0:4] # Equals to data.head()

In [4]:
%%capture output

# Everything printed in this cell is captured by the %%capture magic on the
# cell's first line and stored in the variable `output`, so that it can be
# written to a text file later on.
#
# Single-argument print(...) calls are used on purpose: they produce the same
# text under Python 2 (parenthesised expression) and Python 3 (function call).

# Distribution of the answers in column D37 (missing answers are counted too)
space = data["D37"].value_counts(dropna=False)
print("Data:")
print(space)
print("")
print("Data %:")
# Same distribution expressed as a percentage of all respondents
print(data["D37"].value_counts(normalize=True,dropna=False) * 100)
print("")
print("Data: statistics:")
# Summary statistics of the column as reported by describe()
print(data["D37"].describe())

In [5]:
# Save+show the captured output: the %save magic writes str(output) to the file
# Q037-Budget.py, which is then moved to text/Q037-Budget.txt.
# NOTE(review): assumes the text/ directory already exists -- confirm.
%save Q037-Budget.py str(output)
shutil.move("Q037-Budget.py", "text/Q037-Budget.txt")


The following commands were written to file `Q037-Budget.py`:
Data:
NaN        16
 0          6
 2000       5
 10000      5
 5000       4
 8000       3
 500        3
 30000      2
 100000     2
 3000       2
 16000      2
 20000      2
 40000      1
 600        1
 34000      1
 275        1
 20         1
 900000     1
 50000      1
 80         1
 25000      1
 30         1
 120        1
 6800       1
 100        1
 1000       1
 15000      1
 60000      1
 150000     1
 70000      1
dtype: int64

Data %:
NaN        22.857143
 0          8.571429
 2000       7.142857
 10000      7.142857
 5000       5.714286
 8000       4.285714
 500        4.285714
 30000      2.857143
 100000     2.857143
 3000       2.857143
 16000      2.857143
 20000      2.857143
 40000      1.428571
 600        1.428571
 34000      1.428571
 275        1.428571
 20         1.428571
 900000     1.428571
 50000      1.428571
 80         1.428571
 25000      1.428571
 30         1.428571
 120        1.428571
 6800       1.428571
 100        1.428571
 1000       1.428571
 15000      1.428571
 60000      1.428571
 150000     1.428571
 70000      1.428571
dtype: float64

Data: statistics:
count        54.000000
mean      33268.981481
std      123725.800056
min           0.000000
25%         500.000000
50%        5000.000000
75%       19000.000000
max      900000.000000
Name: D37, dtype: float64


In [6]:
# Swap nan for a more understandable word: the NaN key of the value counts
# (respondents who gave no answer) is relabelled "Nessuna risposta".
old_dict = space.to_dict()
new_dict = {}
for i in old_dict:
    # numpy is bound as `np` in this notebook, so the type check has to go
    # through `np`. One isinstance test covers both plain Python floats and
    # numpy floating scalars; the type guard also keeps np.isnan() away from
    # non-numeric keys.
    if isinstance(i, (float, np.floating)) and np.isnan(i):
        new_dict["Nessuna risposta"] = old_dict[i]
    else:
        new_dict[i] = old_dict[i]

spaceu = pd.Series(new_dict)
# Sort by count, ascending. Series.order() only exists in old pandas releases;
# newer ones provide sort_values() instead, so use whichever is available.
space = spaceu.sort_values() if hasattr(spaceu, "sort_values") else spaceu.order()

In [7]:
# Plot 01: bar chart with one bar per distinct answer held in `space`
question_title = u"Qual è stato il budget del laboratorio alla sua partenza?"

plt.figure(figsize=(8,6))
plt.title(question_title, fontsize=18, y=1.02)
plt.xlabel(u'Budget €', fontsize=16)
plt.ylabel('Lab', fontsize=16)

# One "husl" palette colour per bar
my_colors = seaborn.color_palette("husl", len(space))
space.plot(kind="bar",color=my_colors)

# Export the same figure in the three formats
for target in (u"svg/Q037-Budget.svg", u"png/Q037-Budget.png", u"pdf/Q037-Budget.pdf"):
    plt.savefig(target)



In [8]:
# Plot 02: same bar chart, with the bars arranged by index instead

# Reorder the counts by their index (the answer labels)
space1 = space.sort_index()
question_title = u'Qual è stato il budget del laboratorio alla sua partenza?'

plt.figure(figsize=(8,6))
plt.xlabel(u'Budget €', fontsize=16)
plt.ylabel('Lab', fontsize=16)
plt.title(question_title, fontsize=18, y=1.02)

# One "husl" palette colour per bar
my_colors = seaborn.color_palette("husl", len(space1))
space1.plot(kind='bar',color=my_colors)

# Export the same figure in the three formats
for target in (u"svg/Q037-Budget02.svg", u"png/Q037-Budget02.png", u"pdf/Q037-Budget02.pdf"):
    plt.savefig(target)



In [9]:
# Plot 03: histogram of the raw D37 values, split into 60 bins
question_title = u'Qual è stato il budget del laboratorio alla sua partenza?'

plt.figure(figsize=(8,6))
plt.xlabel(u'Budget €', fontsize=16)
plt.ylabel('Lab', fontsize=16)
plt.title(question_title, fontsize=18, y=1.02)

budget_column = data["D37"]
budget_column.hist(bins=60)

# Export the same figure in the three formats
for target in (u"svg/Q037-Budget03.svg", u"png/Q037-Budget03.png", u"pdf/Q037-Budget03.pdf"):
    plt.savefig(target)



In [9]: