Q005 - Quanto è grande la sede del laboratorio?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# Legacy pandas plot styling (the original note says seaborn overrides it anyway).
# The 'display.mpl_style' option was deprecated and later removed from pandas; on
# versions without it, set_option raises OptionError (a KeyError subclass), which
# would abort this whole cell. Apply it where available and skip it otherwise.
try:
    pd.set_option('display.mpl_style', 'default')
except KeyError:
    pass
pd.set_option('display.max_columns', None) # Display all the columns

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Font for all the graphs. A generic 'sans-serif' family used to be assigned first
# and was immediately overwritten by this update, so only the effective value is kept.
matplotlib.rcParams.update({'font.family': 'Source Sans Pro'})

In [2]:
# Read the survey answers (UTF-8 encoded CSV) into the `data` DataFrame
data = pd.read_csv(
    "data/lab-survey.csv",
    encoding="utf-8",
)

In [3]:
# Check data
#data[0:4] # First 4 rows, equivalent to data.head(4)

In [4]:
%%capture output

# Save the output as a variable that can be saved to a file
# Get the distribution of ages
space = data["D5"].value_counts()
print "Data:"
print space
print ""
print "Data %:"
print data["D5"].value_counts(normalize=True) * 100
print ""
print "Data: statistics:"
print data["D5"].describe()

In [5]:
# Save+show the output to a text file
# %save writes the text of the captured `output` to a .py file and echoes what it
# wrote below the cell; the file is then moved into text/ with a .txt extension.
# NOTE(review): assumes the text/ directory already exists - confirm.
%save Q005-DimensioneLab.py str(output)
shutil.move("Q005-DimensioneLab.py", "text/Q005-DimensioneLab.txt")


The following commands were written to file `Q005-DimensioneLab.py`:
Data:
50      7
100     7
150     7
200     6
250     5
60      4
30      4
80      4
40      3
110     3
350     2
500     2
36      2
260     1
230     1
120     1
70      1
15      1
700     1
90      1
1300    1
280     1
210     1
35      1
dtype: int64

Data %:
50      10.000000
100     10.000000
150     10.000000
200      8.571429
250      7.142857
60       5.714286
30       5.714286
80       5.714286
40       4.285714
110      4.285714
350      2.857143
500      2.857143
36       2.857143
260      1.428571
230      1.428571
120      1.428571
70       1.428571
15       1.428571
700      1.428571
90       1.428571
1300     1.428571
280      1.428571
210      1.428571
35       1.428571
dtype: float64

Data: statistics:
count      67.000000
mean      160.626866
std       188.815112
min        15.000000
25%        50.000000
50%       100.000000
75%       200.000000
max      1300.000000
Name: D5, dtype: float64


In [6]:
# Plot the data 01: bar chart of the lab sizes, in value_counts order
fig, ax = plt.subplots(figsize=(8,6))
my_colors = seaborn.color_palette("husl", len(space)) # Set color palette, one color per bar
space.plot(kind="bar", color=my_colors, ax=ax)
# Title and labels are applied after plotting: the pandas bar plot sets the x label
# to the index name whenever the index has one, which would overwrite a label set
# beforehand.
ax.set_xlabel(u'Dimensione spazio (mq)', fontsize=16)
ax.set_ylabel('Lab', fontsize=16)
ax.set_title(u"Quanto è grande la sede del laboratorio?", fontsize=18, y=1.02)
# Export the same figure in the three formats
fig.savefig(u"svg/Q005-DimensioneLab01.svg")
fig.savefig(u"png/Q005-DimensioneLab01.png")
fig.savefig(u"pdf/Q005-DimensioneLab01.pdf")



In [7]:
# Plot the data 02: same bar chart, with the bars ordered by lab size

# Reorder value_counts by index (lab size) in ascending order
space1 = space.sort_index()

fig, ax = plt.subplots(figsize=(8,6))

# Plot the data
my_colors = seaborn.color_palette("husl", len(space1)) # Set color palette, one color per bar
space1.plot(kind='bar', color=my_colors, ax=ax)

# Title and labels are applied after plotting: the pandas bar plot sets the x label
# to the index name whenever the index has one, which would overwrite a label set
# beforehand.
ax.set_title(u'Quanto è grande la sede del laboratorio?', fontsize=18, y=1.02)
ax.set_xlabel(u'Dimensione spazio (mq)', fontsize=16)
ax.set_ylabel('Lab', fontsize=16)

# NOTE(review): these files use the stem "DimensioniLab" while plot 01 and the
# text output use "DimensioneLab" - confirm whether that is intentional.
fig.savefig(u"svg/Q005-DimensioniLab02.svg")
fig.savefig(u"png/Q005-DimensioniLab02.png")
fig.savefig(u"pdf/Q005-DimensioniLab02.pdf")



In [8]:
# Check histogram: distribution of the raw D5 values over 60 bins
fig, ax = plt.subplots(figsize=(8,6))
ax.set_title(u'Quanto è grande la sede del laboratorio?', fontsize=18, y=1.02)
ax.set_xlabel(u'Dimensioni (mq)', fontsize=16)
ax.set_ylabel('Lab', fontsize=16)
data["D5"].hist(bins=60, ax=ax)
# Export the figure once per output format
for target in (u"svg/Q005-DimensioniLab03.svg",
               u"png/Q005-DimensioniLab03.png",
               u"pdf/Q005-DimensioniLab03.pdf"):
    fig.savefig(target)



In [9]: