Q035 - Quanti sono gli utenti registrati o associati?


In [1]:
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import shutil

# pandas display options: default matplotlib styling for plots (seaborn overrides it)
# and no limit on the number of DataFrame columns shown.
pd.options.display.mpl_style = 'default'
pd.options.display.max_columns = None

# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html

# Font setup: a generic sans-serif family is selected first, then replaced by
# the specific typeface used for every figure (the later assignment wins).
plt.rcParams.update({'font.family': 'sans-serif'})
matplotlib.rcParams['font.family'] = 'Source Sans Pro'

In [2]:
# Read the survey answers (UTF-8 encoded CSV) into a DataFrame
data = pd.read_csv(filepath_or_buffer="data/lab-survey.csv", encoding="utf-8")

In [3]:
# Check data
#data[0:4] # Equivalent to data.head(4)

In [4]:
%%capture output

# Everything printed by this cell is captured into `output` by the
# %%capture cell magic, so that a later cell can write it to a text file.
# Column D35 is the answer to this question (number of registered or
# associated users); missing answers (NaN) are kept as their own category.
# Single-argument print(...) calls produce the same text on Python 2 and 3.
space = data["D35"].value_counts(dropna=False)
print("Data:")
print(space)
print("")
print("Data %:")
# Same counts expressed as a percentage of all rows (NaN included)
print(data["D35"].value_counts(normalize=True, dropna=False) * 100)
print("")
print("Data: statistics:")
# describe() reports count/mean/std/quartiles of the non-missing answers
print(data["D35"].describe())

In [5]:
# Save+show the output to a text file.
# Step 1: the %save magic writes str(output) (the text captured by the
# %%capture cell) to Q035-NumeroUtenti.py and echoes what it wrote.
%save Q035-NumeroUtenti.py str(output)
# Step 2: move/rename that file to its final location as a .txt file
# (the text/ directory is assumed to exist already).
shutil.move("Q035-NumeroUtenti.py", "text/Q035-NumeroUtenti.txt")


The following commands were written to file `Q035-NumeroUtenti.py`:
Data:
NaN     21
 0       5
 20      5
 50      3
 40      3
 100     3
 25      3
 120     2
 150     2
 60      2
 12      2
 15      1
 1       1
 130     1
 6       1
 200     1
 10      1
 11      1
 13      1
 19      1
 336     1
 18      1
 54      1
 22      1
 600     1
 28      1
 30      1
 159     1
 35      1
 140     1
dtype: int64

Data %:
NaN     30.000000
 0       7.142857
 20      7.142857
 50      4.285714
 40      4.285714
 100     4.285714
 25      4.285714
 120     2.857143
 150     2.857143
 60      2.857143
 12      2.857143
 15      1.428571
 1       1.428571
 130     1.428571
 6       1.428571
 200     1.428571
 10      1.428571
 11      1.428571
 13      1.428571
 19      1.428571
 336     1.428571
 18      1.428571
 54      1.428571
 22      1.428571
 600     1.428571
 28      1.428571
 30      1.428571
 159     1.428571
 35      1.428571
 140     1.428571
dtype: float64

Data: statistics:
count     49.000000
mean      66.448980
std      101.389115
min        0.000000
25%       15.000000
50%       28.000000
75%      100.000000
max      600.000000
Name: D35, dtype: float64


In [7]:
# Swap nan for a more understandable word:
# the NaN key (no answer given) is relabelled "Nessuna risposta", every other
# key is kept unchanged.
old_dict = space.to_dict()
new_dict = {}
for key, count in old_dict.items():
    # One check covers both plain Python floats and numpy float scalars
    # (referenced through the imported `np` alias). The type test comes first
    # because np.isnan() raises TypeError on non-numeric keys such as strings.
    if isinstance(key, (float, np.floating)) and np.isnan(key):
        new_dict["Nessuna risposta"] = count
    else:
        new_dict[key] = count

spaceu = pd.Series(new_dict)
# Sort by count, ascending. Series.order() is the legacy spelling of
# Series.sort_values(); prefer the newer method when this pandas provides it
# and fall back to order() on old installations.
if hasattr(spaceu, "sort_values"):
    space = spaceu.sort_values()
else:
    space = spaceu.order()

In [8]:
# Bar chart 01: one bar per answer, in the order `space` currently has
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xlabel(u'Numero utenti', fontsize=16)
ax.set_ylabel('Lab', fontsize=16)
ax.set_title(u"Quanti sono gli utenti registrati o associati?", fontsize=18, y=1.02)
my_colors = seaborn.color_palette("husl", len(space))  # one hue per bar
space.plot(kind="bar", color=my_colors, ax=ax)
# Export the same figure in each of the three formats
for target in (u"svg/Q035-NumeroUtenti01.svg",
               u"png/Q035-NumeroUtenti01.png",
               u"pdf/Q035-NumeroUtenti01.pdf"):
    plt.savefig(target)



In [9]:
# Bar chart 02: same counts, ordered by the answer itself (the index)

# Reorder the counts by index value
space1 = space.sort_index()

fig, ax = plt.subplots(figsize=(8, 6))
ax.set_title(u'Quanti sono gli utenti registrati o associati?', fontsize=18, y=1.02)
ax.set_xlabel(u'Numero utenti', fontsize=16)
ax.set_ylabel('Lab', fontsize=16)

# Draw the bars, one hue per bar
my_colors = seaborn.color_palette("husl", len(space1))
space1.plot(kind='bar', color=my_colors, ax=ax)
# Export the same figure in each of the three formats
for target in (u"svg/Q035-NumeroUtenti02.svg",
               u"png/Q035-NumeroUtenti02.png",
               u"pdf/Q035-NumeroUtenti02.pdf"):
    plt.savefig(target)



In [13]:
# Histogram of the raw D35 answers, split into 60 bins
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_title(u'Quanti sono gli utenti registrati o associati?', fontsize=18, y=1.02)
ax.set_xlabel(u'Numero utenti', fontsize=16)
ax.set_ylabel('Lab', fontsize=16)
data['D35'].hist(bins=60, ax=ax)
# Export the same figure in each of the three formats
for target in (u"svg/Q035-NumeroUtenti03.svg",
               u"png/Q035-NumeroUtenti03.png",
               u"pdf/Q035-NumeroUtenti03.pdf"):
    plt.savefig(target)



In [ ]: