In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Global plot styling for every figure in this notebook:
# apply seaborn's "white" style first, then switch to the "talk" context preset.
sns.set(style="white")
sns.set_context("talk")
In [2]:
# Toy demonstration of a bar chart with unequal bar widths:
# each bar starts at its own left edge (align='edge') and spans its own width.
left = [0, 1, 6]        # left edge of each bar
widths = [1, 5, 3]      # width of each bar
data = [7, 3, 15]       # height of each bar
plt.bar(
    left,
    data,
    width=widths,
    color=('orange','green','blue'),
    alpha=0.6,
    align='edge',
    edgecolor='k',
    linewidth=2,
)
plt.show()
In [3]:
# Load the survey responses. The file is read as ISO-8859-1 text and
# ',' is treated as the decimal separator when parsing numbers.
SURVEY_CSV = 'raw/2016-17-ClassCentral-Survey-data-noUserText.csv'
df = pd.read_csv(SURVEY_CSV, decimal=',', encoding="ISO-8859-1")
In [4]:
# Count how many respondents chose each age-range answer;
# the answer labels become the index of `age`.
age_answers = df['What is your age range?']
age = age_answers.value_counts()
age
Out[4]:
In [5]:
# Lower bound of each age bracket: the first two characters of the label,
# converted to an integer.
# NOTE(review): assumes every label starts with a two-digit age — confirm against the data.
# `np.int` was only an alias of the builtin `int`; it was deprecated in NumPy 1.20
# and removed in NumPy 1.24, so the builtin is used directly.
from_age = age.index.map(lambda x: x[:2]).astype(int)
In [6]:
# Upper bound of each age bracket, taken from characters 3-4 of the label.
# A label whose characters 3-4 are ' y' carries no numeric upper bound
# (presumably an open-ended bracket such as "NN+ years ..." — verify against the data),
# so a dummy maximum age is substituted for it.
#
# The bounds are built with a pure mapping instead of writing into `Index.values`:
# an Index is immutable, and mutating its backing array only worked on some
# platforms / pandas versions. The builtin `int` replaces `np.int`, which was
# removed in NumPy 1.24.
OPEN_ENDED_MAX_AGE = 75  # dummy max age for the open-ended bracket
to_age_text = age.index.map(lambda x: x[3:5])
print(to_age_text == ' y')  # shows which bracket is the open-ended one
to_age = to_age_text.map(
    lambda s: OPEN_ENDED_MAX_AGE if s == ' y' else int(s)
).astype(int)
In [7]:
# Span of each bracket in years, counting both its first and its last age.
widths = 1 + (to_age - from_age)
widths
Out[7]:
In [8]:
# One row per age bracket: its bounds, its span in years and its respondent count.
age_df = pd.DataFrame(
    {
        'from': from_age,
        'to': to_age,
        'span': widths,
        'count': age,
    }
)
In [9]:
# Order the brackets by ascending lower bound.
sorted_age = age_df.sort_values(by='from')
In [10]:
# Histogram with unequal bin widths: each bar starts at the bracket's lower
# bound, is as wide as the bracket, and its height is count / span, so the
# bar's area (not its height) corresponds to the bracket's respondent count.
bar_left = sorted_age['from']
bar_width = sorted_age['span']
bar_height = sorted_age['count'] / bar_width
plt.bar(bar_left, bar_height, width=bar_width, alpha=0.3, align='edge')
plt.xlabel('age of respondents')
plt.show()
In [11]:
# Small multiples: one variable-width age histogram per world region.
#
# Fixes relative to the earlier version of this cell:
#   * `np.int` (removed in NumPy 1.24) is replaced by the builtin `int`.
#   * The open-ended bracket is handled inside the mapping instead of by
#     writing into `Index.values`, which was platform / version dependent.
#   * The subplot grid grows beyond 4 rows when there are more than 12 regions;
#     a fixed 4x3 grid makes `plt.subplot` fail for the 13th panel.
regions = df['Which region of the world are you in?'].dropna().unique()
n_cols = 3
# Keep the 4x3 layout whenever it is large enough; otherwise add rows (ceil division).
n_rows = max(4, -(-len(regions) // n_cols))

fig = plt.figure(figsize=(10,10))
st = fig.suptitle("Age of respondents")
for n, region in enumerate(regions, start=1):
    # Respondents of this region only.
    dfc = df[df['Which region of the world are you in?'] == region]
    age = dfc['What is your age range?'].value_counts()

    # Lower bound: first two characters of the bracket label.
    from_age = age.index.map(lambda x: x[:2]).astype(int)
    # Upper bound: characters 3-4 of the label; ' y' marks a bracket without a
    # numeric upper bound, which gets the dummy max age 75.
    to_age = age.index.map(
        lambda x: 75 if x[3:5] == ' y' else int(x[3:5])
    ).astype(int)
    widths = to_age - from_age + 1

    age_df = pd.DataFrame({'from': from_age, 'to': to_age, 'span': widths, 'count': age})
    sorted_age = age_df.sort_values('from')

    plt.subplot(n_rows, n_cols, n)
    # Bar height is count / span, so bar area reflects the respondent count.
    plt.bar(sorted_age['from'], sorted_age['count']/sorted_age['span'], width = sorted_age['span'],
            alpha = 0.3, align='edge')
    plt.yticks([])
    plt.title(region)

# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, .95])
plt.show()