In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Global plot styling: white background and the "talk" context for every
# figure drawn below.
sns.set(context="talk", style="white")
In [2]:
# Load the survey export. The file is read as ISO-8859-1 (Latin-1), and
# decimal=',' makes pandas treat a comma as the decimal separator.
# NOTE(review): later cells address the '# MOOCs Finished' buckets through
# string labels such as '2.5' -- presumably that relies on this setting
# leaving the column unparsed; confirm before changing it.
df = pd.read_csv(
    'raw/2016-17-ClassCentral-Survey-data-noUserText.csv',
    encoding="ISO-8859-1",
    decimal=',',
)
In [3]:
# Columns used in this analysis: the timestamp (counted per group further
# down), the finished-MOOC bucket, and the certificate-importance question.
selectors = [
    'Timestamp',
    '# MOOCs Finished',
    'How important is the ability to earn a certificate when you complete a MOOC?',
]
In [4]:
# Keep only the columns listed in `selectors`, then preview the first rows.
df_select = df[selectors]
df_select.head()
Out[4]:
In [5]:
# Number of rows in the selected frame.
df_select.shape[0]
Out[5]:
In [6]:
# Cross-tabulate the certificate-importance answers (rows) against the
# finished-MOOC buckets (columns). Each cell holds the number of distinct
# 'Timestamp' values in that group.
# NOTE(review): presumably each timestamp identifies one survey response,
# making this a respondent count -- confirm.
table = pd.pivot_table(
    df_select,
    index=['How important is the ability to earn a certificate when you complete a MOOC?'],
    columns=['# MOOCs Finished'],
    values='Timestamp',
    aggfunc=lambda stamps: len(stamps.unique()),
)
In [7]:
# Display the pivot table of distinct-timestamp counts.
table
Out[7]:
In [8]:
# Reorder the bucket columns by ascending numeric value; the labels
# themselves are strings.
table = table.loc[:, ['0', '1', '2.5', '4.5', '8', '15', '25']]

# Express every cell as a percentage of its column total, so each
# finished-MOOC bucket sums to 100.
new_table = table.div(table.sum(axis=0), axis='columns') * 100
In [9]:
# Inspect the row labels and their order; the heatmap cell below selects
# rows by position.
new_table.index
Out[9]:
In [10]:
# Heatmap of the percentage table limited to two answer rows, picked by
# position: row 4 first, then row 0. Cells are annotated with values rounded
# to whole numbers and the colour bar is hidden.
# NOTE(review): the positions depend on the row order shown by
# `new_table.index`; confirm which answers rows 4 and 0 correspond to.
# To plot every answer row in reverse order, pass `new_table.iloc[::-1]` instead.
sns.heatmap(
    new_table.iloc[[4, 0]],
    cmap="Greens",
    annot=True,
    fmt='2.0f',
    cbar=False,
)
Out[10]:
In [11]:
# Display any open matplotlib figures.
plt.show()