In [63]:
%pylab inline
In [64]:
import pandas as pd
# Location of the workbook. Still an absolute local path; point this at your
# own copy of the file when running elsewhere.
XLSX_PATH = '/home/saket/BISC104-Session01-Thursday-A.xlsx'

# Sheet position in the workbook -> label stored in the 'sheet_name' column.
sheet_number_to_name = {
    1: 'Mingyu Park',
    2: 'William Garrick',
    3: 'Mariah & Ahmed',
    4: 'Reem AlFaisal',
    5: 'Sophia and Catherine',
    6: 'Jesus Juarez',
    7: 'Reagor Wagnon',
    8: 'Yuyuan Chen',
    9: 'Gus and Vedika',
    10: 'Zak Cook',
    11: 'Sam Gholami',
    12: 'Hannah Sobelman and Erin Philen',
}

# Request exactly the sheets listed in the mapping so the two cannot drift
# apart; read_excel returns a dict keyed by the same sheet positions.
dfs = pd.read_excel(XLSX_PATH, sheet_name=list(sheet_number_to_name))

master_dfs = []
for key, df in dfs.items():
    # Drop every space from the header names so all sheets share one spelling
    # (later cells look up 'Day', '#Skateboarders' and '#Bikeriders').
    df.columns = [str(col).replace(' ', '') for col in df.columns]
    df['sheet_name'] = sheet_number_to_name[key]

    # Fail with the offending sheet named, instead of printing the columns
    # and then crashing on the next statement.
    if 'Time' not in df.columns:
        raise KeyError(
            "Sheet %r (%s) has no 'Time' column; found columns: %s"
            % (key, sheet_number_to_name[key], list(df.columns))
        )

    df['Time'] = df['Time'].str.replace(' ', '')

    # 'Time' looks like '<start>-<end>'. expand=True yields one column per
    # piece; reindex guarantees both pieces exist even when no row on the
    # sheet contains a '-'.
    bounds = df['Time'].str.split('-', n=1, expand=True).reindex(columns=[0, 1])
    df['start'] = bounds[0]
    df['end'] = bounds[1]

    master_dfs.append(df)
In [65]:
# Stack the per-sheet frames into one table, strip spaces from the 'Day'
# labels, and discard the raw 'Time' text column.
master_df = (
    pd.concat(master_dfs)
    .assign(Day=lambda frame: frame['Day'].str.replace(' ', ''))
    .drop(columns=['Time'])
)
In [66]:
# Order rows by 'start', breaking ties on 'Day'. 'start' is still text at
# this point, so the ordering is lexicographic.
sort_keys = ['start', 'Day']
master_df = master_df.sort_values(by=sort_keys)
In [67]:
# Print the table as LaTeX source, with a fresh 0..n-1 index that is then
# left out of the rendered output.
latex_source = master_df.reset_index(drop=True).to_latex(index=False)
print(latex_source)
In [68]:
# Give the two count columns names without the leading '#'.
count_column_names = {
    '#Skateboarders': 'Skateboarders',
    '#Bikeriders': 'Bikeriders',
}
master_df = master_df.rename(columns=count_column_names)
In [69]:
# Preview the first five rows of the combined table.
master_df.head(n=5)
Out[69]:
In [70]:
import seaborn as sns
# Global plot look: 'whitegrid' axes style, 'talk' context with fonts
# scaled by 2.
sns.set_style(style='whitegrid')
sns.set_context(context='talk', font_scale=2)
In [71]:
# Reshape to long form: one row per (observation, count type). The count
# type lands in 'variable' and the count itself in 'value'.
identifier_columns = ['Day', 'sheet_name', 'start', 'end']
count_columns = ['Skateboarders', 'Bikeriders']
df_molten = master_df.melt(id_vars=identifier_columns, value_vars=count_columns)
In [72]:
# Scratch expression: shows what pd.to_datetime returns for a bare 'HH:MM'
# string (presumably a check before the same call is applied to the
# 'start' and 'end' columns in the next cell).
pd.to_datetime('06:00')
Out[72]:
In [73]:
# Parse the textual 'start' / 'end' columns into datetime columns.
for text_column, parsed_column in (('start', 'start_time'), ('end', 'end_time')):
    df_molten[parsed_column] = pd.to_datetime(df_molten[text_column])
In [74]:
# Length of each observation window as a timedelta (end minus start).
df_molten['delta'] = df_molten['end_time'].sub(df_molten['start_time'])
In [75]:
# Bar chart of 'value' per sheet, one bar colour per count type, saved as PDF.
fig, ax = plt.subplots(figsize=(12, 12))
sns.barplot(x='sheet_name', y='value', hue='variable', data=df_molten, ax=ax)
# Turn the x tick labels vertical.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
fig.tight_layout()
fig.savefig('thursday_a_sheet_wise_plots.pdf')
In [76]:
# Bar chart of 'value' per day, one bar colour per count type, saved as PDF.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='Day', y='value', hue='variable', data=df_molten, ax=ax)
# Turn the x tick labels vertical.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
fig.tight_layout()
fig.savefig('thursday_a_day_wise_plots.pdf')
In [ ]:
In [77]:
# Window length in minutes. `.dt.components.minutes` is only the minutes
# *field* of the timedelta (0-59): a 1 h window would give 0 and a
# 1 h 30 min window would give 30. total_seconds() covers the whole span.
df_molten['delta_min'] = df_molten['delta'].dt.total_seconds() / 60

# Counts per minute of observation, so windows of different lengths are
# comparable. Windows with a non-positive duration have no meaningful rate;
# masking the divisor makes them NaN rather than +/-inf.
valid_minutes = df_molten['delta_min'].where(df_molten['delta_min'] > 0)
df_molten['normalized_value'] = df_molten['value'] / valid_minutes
In [78]:
# Bar chart of 'normalized_value' per day, one bar colour per count type,
# saved as PDF.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x='Day', y='normalized_value', hue='variable', data=df_molten, ax=ax)
# Turn the x tick labels vertical.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
fig.tight_layout()
fig.savefig('thursday_a_day_wise_plots_normalized.pdf')
In [33]:
# Bar chart of 'normalized_value' per sheet, one bar colour per count type,
# saved as PDF.
fig, ax = plt.subplots(figsize=(12, 12))
sns.barplot(x='sheet_name', y='normalized_value', hue='variable', data=df_molten, ax=ax)
# Turn the x tick labels vertical.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
fig.tight_layout()
fig.savefig('thursday_a_sheet_wise_plots_normalized.pdf')
In [51]:
# Hour-of-day component of each parsed start time.
start_times = df_molten['start_time']
df_molten['start_hours'] = start_times.dt.hour
In [53]:
# Preview the first five rows of the long-form table.
df_molten.head(n=5)
Out[53]:
In [62]:
# Add a constant 'subject' column (every row gets 0), then preview the
# first five rows.
df_molten.loc[:, 'subject'] = 0
df_molten.head(n=5)
Out[62]:
In [61]:
# Normalized count against starting hour. `sns.tsplot` has been removed from
# seaborn; `sns.lineplot` (seaborn >= 0.9) is its replacement and aggregates
# the repeated observations at each hour into a mean line with an error
# band, so no per-row `unit` column is needed.
sns.lineplot(x="start_hours", y="normalized_value", data=df_molten)
In [34]:
# Regression of raw count on window length (minutes), one fit per count type.
# lmplot's `size` argument was renamed to `height` (seaborn >= 0.9); the old
# name is no longer accepted by current releases.
grid = sns.lmplot(x="delta_min", y="value", hue="variable", data=df_molten, height=15)
# Save through the returned grid so the file always contains this figure,
# rather than whatever pyplot currently considers the active figure.
grid.savefig('thursday_a_time_wise_plots.pdf')
In [ ]: