In [9]:
%pylab inline
import pandas as pd
import seaborn as sns
import datetime
from collections import OrderedDict
sns.set_context('talk', font_scale=1.5)
sns.set_style('white')
In [10]:
xl = pd.ExcelFile('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx')
sheet_names = xl.sheet_names # see all sheet names
sheet_names
Out[10]:
In [18]:
all_df = OrderedDict()
master_df = pd.DataFrame()
index = 1
for sheet_name in sheet_names[1:]:
df = pd.read_excel('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx', sheet_name=sheet_name)
df.columns = ['Day', 'Time', '# Females', '# Males']
all_df['sheet-{}'.format(index)] = df
master_df = pd.concat([master_df, df], ignore_index=True)
index+=1
In [8]:
print(master_df.to_latex(index=False))
In [23]:
all_df = OrderedDict()
master_df = pd.DataFrame()
index = 1
for sheet_name in sheet_names[1:]:
df = pd.read_excel('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx', sheet_name=sheet_name)
df.columns = ['Day', 'Time', '# Females', '# Males']
#df['Group'] = sheet_name
df['Group'] = 'sheet-{}'.format(index)
#all_df[sheet_name] = df
all_df['sheet-{}'.format(index)] = df
index += 1
master_df = pd.concat([master_df, df], ignore_index=True)
In [24]:
master_df.columns
Out[24]:
In [25]:
master_df_molten = pd.melt(master_df, id_vars=['Day', 'Time', 'Group'], value_vars=['# Females', '# Males'])
In [26]:
fig, ax = plt.subplots(figsize=(18, 13))
sns.barplot(data=master_df_molten, x='Group', y='value', hue='variable')
plt.xticks(rotation=45)
ax.legend(frameon=False)
fig.tight_layout()
fig.savefig('BISC104_Fr_M_bar.pdf')
In [27]:
all_df = OrderedDict()
master_df = pd.DataFrame()
index = 1
for sheet_name in sheet_names[1:]:
df = pd.read_excel('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx', sheet_name=sheet_name)
df.columns = ['Day', 'Time', '# Females', '# Males']
#df['Group'] = sheet_name
df['Group'] = 'sheet-{}'.format(index)
index += 1
df[['Time_start', 'Time_end']] = df['Time'].str.split('-', n=1, expand=True)
df['Time_start'] = pd.to_datetime(df['Time_start'])#.astype(pd.Timestamp)
df['Time_end'] = pd.to_datetime(df['Time_end'])#.astype(pd.Timestamp)
df['delta'] = [datetime.timedelta.total_seconds(x) for x in df['Time_end'] - df['Time_start']]
all_df[sheet_name] = df
df['# Females'] = df['# Females']/df['delta'].astype(float)
df['# Males'] = df['# Males']/df['delta']
master_df = pd.concat([master_df, df], ignore_index=True)
In [28]:
master_df_molten = pd.melt(master_df, id_vars=['Day', 'Time', 'Group'], value_vars=['# Females', '# Males'])
fig, ax = plt.subplots(figsize=(12, 10))
sns.barplot(data=master_df_molten, x='Group', y='value', hue='variable')
plt.xticks(rotation=45)
ax.set_ylabel('#/second')
fig.tight_layout()
ax.legend(frameon=False)
fig.savefig('BISC104_F_M_bar_normalized.pdf')
In [29]:
all_df = OrderedDict()
master_df = pd.DataFrame()
index = 1
for sheet_name in sheet_names[1:]:
df = pd.read_excel('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx', sheet_name=sheet_name)
df.columns = ['Day', 'Time', '# Females', '# Males']
df['Group'] = sheet_name
df['Group'] = 'sheet-{}'.format(index)
index += 1
df[['Time_start', 'Time_end']] = df['Time'].str.split('-', n=1, expand=True)
df['Time_start'] = pd.to_datetime(df['Time_start'])#.astype(pd.Timestamp)
df['Time_end'] = pd.to_datetime(df['Time_end'])#.astype(pd.Timestamp)
df['delta'] = [datetime.timedelta.total_seconds(x) for x in df['Time_end'] - df['Time_start']]
all_df[sheet_name] = df
#df['# Females'] = df['# Females']/df['delta'].astype(float)
#df['# Males'] = df['# Males']/df['delta']
master_df = pd.concat([master_df, df], ignore_index=True)
master_df_molten = pd.melt(master_df, id_vars=['Day', 'Time', 'Group', 'delta'], value_vars=['# Females', '# Males'])
sns.lmplot(x="delta", y="value", hue="variable", data=master_df_molten, legend_out=True)
plt.xlabel('Time duration (s)')
plt.ylabel('Count')
plt.tight_layout()
plt.savefig('BISC104_F_M_bar_timewise.pdf')
In [30]:
all_df = OrderedDict()
master_df = pd.DataFrame()
index = 1
for sheet_name in sheet_names[1:]:
df = pd.read_excel('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx', sheet_name=sheet_name)
df.columns = ['Day', 'Time', '# Females', '# Males']
#df['Group'] = sheet_name
split_sheet = sheet_name.split('__')
if len(split_sheet)==2:
location = split_sheet[1]
else:
location = 'not-specified'
df['Group'] = location
#all_df[sheet_name] = df
all_df['sheet-{}'.format(index)] = df
index += 1
master_df = pd.concat([master_df, df], ignore_index=True)
In [32]:
master_df_molten = pd.melt(master_df, id_vars=['Day', 'Time', 'Group'], value_vars=['# Females', '# Males'])
fig, ax = plt.subplots(figsize=(18, 13))
sns.barplot(data=master_df_molten, x='Group', y='value', hue='variable')
plt.xticks(rotation=45)
ax.legend(frameon=False)
fig.tight_layout()
fig.savefig('BISC104_Fr_M_bar_location.pdf')
In [34]:
all_df = OrderedDict()
master_df = pd.DataFrame()
index = 1
for sheet_name in sheet_names[1:]:
df = pd.read_excel('../BISC-104-Session01-Scientific-Method-Friday-M.xlsx', sheet_name=sheet_name)
df.columns = ['Day', 'Time', '# Females', '# Males']
#df['Group'] = sheet_name
split_sheet = sheet_name.split('__')
if len(split_sheet)==2:
location = split_sheet[1]
else:
location = 'not-specified'
df['Group'] = location
#all_df[sheet_name] = df
all_df['sheet-{}'.format(index)] = df
index += 1
df[['Time_start', 'Time_end']] = df['Time'].str.split('-', n=1, expand=True)
df['Time_start'] = pd.to_datetime(df['Time_start'])#.astype(pd.Timestamp)
df['Time_end'] = pd.to_datetime(df['Time_end'])#.astype(pd.Timestamp)
df['delta'] = [datetime.timedelta.total_seconds(x) for x in df['Time_end'] - df['Time_start']]
all_df[sheet_name] = df
df['# Females'] = df['# Females']/df['delta'].astype(float)
df['# Males'] = df['# Males']/df['delta']
master_df = pd.concat([master_df, df], ignore_index=True)
master_df_molten = pd.melt(master_df, id_vars=['Day', 'Time', 'Group'], value_vars=['# Females', '# Males'])
fig, ax = plt.subplots(figsize=(18, 13))
sns.barplot(data=master_df_molten, x='Group', y='value', hue='variable')
plt.xticks(rotation=45)
ax.legend(frameon=False)
ax.set_ylabel('#/second')
fig.tight_layout()
fig.savefig('BISC104_Fr_M_bar_location_normalized.pdf')
In [ ]: