In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

More people skateboard than walk


In [26]:
import pandas as pd

dfs = pd.read_excel('/home/saket/BISC104-Session01-Thursday-M.xlsx',sheet_name=[1, 2, 3, 4, 
                                                                                 5, 6, 7,
                                                                                9, 10])

sheet_number_to_name = {1: 'Aris and Sabrina',
                        2: 'Lourdes and Emilia',
                        3: 'Monique and Farah',
                        4: 'Michael and Jonathan',
                        5: 'Grace and Avery',
                        6: 'Kelly and Charis',
                        7: 'Hailey and Madison',
                        8: 'Michael',
                        9: 'Jett and Dorian',
                       10:  'Nicholas and Zack'}

master_dfs = []
for key in dfs.keys():
    df = dfs[key]
    df['sheet_name'] = sheet_number_to_name[key]
    try:
        df['Time'] = df['Time'].str.replace(' ','') 
    except:
        print(df.columns)
    df['start'] , df['end'] = df['Time'].str.split('-', 1).str
    master_dfs.append(df)

In [27]:
master_df = pd.concat(master_dfs)
master_df = master_df.drop(columns=['Time'])
master_df


Out[27]:
Day Skateboarders Walkers Skateboarder-Walkers Location sheet_name start end
0 Tuesday 2 56 1 SCA Courtyard Aris and Sabrina 11:30 11:50
1 Wednesday 1 43 4 SCA Courtyard Aris and Sabrina 10:30 10:50
2 Tuesday 4 66 2 SCA Courtyard Aris and Sabrina 16:00 16:30
3 Wednesday 3 39 0 SCA Courtyard Aris and Sabrina 16:00 16:20
0 Thursday 6 37 1 McCarthy Quad Lourdes and Emilia 12:45 12:50
1 Friday 23 183 0 McCarthy Quad Lourdes and Emilia 21:30 21:45
2 Monday 71 149 7 McCarthy Quad Lourdes and Emilia 17:45 17:55
0 Thursday 14 39 9 Taper Hall (near bike racks) Monique and Farah 12:00 12:25
1 Tuesday 20 45 11 Taper Hall (near bike racks) Monique and Farah 12:10 12:25
2 Wednesday 6 22 4 Taper Hall (near bike racks) Monique and Farah 11:00 11:10
0 Tuesday 8 50 2 USC Village Michael and Jonathan 15:00 15:10
1 Tuesday 14 73 1 USC Village Michael and Jonathan 21:10 21:25
2 Wednesday 15 113 7 USC Village Michael and Jonathan 15:25 15:50
0 Wednesday 37 94 1 Lyon Center Grace and Avery 13:55 14:05
1 Wednesday 28 111 6 Lyon Center Grace and Avery 15:00 15:15
2 Wednesday 113 179 14 Lyon Center Grace and Avery 15:25 15:50
0 Thursday 7 67 0 In front of bookstore Kelly and Charis 14:41 14:46
1 Tuesday 13 191 2 In front of bookstore Kelly and Charis 13:30 13:40
2 Wednesday 22 187 1 In front of bookstore Kelly and Charis 14:10 14:20
0 Thursday 30 245 5 In front of track Hailey and Madison 11:43 11:53
1 Friday 18 176 2 In front of track Hailey and Madison 09:00 09:10
2 Wednesday 94 12 1 In front of track Hailey and Madison 10:00 10:05
3 Tuesday 26 175 5 In front of track Hailey and Madison 13:57 14:02
4 Wednesday 6 76 2 In front of track Hailey and Madison 15:27 15:32
5 Wednesday 129 20 0 In front of track Hailey and Madison 15:35 15:45
0 Thursday 14 55 2 Parkside: Walkway Jett and Dorian 11:30 11:40
1 Friday 2 32 1 Parkside: Walkway Jett and Dorian 11:05 11:15
2 Monday 10 107 4 Parkside: Walkway Jett and Dorian 14:05 14:20
0 Thursday 11 140 2 Starbucks by EVK Nicholas and Zack 11:25 11:35
1 Thursday 26 158 6 Starbucks by EVK Nicholas and Zack 15:35 15:45
2 Friday 32 141 5 Starbucks by EVK Nicholas and Zack 13:50 14:00
3 Tuesday 14 105 3 Starbucks by EVK Nicholas and Zack 15:30 15:40

In [28]:
master_df = master_df.sort_values(by=['start', 'Day'])

In [29]:
master_df


Out[29]:
Day Skateboarders Walkers Skateboarder-Walkers Location sheet_name start end
1 Friday 18 176 2 In front of track Hailey and Madison 09:00 09:10
2 Wednesday 94 12 1 In front of track Hailey and Madison 10:00 10:05
1 Wednesday 1 43 4 SCA Courtyard Aris and Sabrina 10:30 10:50
2 Wednesday 6 22 4 Taper Hall (near bike racks) Monique and Farah 11:00 11:10
1 Friday 2 32 1 Parkside: Walkway Jett and Dorian 11:05 11:15
0 Thursday 11 140 2 Starbucks by EVK Nicholas and Zack 11:25 11:35
0 Thursday 14 55 2 Parkside: Walkway Jett and Dorian 11:30 11:40
0 Tuesday 2 56 1 SCA Courtyard Aris and Sabrina 11:30 11:50
0 Thursday 30 245 5 In front of track Hailey and Madison 11:43 11:53
0 Thursday 14 39 9 Taper Hall (near bike racks) Monique and Farah 12:00 12:25
1 Tuesday 20 45 11 Taper Hall (near bike racks) Monique and Farah 12:10 12:25
0 Thursday 6 37 1 McCarthy Quad Lourdes and Emilia 12:45 12:50
1 Tuesday 13 191 2 In front of bookstore Kelly and Charis 13:30 13:40
2 Friday 32 141 5 Starbucks by EVK Nicholas and Zack 13:50 14:00
0 Wednesday 37 94 1 Lyon Center Grace and Avery 13:55 14:05
3 Tuesday 26 175 5 In front of track Hailey and Madison 13:57 14:02
2 Monday 10 107 4 Parkside: Walkway Jett and Dorian 14:05 14:20
2 Wednesday 22 187 1 In front of bookstore Kelly and Charis 14:10 14:20
0 Thursday 7 67 0 In front of bookstore Kelly and Charis 14:41 14:46
0 Tuesday 8 50 2 USC Village Michael and Jonathan 15:00 15:10
1 Wednesday 28 111 6 Lyon Center Grace and Avery 15:00 15:15
2 Wednesday 15 113 7 USC Village Michael and Jonathan 15:25 15:50
2 Wednesday 113 179 14 Lyon Center Grace and Avery 15:25 15:50
4 Wednesday 6 76 2 In front of track Hailey and Madison 15:27 15:32
3 Tuesday 14 105 3 Starbucks by EVK Nicholas and Zack 15:30 15:40
1 Thursday 26 158 6 Starbucks by EVK Nicholas and Zack 15:35 15:45
5 Wednesday 129 20 0 In front of track Hailey and Madison 15:35 15:45
2 Tuesday 4 66 2 SCA Courtyard Aris and Sabrina 16:00 16:30
3 Wednesday 3 39 0 SCA Courtyard Aris and Sabrina 16:00 16:20
2 Monday 71 149 7 McCarthy Quad Lourdes and Emilia 17:45 17:55
1 Tuesday 14 73 1 USC Village Michael and Jonathan 21:10 21:25
1 Friday 23 183 0 McCarthy Quad Lourdes and Emilia 21:30 21:45

In [30]:
print(master_df.reset_index(drop=True)[['Day',  'Skateboarders', 'Skateboarder-Walkers', 'Walkers']].to_latex(index=False))


\begin{tabular}{lrrr}
\toprule
       Day &  Skateboarders &  Skateboarder-Walkers &  Walkers \\
\midrule
    Friday &             18 &                     2 &      176 \\
 Wednesday &             94 &                     1 &       12 \\
 Wednesday &              1 &                     4 &       43 \\
 Wednesday &              6 &                     4 &       22 \\
    Friday &              2 &                     1 &       32 \\
  Thursday &             11 &                     2 &      140 \\
  Thursday &             14 &                     2 &       55 \\
   Tuesday &              2 &                     1 &       56 \\
  Thursday &             30 &                     5 &      245 \\
  Thursday &             14 &                     9 &       39 \\
   Tuesday &             20 &                    11 &       45 \\
  Thursday &              6 &                     1 &       37 \\
   Tuesday &             13 &                     2 &      191 \\
    Friday &             32 &                     5 &      141 \\
 Wednesday &             37 &                     1 &       94 \\
   Tuesday &             26 &                     5 &      175 \\
    Monday &             10 &                     4 &      107 \\
 Wednesday &             22 &                     1 &      187 \\
  Thursday &              7 &                     0 &       67 \\
   Tuesday &              8 &                     2 &       50 \\
 Wednesday &             28 &                     6 &      111 \\
 Wednesday &             15 &                     7 &      113 \\
 Wednesday &            113 &                    14 &      179 \\
 Wednesday &              6 &                     2 &       76 \\
   Tuesday &             14 &                     3 &      105 \\
  Thursday &             26 &                     6 &      158 \\
 Wednesday &            129 &                     0 &       20 \\
   Tuesday &              4 &                     2 &       66 \\
 Wednesday &              3 &                     0 &       39 \\
    Monday &             71 &                     7 &      149 \\
   Tuesday &             14 &                     1 &       73 \\
    Friday &             23 &                     0 &      183 \\
\bottomrule
\end{tabular}


In [31]:
import seaborn as sns
sns.set_style('whitegrid')
sns.set_context('talk', font_scale=2)

In [32]:
df_molten = pd.melt(master_df, id_vars=['Day', 'sheet_name', 'start', 'end', 'Location'], value_vars=['Skateboarders', 
                                                                                          'Walkers',
                                                                                         'Skateboarder-Walkers'])

In [33]:
pd.to_datetime('06:00')


Out[33]:
Timestamp('2018-09-06 06:00:00')

In [34]:
df_molten['start_time'] = pd.to_datetime(df_molten['start'])
df_molten['end_time'] = pd.to_datetime(df_molten['end'])

In [35]:
df_molten['delta'] = df_molten['end_time'] - df_molten['start_time']

In [36]:
fig, ax = plt.subplots(figsize=(12,10))
sns.barplot(data=df_molten, hue='variable', x='sheet_name', y='value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_m_sheet_wise_plots.pdf')



In [40]:
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='Day', y='value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_m_day_wise_plots.pdf')



In [39]:
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='Day', y='value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_m_day_wise_plots.pdf')



In [41]:
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='Location', y='value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_m_location_wise_plots.pdf')



In [42]:
df_molten['delta_min'] = df_molten['delta'].dt.components.minutes

In [17]:
sns.lmplot(x="delta_min", y="value", hue="variable", data=df_molten, size=15)
plt.savefig('thursday_m_time_wise_plots.pdf')



In [44]:
df_molten['normalized_value'] = df_molten['value']/df_molten['delta_min']

In [45]:
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='sheet_name', y='normalized_value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_m_sheet_wise_plots_normalized.pdf')



In [46]:
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='Location', y='normalized_value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_m_location_wise_plots_normalized.pdf')



In [19]:
sns.lmplot(x="Location", y="value", hue="variable", data=df_molten, size=15)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-1021d129ca0b> in <module>()
----> 1 sns.lmplot(x="Location", y="value", hue="variable", data=df_molten, size=15)

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, size, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws)
    588         scatter_kws=scatter_kws, line_kws=line_kws,
    589         )
--> 590     facets.map_dataframe(regplot, x, y, **regplot_kws)
    591 
    592     # Add a legend

~/anaconda3/lib/python3.6/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
    807 
    808             # Draw the plot
--> 809             self._facet_plot(func, ax, args, kwargs)
    810 
    811         # Finalize the annotations and layout

~/anaconda3/lib/python3.6/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
    825 
    826         # Draw the plot
--> 827         func(*plot_args, **plot_kwargs)
    828 
    829         # Sort out the supporting information

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    788     scatter_kws["marker"] = marker
    789     line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 790     plotter.plot(ax, scatter_kws, line_kws)
    791     return ax
    792 

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in plot(self, ax, scatter_kws, line_kws)
    340             self.scatterplot(ax, scatter_kws)
    341         if self.fit_reg:
--> 342             self.lineplot(ax, line_kws)
    343 
    344         # Label the axes

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in lineplot(self, ax, kws)
    385 
    386         # Fit the regression model
--> 387         grid, yhat, err_bands = self.fit_regression(ax)
    388 
    389         # Get set default aesthetics

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in fit_regression(self, ax, x_range, grid)
    208             yhat, yhat_boots = self.fit_logx(grid)
    209         else:
--> 210             yhat, yhat_boots = self.fit_fast(grid)
    211 
    212         # Compute the confidence interval at each grid point

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in fit_fast(self, grid)
    223         grid = np.c_[np.ones(len(grid)), grid]
    224         reg_func = lambda _x, _y: np.linalg.pinv(_x).dot(_y)
--> 225         yhat = grid.dot(reg_func(X, y))
    226         if self.ci is None:
    227             return yhat, None

~/anaconda3/lib/python3.6/site-packages/seaborn/regression.py in <lambda>(_x, _y)
    222         X, y = np.c_[np.ones(len(self.x)), self.x], self.y
    223         grid = np.c_[np.ones(len(grid)), grid]
--> 224         reg_func = lambda _x, _y: np.linalg.pinv(_x).dot(_y)
    225         yhat = grid.dot(reg_func(X, y))
    226         if self.ci is None:

~/anaconda3/lib/python3.6/site-packages/numpy/linalg/linalg.py in pinv(a, rcond)
   1721         res = empty(a.shape[:-2] + (a.shape[-1], a.shape[-2]), dtype=a.dtype)
   1722         return wrap(res)
-> 1723     a = a.conjugate()
   1724     u, s, vt = svd(a, full_matrices=False)
   1725 

AttributeError: 'str' object has no attribute 'conjugate'

In [ ]: