In [63]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib

More females than males visit Necter



In [64]:

    
import pandas as pd

dfs = pd.read_excel('/home/saket/BISC104-Session01-Thursday-A.xlsx', sheet_name=[1, 2, 3, 4, 5, 6,
                                                                                 7, 8, 9, 10, 11, 12])

sheet_number_to_name = {1: 'Mingyu Park',
                        2: 'William Garrick',
                        3: 'Mariah & Ahmed',
                        4: 'Reem AlFaisal',
                        5: 'Sophia and Catherine',
                        6: 'Jesus Juarez',
                        7: 'Reagor Wagnon',
                        8: 'Yuyuan Chen',
                        9: 'Gus and Vedika',
                       10: 'Zak Cook',
                       11: 'Sam Gholami',
                       12: 'Hannah Sobelman and Erin Philen'}

master_dfs = []
for key in dfs.keys():
    df = dfs[key]
    df.columns = list(map(lambda x: x.replace(' ',''), df.columns))
    df['sheet_name'] = sheet_number_to_name[key]
    try:
        df['Time'] = df['Time'].str.replace(' ','') 
    except:
        print(df.columns)
    df['start'] , df['end'] = df['Time'].str.split('-', 1).str
    master_dfs.append(df)



In [65]:

    
master_df = pd.concat(master_dfs)
master_df.Day = master_df.Day.str.replace(' ', '')
master_df = master_df.drop(columns=['Time'])



In [66]:

    
master_df = master_df.sort_values(by=['start', 'Day'])



In [67]:

    
print(master_df.reset_index(drop=True).to_latex(index=False))









    



\begin{tabular}{lrrlll}
\toprule
       Day &  \#Skateboarders &  \#Bikeriders &                       sheet\_name &  start &    end \\
\midrule
 Wednesday &              17 &           13 &                     Jesus Juarez &  10:00 &  10:05 \\
   Tuesday &              29 &           23 &             Sophia and Catherine &  10:35 &  10:50 \\
    Friday &              37 &           43 &                         Zak Cook &  10:50 &  11:00 \\
 Wednesday &              15 &           14 &                  William Garrick &  11:00 &  11:05 \\
 Wednesday &              21 &           42 &                    Reem AlFaisal &  11:00 &  11:10 \\
 Wednesday &               9 &           37 &                      Sam Gholami &  11:02 &  11:07 \\
    Friday &              32 &           47 &             Sophia and Catherine &  11:15 &  11:25 \\
   Tuesday &              23 &           79 &                      Sam Gholami &  11:30 &  11:40 \\
   Tuesday &              34 &           52 &                         Zak Cook &  11:50 &  12:05 \\
   Tuesday &              52 &          107 &  Hannah Sobelman and Erin Philen &  12:00 &  12:15 \\
    Friday &              17 &           16 &                   Gus and Vedika &  13:00 &  13:05 \\
   Tuesday &              12 &           14 &                     Jesus Juarez &  13:00 &  13:05 \\
   Tuesday &              12 &           14 &                   Gus and Vedika &  13:00 &  13:05 \\
 Wednesday &              13 &           13 &                   Gus and Vedika &  13:00 &  13:05 \\
 Wednesday &              44 &          147 &  Hannah Sobelman and Erin Philen &  13:00 &  13:15 \\
   Tuesday &              21 &           83 &                      Sam Gholami &  13:09 &  13:19 \\
    Sunday &              13 &           14 &                      Yuyuan Chen &  13:26 &  13:33 \\
    Friday &              30 &           40 &                      Yuyuan Chen &  13:59 &  14:06 \\
    Monday &               4 &            3 &                    Reagor Wagnon &  14:00 &  14:05 \\
    Sunday &              22 &           13 &                     Jesus Juarez &  14:00 &  14:05 \\
   Tuesday &              20 &           18 &                  William Garrick &  14:00 &  14:10 \\
   Tuesday &               9 &           14 &                    Reagor Wagnon &  14:00 &  14:05 \\
 Wednesday &              17 &           19 &                    Reagor Wagnon &  14:00 &  14:05 \\
 Wednesday &              12 &           21 &                      Mingyu Park &  14:10 &  14:20 \\
 Wednesday &              10 &           28 &             Sophia and Catherine &  14:20 &  14:30 \\
   Tuesday &              44 &           39 &                   Mariah \& Ahmed &  15:00 &  15:25 \\
  Thursday &              91 &          133 &  Hannah Sobelman and Erin Philen &  15:15 &  15:45 \\
 Wednesday &              30 &           88 &                      Mingyu Park &  15:15 &  15:25 \\
    Friday &             121 &          135 &                    Reem AlFaisal &  15:32 &  16:00 \\
    Monday &              39 &           47 &                   Mariah \& Ahmed &  15:45 &  17:30 \\
 Wednesday &              19 &           41 &                      Mingyu Park &  16:02 &  16:07 \\
    Monday &              35 &           28 &                  William Garrick &  17:15 &  17:30 \\
 Wednesday &              57 &           55 &                   Mariah \& Ahmed &  17:15 &  18:05 \\
   Tuesday &             120 &          181 &                    Reem AlFaisal &  17:23 &  18:00 \\
  Thursday &              21 &           27 &                      Yuyuan Chen &  19:39 &  19:46 \\
\bottomrule
\end{tabular}



In [68]:

    
master_df = master_df.rename(columns={'#Skateboarders': 'Skateboarders', '#Bikeriders': 'Bikeriders'})



In [69]:

    
master_df.head()









    Out[69]:







  
    
      
      Day
      Skateboarders
      Bikeriders
      sheet_name
      start
      end
    
  
  
    
      2
      Wednesday
      17
      13
      Jesus Juarez
      10:00
      10:05
    
    
      1
      Tuesday
      29
      23
      Sophia and Catherine
      10:35
      10:50
    
    
      0
      Friday
      37
      43
      Zak Cook
      10:50
      11:00
    
    
      2
      Wednesday
      15
      14
      William Garrick
      11:00
      11:05
    
    
      2
      Wednesday
      21
      42
      Reem AlFaisal
      11:00
      11:10



In [70]:

    
import seaborn as sns
sns.set_style('whitegrid')
sns.set_context('talk', font_scale=2)



In [71]:

    
df_molten = pd.melt(master_df, id_vars=['Day', 'sheet_name', 'start', 'end'], 
                    value_vars=['Skateboarders', 'Bikeriders'])



In [72]:

    
pd.to_datetime('06:00')









    Out[72]:





Timestamp('2018-09-06 06:00:00')



In [73]:

    
df_molten['start_time'] = pd.to_datetime(df_molten['start'])
df_molten['end_time'] = pd.to_datetime(df_molten['end'])



In [74]:

    
df_molten['delta'] = df_molten['end_time'] - df_molten['start_time']



In [75]:

    
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='sheet_name', y='value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_a_sheet_wise_plots.pdf')



In [76]:

    
fig, ax = plt.subplots(figsize=(12,8))
sns.barplot(data=df_molten, hue='variable', x='Day', y='value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_a_day_wise_plots.pdf')



In [ ]:



In [77]:

    
df_molten['delta_min'] = df_molten['delta'].dt.components.minutes
df_molten['normalized_value'] = df_molten['value']/df_molten['delta_min']



In [78]:

    
fig, ax = plt.subplots(figsize=(12,8))
sns.barplot(data=df_molten, hue='variable', x='Day', y='normalized_value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_a_day_wise_plots_normalized.pdf')



In [33]:

    
fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=df_molten, hue='variable', x='sheet_name', y='normalized_value', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)

fig.tight_layout()
fig.savefig('thursday_a_sheet_wise_plots_normalized.pdf')



In [51]:

    
df_molten['start_hours'] = df_molten['start_time'].dt.hour



In [53]:

    
df_molten.head()









    Out[53]:







  
    
      
      Day
      sheet_name
      start
      end
      variable
      value
      start_time
      end_time
      delta
      delta_min
      normalized_value
      start_hours
    
  
  
    
      0
      Wednesday
      Jesus Juarez
      10:00
      10:05
      Skateboarders
      17
      2018-09-06 10:00:00
      2018-09-06 10:05:00
      00:05:00
      5
      3.400000
      10
    
    
      1
      Tuesday
      Sophia and Catherine
      10:35
      10:50
      Skateboarders
      29
      2018-09-06 10:35:00
      2018-09-06 10:50:00
      00:15:00
      15
      1.933333
      10
    
    
      2
      Friday
      Zak Cook
      10:50
      11:00
      Skateboarders
      37
      2018-09-06 10:50:00
      2018-09-06 11:00:00
      00:10:00
      10
      3.700000
      10
    
    
      3
      Wednesday
      William Garrick
      11:00
      11:05
      Skateboarders
      15
      2018-09-06 11:00:00
      2018-09-06 11:05:00
      00:05:00
      5
      3.000000
      11
    
    
      4
      Wednesday
      Reem AlFaisal
      11:00
      11:10
      Skateboarders
      21
      2018-09-06 11:00:00
      2018-09-06 11:10:00
      00:10:00
      10
      2.100000
      11



In [62]:

    
df_molten['subject'] = 0
df_molten.head()









    Out[62]:







  
    
      
      Day
      sheet_name
      start
      end
      variable
      value
      start_time
      end_time
      delta
      delta_min
      normalized_value
      start_hours
      subject
    
  
  
    
      0
      Wednesday
      Jesus Juarez
      10:00
      10:05
      Skateboarders
      17
      2018-09-06 10:00:00
      2018-09-06 10:05:00
      00:05:00
      5
      3.400000
      10
      0
    
    
      1
      Tuesday
      Sophia and Catherine
      10:35
      10:50
      Skateboarders
      29
      2018-09-06 10:35:00
      2018-09-06 10:50:00
      00:15:00
      15
      1.933333
      10
      0
    
    
      2
      Friday
      Zak Cook
      10:50
      11:00
      Skateboarders
      37
      2018-09-06 10:50:00
      2018-09-06 11:00:00
      00:10:00
      10
      3.700000
      10
      0
    
    
      3
      Wednesday
      William Garrick
      11:00
      11:05
      Skateboarders
      15
      2018-09-06 11:00:00
      2018-09-06 11:05:00
      00:05:00
      5
      3.000000
      11
      0
    
    
      4
      Wednesday
      Reem AlFaisal
      11:00
      11:10
      Skateboarders
      21
      2018-09-06 11:00:00
      2018-09-06 11:10:00
      00:10:00
      10
      2.100000
      11
      0



In [61]:

    
sns.tsplot(data=df_molten, time="start_hours", unit='subject', value="normalized_value")









    



/home/saket/anaconda3/lib/python3.6/site-packages/seaborn/timeseries.py:183: UserWarning: The tsplot function is deprecated and will be removed or replaced (in a substantially altered version) in a future release.
  warnings.warn(msg, UserWarning)






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-61-f4c2dbb9c6a9> in <module>()
----> 1 sns.tsplot(data=df_molten, time="start_hours", unit='subject', value="normalized_value")

~/anaconda3/lib/python3.6/site-packages/seaborn/timeseries.py in tsplot(data, time, unit, condition, value, err_style, ci, interpolate, color, estimator, n_boot, err_palette, err_kws, legend, ax, **kwargs)
    294     for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)):
    295 
--> 296         df_c = df_c.pivot(unit, time, value)
    297         x = df_c.columns.values.astype(np.float)
    298 

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in pivot(self, index, columns, values)
   5189         """
   5190         from pandas.core.reshape.reshape import pivot
-> 5191         return pivot(self, index=index, columns=columns, values=values)
   5192 
   5193     _shared_docs['pivot_table'] = """

~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in pivot(self, index, columns, values)
    407             indexed = self._constructor_sliced(self[values].values,
    408                                                index=index)
--> 409     return indexed.unstack(columns)
    410 
    411 

~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in unstack(self, level, fill_value)
   2895         """
   2896         from pandas.core.reshape.reshape import unstack
-> 2897         return unstack(self, level, fill_value)
   2898 
   2899     # ----------------------------------------------------------------------

~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in unstack(obj, level, fill_value)
    493         unstacker = _Unstacker(obj.values, obj.index, level=level,
    494                                fill_value=fill_value,
--> 495                                constructor=obj._constructor_expanddim)
    496         return unstacker.get_result()
    497 

~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in __init__(self, values, index, level, value_columns, fill_value, constructor)
    129 
    130         self._make_sorted_values_labels()
--> 131         self._make_selectors()
    132 
    133     def _make_sorted_values_labels(self):

~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in _make_selectors(self)
    167 
    168         if mask.sum() < len(self.index):
--> 169             raise ValueError('Index contains duplicate entries, '
    170                              'cannot reshape')
    171 

ValueError: Index contains duplicate entries, cannot reshape



In [34]:

    
sns.lmplot(x="delta_min", y="value", hue="variable", data=df_molten, size=15)
plt.savefig('thursday_a_time_wise_plots.pdf')



In [ ]:

	Day	Skateboarders	Bikeriders	sheet_name	start	end
2	Wednesday	17	13	Jesus Juarez	10:00	10:05
1	Tuesday	29	23	Sophia and Catherine	10:35	10:50
0	Friday	37	43	Zak Cook	10:50	11:00
2	Wednesday	15	14	William Garrick	11:00	11:05
2	Wednesday	21	42	Reem AlFaisal	11:00	11:10