In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tools.plotting import scatter_matrix
from io import StringIO
import numpy as np
import datetime
import os
import glob

In [67]:
path='./'
extension = 'csv'
os.chdir(path)
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)
print(result[0])


['Siddhant Dange.csv', 'Robbie Tian.csv', 'Brittany Stagman.csv', 'Marcello Guadagno.csv', 'Dannish Tyrewala.csv', 'David Adamovicz.csv', 'Elizabeth Bergh.csv', 'Max Ellingson.csv', 'Jacob Prochnow.csv', 'Erica Jacobson.csv', 'Adam Augustyniak.csv', 'Riley Stroven.csv', 'Julia Smit.csv', 'Robert Mikula-Malstrom.csv', 'Paras Ravindra Ghumare.csv', 'Anna Wheatley.csv', 'John Stough.csv', 'Jesse Lassila.csv']
Siddhant Dange.csv

In [91]:
names = ['day', 'e01', 'a01', 'e02', 'a02', 'e03','a03','e04','a04','e05','a05']
df=pd.read_csv(result[0],skiprows=range(1, 3),names=names)
df.


Out[91]:
NaN        Week 1  Unnamed: 1  Week 2  Unnamed: 3  Week 3  Unnamed: 5  Week 4       Unnamed: 7  Week 5  Unnamed: 9  Week 6  Unnamed: 11  Week 7  Unnamed: 13  Week 8  Unnamed: 15  Week 9  Unnamed: 17  Week 10  Unnamed: 19  Week 11    Unnamed: 31
Monday     0       0           0       0           1       1           0            NaN         0       NaN         0       NaN          0       NaN          0       NaN          0       NaN          0        NaN          0                  NaN
Tuesday    0       0           2.5     2.5         3.5     3.5         2.5          NaN         2.5     NaN         2.5     NaN          2.5     NaN          2.5     NaN          2.5     NaN          2.5      NaN          2.5                NaN
Wednesday  0       0           1       1           2       2           Career Fair  NaN         1       NaN         1       NaN          1       NaN          1       NaN          1       NaN          1        NaN          1                  NaN
Thursday   0       0           2.5     2.5         2.5     2.5         2.5          NaN         2.5     NaN         2.5     NaN          2.5     NaN          2.5     NaN          2.5     NaN          2.5      NaN          2.5                NaN
Friday     0       0           0       0           0       0           0            NaN         0       NaN         0       NaN          0       NaN          0       NaN          0       NaN          0        NaN          0                  NaN
Saturday   0       0           1       0           2       2           1            NaN         1       NaN         1       NaN          1       NaN          1       NaN          1       NaN          1        NaN          1                  NaN
Review     0       0           1.5     1.5         1.5     1.5         1.5          NaN         1.5     NaN         1.5     NaN          1.5     NaN          1.5     NaN          1.5     NaN          1.5      NaN          1.5                  0
Total      0       0           8.5     7.5         12.5    12.5        7.5          0           8.5     0           8.5     0            8.5     0            8.5     0            8.5     0            8.5      0            8.5                  0
Name: a05, dtype: object

In [73]:
#read them into pandas
df_list=[pd.read_csv(fn,skiprows=range(1, 3)) for fn in result]
#concatenate them together
big_df = pd.concat(df_list, keys=['coach_1', 'coach_2'])

In [78]:
len(big_df)
big_df.fillna(0)


Out[78]:
Unnamed: 0 Week 1 Unnamed: 1 Week 2 Unnamed: 3 Week 3 Unnamed: 5 Week 4 Unnamed: 7 Week 5 ... Week 12 Unnamed: 23 Week 13 Unnamed: 25 Week 14 Unnamed: 27 Week 15 Unnamed: 29 Week 16 Unnamed: 31
coach_1 0 Monday 0 0 0.0 0.0 1.0 1.0 0 0 0.0 ... Thanks giving 0 0.0 0 0.0 0 0.0 0 0.0 0
1 Tuesday 0 0 2.5 2.5 3.5 3.5 2.5 0 2.5 ... 0 0 2.5 0 2.5 0 2.5 0 2.5 0
2 Wednesday 0 0 1.0 1.0 2.0 2.0 Career Fair 0 1.0 ... 0 0 1.0 0 1.0 0 1.0 0 1.0 0
3 Thursday 0 0 2.5 2.5 2.5 2.5 2.5 0 2.5 ... 0 0 2.5 0 2.5 0 2.5 0 2.5 0
4 Friday 0 0 0.0 0.0 0.0 0.0 0 0 0.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
5 Saturday 0 0 1.0 0.0 2.0 2.0 1 0 1.0 ... 0 0 1.0 0 1.0 0 1.0 0 1.0 0
6 Review 0 0 1.5 1.5 1.5 1.5 1.5 0 1.5 ... 0 0 1.5 0 1.5 0 1.5 0 1.5 0
7 Total 0 0 8.5 7.5 12.5 12.5 7.5 0 8.5 ... 0 0 8.5 0 8.5 0 8.5 0 8.5 0
coach_2 0 Monday 0 0 1.0 0.0 1.0 0.0 1 0 1.0 ... Thanksgiving 0 0.0 0 0.0 0 0.0 0 0.0 0
1 Tuesday 0 0 0.0 0.0 0.0 0.0 0 0 0.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
2 Wednesday 0 0 1.0 0.0 1.0 0.0 Career Fair 0 1.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
3 Thursday 0 0 0.0 0.0 0.0 0.0 0 0 0.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
4 Friday 0 0 0.0 0.0 0.0 0.0 0 0 0.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
5 Saturday 0 0 1.0 0.0 1.0 0.0 1 0 1.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
6 Review session 2 0 1.0 0.0 1.0 0.0 1 0 1.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0
7 Total 2 0 4.0 0.0 4.0 0.0 3 0 4.0 ... 0 0 0.0 0 0.0 0 0.0 0 0.0 0

16 rows × 33 columns


In [80]:
big_df.loc['coach_1']


Out[80]:
Unnamed: 0 Week 1 Unnamed: 1 Week 2 Unnamed: 3 Week 3 Unnamed: 5 Week 4 Unnamed: 7 Week 5 ... Week 12 Unnamed: 23 Week 13 Unnamed: 25 Week 14 Unnamed: 27 Week 15 Unnamed: 29 Week 16 Unnamed: 31
0 Monday 0 0 0.0 0.0 1.0 1.0 0 NaN 0.0 ... Thanks giving NaN 0.0 NaN 0.0 NaN 0.0 NaN 0.0 NaN
1 Tuesday 0 0 2.5 2.5 3.5 3.5 2.5 NaN 2.5 ... NaN NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN
2 Wednesday 0 0 1.0 1.0 2.0 2.0 Career Fair NaN 1.0 ... NaN NaN 1.0 NaN 1.0 NaN 1.0 NaN 1.0 NaN
3 Thursday 0 0 2.5 2.5 2.5 2.5 2.5 NaN 2.5 ... NaN NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN
4 Friday 0 0 0.0 0.0 0.0 0.0 0 NaN 0.0 ... NaN NaN 0.0 NaN 0.0 NaN 0.0 NaN 0.0 NaN
5 Saturday 0 0 1.0 0.0 2.0 2.0 1 NaN 1.0 ... NaN NaN 1.0 NaN 1.0 NaN 1.0 NaN 1.0 NaN
6 Review 0 0 1.5 1.5 1.5 1.5 1.5 NaN 1.5 ... NaN NaN 1.5 NaN 1.5 NaN 1.5 NaN 1.5 0
7 Total 0 0 8.5 7.5 12.5 12.5 7.5 0 8.5 ... 0 0 8.5 0 8.5 0 8.5 0 8.5 0

8 rows × 33 columns


In [ ]: