In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tools.plotting import scatter_matrix
from io import StringIO
import numpy as np
import datetime
import os
import glob
In [67]:
path='./'
extension = 'csv'
os.chdir(path)
result = [i for i in glob.glob('*.{}'.format(extension))]
print(result)
print(result[0])
['Siddhant Dange.csv', 'Robbie Tian.csv', 'Brittany Stagman.csv', 'Marcello Guadagno.csv', 'Dannish Tyrewala.csv', 'David Adamovicz.csv', 'Elizabeth Bergh.csv', 'Max Ellingson.csv', 'Jacob Prochnow.csv', 'Erica Jacobson.csv', 'Adam Augustyniak.csv', 'Riley Stroven.csv', 'Julia Smit.csv', 'Robert Mikula-Malstrom.csv', 'Paras Ravindra Ghumare.csv', 'Anna Wheatley.csv', 'John Stough.csv', 'Jesse Lassila.csv']
Siddhant Dange.csv
In [91]:
names = ['day', 'e01', 'a01', 'e02', 'a02', 'e03','a03','e04','a04','e05','a05']
df=pd.read_csv(result[0],skiprows=range(1, 3),names=names)
df.
Out[91]:
NaN Week 1 Unnamed: 1 Week 2 Unnamed: 3 Week 3 Unnamed: 5 Week 4 Unnamed: 7 Week 5 Unnamed: 9 Week 6 Unnamed: 11 Week 7 Unnamed: 13 Week 8 Unnamed: 15 Week 9 Unnamed: 17 Week 10 Unnamed: 19 Week 11 Unnamed: 31
Monday 0 0 0 0 1 1 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN
Tuesday 0 0 2.5 2.5 3.5 3.5 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN
Wednesday 0 0 1 1 2 2 Career Fair NaN 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN
Thursday 0 0 2.5 2.5 2.5 2.5 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN 2.5 NaN
Friday 0 0 0 0 0 0 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN 0 NaN
Saturday 0 0 1 0 2 2 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN 1 NaN
Review 0 0 1.5 1.5 1.5 1.5 1.5 NaN 1.5 NaN 1.5 NaN 1.5 NaN 1.5 NaN 1.5 NaN 1.5 NaN 1.5 0
Total 0 0 8.5 7.5 12.5 12.5 7.5 0 8.5 0 8.5 0 8.5 0 8.5 0 8.5 0 8.5 0 8.5 0
Name: a05, dtype: object
In [73]:
#read them into pandas
df_list=[pd.read_csv(fn,skiprows=range(1, 3)) for fn in result]
#concatenate them together
big_df = pd.concat(df_list, keys=['coach_1', 'coach_2'])
In [78]:
len(big_df)
big_df.fillna(0)
Out[78]:
Unnamed: 0
Week 1
Unnamed: 1
Week 2
Unnamed: 3
Week 3
Unnamed: 5
Week 4
Unnamed: 7
Week 5
...
Week 12
Unnamed: 23
Week 13
Unnamed: 25
Week 14
Unnamed: 27
Week 15
Unnamed: 29
Week 16
Unnamed: 31
coach_1
0
Monday
0
0
0.0
0.0
1.0
1.0
0
0
0.0
...
Thanks giving
0
0.0
0
0.0
0
0.0
0
0.0
0
1
Tuesday
0
0
2.5
2.5
3.5
3.5
2.5
0
2.5
...
0
0
2.5
0
2.5
0
2.5
0
2.5
0
2
Wednesday
0
0
1.0
1.0
2.0
2.0
Career Fair
0
1.0
...
0
0
1.0
0
1.0
0
1.0
0
1.0
0
3
Thursday
0
0
2.5
2.5
2.5
2.5
2.5
0
2.5
...
0
0
2.5
0
2.5
0
2.5
0
2.5
0
4
Friday
0
0
0.0
0.0
0.0
0.0
0
0
0.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
5
Saturday
0
0
1.0
0.0
2.0
2.0
1
0
1.0
...
0
0
1.0
0
1.0
0
1.0
0
1.0
0
6
Review
0
0
1.5
1.5
1.5
1.5
1.5
0
1.5
...
0
0
1.5
0
1.5
0
1.5
0
1.5
0
7
Total
0
0
8.5
7.5
12.5
12.5
7.5
0
8.5
...
0
0
8.5
0
8.5
0
8.5
0
8.5
0
coach_2
0
Monday
0
0
1.0
0.0
1.0
0.0
1
0
1.0
...
Thanksgiving
0
0.0
0
0.0
0
0.0
0
0.0
0
1
Tuesday
0
0
0.0
0.0
0.0
0.0
0
0
0.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
2
Wednesday
0
0
1.0
0.0
1.0
0.0
Career Fair
0
1.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
3
Thursday
0
0
0.0
0.0
0.0
0.0
0
0
0.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
4
Friday
0
0
0.0
0.0
0.0
0.0
0
0
0.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
5
Saturday
0
0
1.0
0.0
1.0
0.0
1
0
1.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
6
Review session
2
0
1.0
0.0
1.0
0.0
1
0
1.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
7
Total
2
0
4.0
0.0
4.0
0.0
3
0
4.0
...
0
0
0.0
0
0.0
0
0.0
0
0.0
0
16 rows × 33 columns
In [80]:
big_df.loc['coach_1']
Out[80]:
Unnamed: 0
Week 1
Unnamed: 1
Week 2
Unnamed: 3
Week 3
Unnamed: 5
Week 4
Unnamed: 7
Week 5
...
Week 12
Unnamed: 23
Week 13
Unnamed: 25
Week 14
Unnamed: 27
Week 15
Unnamed: 29
Week 16
Unnamed: 31
0
Monday
0
0
0.0
0.0
1.0
1.0
0
NaN
0.0
...
Thanks giving
NaN
0.0
NaN
0.0
NaN
0.0
NaN
0.0
NaN
1
Tuesday
0
0
2.5
2.5
3.5
3.5
2.5
NaN
2.5
...
NaN
NaN
2.5
NaN
2.5
NaN
2.5
NaN
2.5
NaN
2
Wednesday
0
0
1.0
1.0
2.0
2.0
Career Fair
NaN
1.0
...
NaN
NaN
1.0
NaN
1.0
NaN
1.0
NaN
1.0
NaN
3
Thursday
0
0
2.5
2.5
2.5
2.5
2.5
NaN
2.5
...
NaN
NaN
2.5
NaN
2.5
NaN
2.5
NaN
2.5
NaN
4
Friday
0
0
0.0
0.0
0.0
0.0
0
NaN
0.0
...
NaN
NaN
0.0
NaN
0.0
NaN
0.0
NaN
0.0
NaN
5
Saturday
0
0
1.0
0.0
2.0
2.0
1
NaN
1.0
...
NaN
NaN
1.0
NaN
1.0
NaN
1.0
NaN
1.0
NaN
6
Review
0
0
1.5
1.5
1.5
1.5
1.5
NaN
1.5
...
NaN
NaN
1.5
NaN
1.5
NaN
1.5
NaN
1.5
0
7
Total
0
0
8.5
7.5
12.5
12.5
7.5
0
8.5
...
0
0
8.5
0
8.5
0
8.5
0
8.5
0
8 rows × 33 columns
In [ ]:
Content source: dnaneet/ELC
Similar notebooks: