In [1]:
import pandas as pd

In [8]:
# Per-PI total CPU hours export. header=7 makes pandas take the column
# names from row 7 (zero-based) and ignore the preamble rows above it.
hours = pd.read_csv(
    "madison_CPU_Hours__Total__by_PI_2014-04-30_to_2015-04-30_aggregate.csv",
    header=7,
)

In [9]:
# Display the loaded CPU-hours table.
hours


Out[9]:
PI CPU Hours: Total
0 Morgan, Dane - U Wisconsin-Madison 3201872
1 Boldyrev, Stanislav - U Wisconsin-Madison 3028756
2 Cui, Qiang - U Wisconsin-Madison 2815000
3 Xia, Qian - U Wisconsin-Madison 2642461
4 Yethiraj, Arun - U Wisconsin-Madison 2035921
5 Pueschel, Moritz J - U Wisconsin-Madison 1351208
6 Voyles, Paul M - U Wisconsin-Madison 699044
7 Lazarian, Alex - U Wisconsin-Madison 494873
8 Schmidt, Jordan R - U Wisconsin-Madison 300689
9 Zhu, Ping - U Wisconsin-Madison 118841
10 Heinz, Sebastian - U Wisconsin-Madison 62269
11 Fu, Chao - U Wisconsin-Madison 54682
12 Van Vleet, Mary Johanna - U Wisconsin-Madison 23367
13 Terry, Paul W - U Wisconsin-Madison 3621
14 Miller, Ian James - U Wisconsin-Madison 768
15 Mikulyuk, Alison F - U Wisconsin-Madison 56
16 Halzen, Francis - U Wisconsin-Madison 53

In [4]:
# Read the first line of cols.csv and keep its first two comma-separated
# column names. The context manager closes the file handle, which a bare
# open() call would leave open.
with open('cols.csv') as cols_file:
    cols = cols_file.readline().strip().split(',')[:2]
# Drop the first and last character of the second name
# (presumably the surrounding quote characters -- verify against cols.csv).
cols[1] = cols[1][1:-1]
cols


Out[4]:
['PI', 'Job Size: Per Job (Core Count)']

In [12]:
# Per-PI job size (core count per job) export. header=7 makes pandas take
# the column names from row 7 (zero-based) and ignore the rows above it.
size = pd.read_csv(
    "madison_Job_Size__Per_Job_(Core_Count)__by_PI_2014-04-30_to_2015-04-30_aggregate.csv",
    header=7,
)

In [13]:
# Display the loaded job-size table.
size


Out[13]:
PI Job Size: Per Job (Core Count)
0 Xia, Qian - U Wisconsin-Madison 858
1 Heinz, Sebastian - U Wisconsin-Madison 592
2 Boldyrev, Stanislav - U Wisconsin-Madison 568
3 Lazarian, Alex - U Wisconsin-Madison 474
4 Pueschel, Moritz J - U Wisconsin-Madison 282
5 Zhu, Ping - U Wisconsin-Madison 280
6 Fu, Chao - U Wisconsin-Madison 112
7 Terry, Paul W - U Wisconsin-Madison 104
8 Voyles, Paul M - U Wisconsin-Madison 90
9 Schmidt, Jordan R - U Wisconsin-Madison 53
10 Morgan, Dane - U Wisconsin-Madison 47
11 Yethiraj, Arun - U Wisconsin-Madison 28
12 Cui, Qiang - U Wisconsin-Madison 27
13 Mikulyuk, Alison F - U Wisconsin-Madison 22
14 Halzen, Francis - U Wisconsin-Madison 16
15 Miller, Ian James - U Wisconsin-Madison 16
16 Van Vleet, Mary Johanna - U Wisconsin-Madison 16

In [14]:
# Join the CPU-hours and job-size tables on their shared "PI" column
# (pandas' default inner join).
total = hours.merge(size, on="PI")

In [15]:
# Inspect the type of the object returned when selecting the "PI" column.
type(total["PI"])


Out[15]:
pandas.core.series.Series

In [18]:
# Strip the institution suffix from each PI entry, keeping only the name.
# Split once on the full " - " separator rather than on every "-": the bare
# "-" split left a trailing space on each name and would cut a hyphenated
# surname short. Entries without the separator are left unchanged, so
# re-running this cell is harmless.
total["PI"] = total["PI"].str.split(" - ", n=1).str[0]

In [70]:


In [19]:
# Display the merged table.
total


Out[19]:
PI CPU Hours: Total Job Size: Per Job (Core Count)
0 Morgan, Dane 3201872 47
1 Boldyrev, Stanislav 3028756 568
2 Cui, Qiang 2815000 27
3 Xia, Qian 2642461 858
4 Yethiraj, Arun 2035921 28
5 Pueschel, Moritz J 1351208 282
6 Voyles, Paul M 699044 90
7 Lazarian, Alex 494873 474
8 Schmidt, Jordan R 300689 53
9 Zhu, Ping 118841 280
10 Heinz, Sebastian 62269 592
11 Fu, Chao 54682 112
12 Van Vleet, Mary Johanna 23367 16
13 Terry, Paul W 3621 104
14 Miller, Ian James 768 16
15 Mikulyuk, Alison F 56 22
16 Halzen, Francis 53 16

In [1]:
import glob
import sys

In [5]:
# Build a glob pattern that matches any file name containing the start date.
date = "2014-04-30"
filenames = "*{}*".format(date)
filenames


Out[5]:
'*2014-04-30*'

In [6]:
# glob.glob() returns matches in arbitrary, filesystem-dependent order, yet
# later cells index file_list[0]; sort so the order is reproducible.
# NOTE(review): the pattern also matches any previously saved output whose
# name contains the date -- confirm the working directory holds only the
# raw exports before re-running.
file_list = sorted(glob.glob(filenames))

In [7]:
# Display the files matched by the glob pattern.
file_list


Out[7]:
['CPU_Hours__Total__by_PI_2014-04-30_to_2015-04-30_aggregate.csv',
 'Job_Size__Per_Job_(Core_Count)__by_PI_2014-04-30_to_2015-04-30_aggregate.csv']

In [10]:
# Break the first matched file name into its underscore-separated tokens.
save_list = file_list[0].split('_')

In [12]:
# Assemble an output file name: "madison" followed by the last five tokens
# of the source file name, all joined with underscores.
save = "_".join(["madison"] + save_list[-5:])
save


Out[12]:
'madison_PI_2014-04-30_to_2015-04-30_aggregate.csv'

In [30]:
# Seed the merged table with just the "PI" key column of the first file.
df = pd.read_csv(
    file_list[0],
    header=7,
    usecols=["PI"],
)

In [32]:
# Merge every matched export into one table keyed on "PI".
# Restart from the key column alone so that re-running this cell is
# idempotent; merging into an already-merged df would add duplicate,
# suffixed (_x/_y) columns.
df = df[["PI"]]
for path in file_list:  # `path` avoids shadowing the Python 2 built-in `file`
    metrics = pd.read_csv(path, header=7)
    # pd.merge defaults to an inner join: a PI absent from any file is dropped.
    df = pd.merge(df, metrics, on="PI")

In [33]:
# Display the merged per-PI table.
df


Out[33]:
PI CPU Hours: Total Job Size: Per Job (Core Count)
0 Morgan, Dane - U Wisconsin-Madison 3201872 47
1 Boldyrev, Stanislav - U Wisconsin-Madison 3028756 568
2 Cui, Qiang - U Wisconsin-Madison 2815000 27
3 Xia, Qian - U Wisconsin-Madison 2642461 858
4 Yethiraj, Arun - U Wisconsin-Madison 2035921 28
5 Pueschel, Moritz J - U Wisconsin-Madison 1351208 282
6 Voyles, Paul M - U Wisconsin-Madison 699044 90
7 Lazarian, Alex - U Wisconsin-Madison 494873 474
8 Schmidt, Jordan R - U Wisconsin-Madison 300689 53
9 Zhu, Ping - U Wisconsin-Madison 118841 280
10 Heinz, Sebastian - U Wisconsin-Madison 62269 592
11 Fu, Chao - U Wisconsin-Madison 54682 112
12 Van Vleet, Mary Johanna - U Wisconsin-Madison 23367 16
13 Terry, Paul W - U Wisconsin-Madison 3621 104
14 Miller, Ian James - U Wisconsin-Madison 768 16
15 Mikulyuk, Alison F - U Wisconsin-Madison 56 22
16 Halzen, Francis - U Wisconsin-Madison 53 16

In [34]:
# Write the merged table to disk without the row-index column.
df.to_csv(
    "madison_2014-04-30_to_2015-04-30_aggregate.csv",
    index=False,
)

In [1]:
import pandas as pd

In [4]:
# Export files for the 2015-04-01 to 2015-04-30 window, one metric per file.
file_list = [
    "CPU_Hours__Total__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
    "Job_Size__Max_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
    "Job_Size__Min_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
    "Job_Size__Per_Job_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
]

In [5]:
# Display the list of export file names.
file_list


Out[5]:
['CPU_Hours__Total__by_PI_2015-04-01_to_2015-04-30_aggregate.csv',
 'Job_Size__Max_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv',
 'Job_Size__Min_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv',
 'Job_Size__Per_Job_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv']

In [12]:
# Load only the "PI" column from the "madison_"-prefixed copy of the first
# listed export; column names come from row 6 (zero-based) of that file.
df = pd.read_csv(
    "madison_{}".format(file_list[0]),
    header=6,
    usecols=["PI"],
)

In [13]:
# Display the PI names that were read.
df


Out[13]:
PI
0 Pueschel, Moritz J - U Wisconsin-Madison
1 Morgan, Dane - U Wisconsin-Madison
2 Cui, Qiang - U Wisconsin-Madison
3 Yethiraj, Arun - U Wisconsin-Madison
4 Boldyrev, Stanislav - U Wisconsin-Madison
5 Heinz, Sebastian - U Wisconsin-Madison
6 Voyles, Paul M - U Wisconsin-Madison
7 Fu, Chao - U Wisconsin-Madison

In [ ]: