In [1]:
import pandas as pd
In [8]:
# Load the per-PI total CPU hours export into a DataFrame.
# header=7: pandas takes the column names from 0-based row 7 and ignores the
# rows before it (presumably report preamble — TODO confirm against the file).
hours = pd.read_csv("madison_CPU_Hours__Total__by_PI_2014-04-30_to_2015-04-30_aggregate.csv", header=7)
In [9]:
hours
Out[9]:
In [4]:
# Read only the first line of cols.csv. The context manager guarantees the
# file handle is closed as soon as that line has been read.
with open('cols.csv') as cols_file:
    header_fields = cols_file.readline().strip().split(',')
# Keep just the first two comma-separated fields.
cols = header_fields[0:2]
# Drop the first and last character of the second field (presumably the
# surrounding quote characters — TODO confirm against cols.csv).
cols[1] = cols[1][1:-1]
cols
Out[4]:
In [12]:
# Load the per-PI "job size per job (core count)" export into a DataFrame.
# header=7: pandas takes the column names from 0-based row 7 and ignores the
# rows before it (presumably report preamble — TODO confirm against the file).
size = pd.read_csv("madison_Job_Size__Per_Job_(Core_Count)__by_PI_2014-04-30_to_2015-04-30_aggregate.csv", header=7)
In [13]:
size
Out[13]:
In [14]:
# Join the two tables on their shared "PI" column. No `how` is given, so
# pandas performs an inner join: a PI missing from either table is dropped.
total = hours.merge(size, on="PI")
In [15]:
type(total["PI"])
Out[15]:
In [18]:
# Keep only the part of each PI value that precedes the first '-'.
# This overwrites the "PI" column of `total` in place.
total["PI"] = total["PI"].str.split('-').str[0]
In [70]:
In [19]:
total
Out[19]:
In [1]:
import glob
import sys
In [5]:
# Date string that a file name must contain to be selected.
date = "2014-04-30"
# Glob pattern: any file name with the date anywhere inside it.
filenames = "*{}*".format(date)
filenames
Out[5]:
In [6]:
# glob.glob returns matches in arbitrary, filesystem-dependent order, and
# file_list[0] is used later to derive names and to seed the merge, so sort
# the matches to make the run reproducible.
# The merged CSV this notebook writes further down
# ("madison_2014-04-30_to_2015-04-30_aggregate.csv") also matches the pattern;
# exclude it so a re-run does not merge the previous output back in.
file_list = sorted(
    path for path in glob.glob(filenames)
    if path != "madison_2014-04-30_to_2015-04-30_aggregate.csv"
)
In [7]:
file_list
Out[7]:
In [10]:
# Split the first matched file name on underscores; the trailing pieces are
# used next to assemble a shorter name.
save_list = file_list[0].split('_')
In [12]:
# Build "madison_<piece>_..._<piece>" from the last five underscore-separated
# pieces of the first file name (fewer if the name has fewer pieces).
# str.join replaces the manual accumulation loop.
save = "_".join(["madison"] + save_list[-5:])
save
Out[12]:
In [30]:
# Seed the merged frame with only the "PI" column of the first matched file;
# the loop that follows merges every file's columns onto it.
# header=7: column names are read from 0-based row 7 of the file.
df = pd.read_csv(file_list[0], header=7, usecols=["PI"])
In [32]:
# Fold every matched CSV into `df`, joining on the shared "PI" column.
# No `how` is given, so each step is an inner join: a PI absent from any one
# file is dropped from the result.
# NOTE: this cell rebinds `df`; re-running it without re-running the seeding
# cell merges the same files a second time.
for csv_path in file_list:
    per_file = pd.read_csv(csv_path, header=7)
    df = pd.merge(df, per_file, on="PI")
In [33]:
df
Out[33]:
In [34]:
# Write the merged table to CSV without the pandas index column.
# NOTE(review): this file name contains "2014-04-30", so it matches the
# "*2014-04-30*" glob pattern built earlier — confirm that a re-run does not
# pick up the previous output as an input.
df.to_csv("madison_2014-04-30_to_2015-04-30_aggregate.csv", index=False)
In [1]:
import pandas as pd
In [4]:
# Explicit list of the April 2015 per-PI exports, one entry per line.
# The names carry no "madison_" prefix; it is prepended where a file is read.
file_list = [
    "CPU_Hours__Total__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
    "Job_Size__Max_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
    "Job_Size__Min_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
    "Job_Size__Per_Job_(Core_Count)__by_PI_2015-04-01_to_2015-04-30_aggregate.csv",
]
In [5]:
file_list
Out[5]:
In [12]:
# Read only the "PI" column of the first listed export, prepending the
# "madison_" prefix to form the file name.
# NOTE(review): header=6 here, whereas the other reads in this notebook use
# header=7 — confirm this export really has its column names one row earlier.
df = pd.read_csv("madison_"+file_list[0], header=6, usecols=["PI"])
In [13]:
df
Out[13]:
In [ ]: