In [67]:
# Scan an IOR output file and return the value reported on the first line matching a keyword.
def maxior(filename, kword):
    """Return one token from the first line of ``filename`` that matches ``kword``.

    The file is read line by line. The first line for which
    ``re.search(kword, line)`` succeeds is split on single spaces (empty
    fragments are dropped) and one token is returned:

    * token 2 when the first word of ``kword`` is ``"Max"`` (e.g. ``"Max Write"``),
    * token 1 otherwise (e.g. ``"write"``).

    Parameters
    ----------
    filename : str
        Path of the file to scan.
    kword : str
        Pattern handed to ``re.search``; regex metacharacters are honoured.

    Returns
    -------
    str or None
        The selected token as text (not converted to a number), or ``None``
        when no matching line carries enough tokens.
    """
    import re  # local import: the function does not rely on another cell having run

    value_index = 2 if kword.split(' ')[0] == "Max" else 1
    # `with` closes the handle on every exit path, including the early return.
    with open(filename, "r") as handle:
        for line in handle:
            if not re.search(kword, line):
                continue
            # Build a real list: a lazy filter() object cannot be indexed on Python 3.
            tokens = [tok for tok in line.split(" ") if tok]
            if len(tokens) > value_index:
                return tokens[value_index]
            # A matching line too short to hold a value is skipped instead of
            # raising IndexError; the search continues with the next line.
    return None
def getread(x):
    """Collect the "Max Read" value of every file named in ``x[2]``.

    Parameters
    ----------
    x : sequence
        Only ``x[2]`` is used; it must be an iterable of file paths.
        NOTE(review): this looks like an ``os.walk`` entry
        ``(dirpath, dirnames, filenames)`` - confirm with the caller.

    Returns
    -------
    list
        One ``maxior(f, "Max Read")`` result per file, in iteration order.
        Previously the results were computed and thrown away, so the
        function always returned ``None``.
    """
    return [maxior(f, "Max Read") for f in x[2]]
def getwrite(x):
    """Collect the "Max Write" value of every file named in ``x[2]``.

    Parameters
    ----------
    x : sequence
        Only ``x[2]`` is used; it must be an iterable of file paths.
        NOTE(review): this looks like an ``os.walk`` entry
        ``(dirpath, dirnames, filenames)`` - confirm with the caller.

    Returns
    -------
    list
        One ``maxior(f, "Max Write")`` result per file, in iteration order.
        Previously the results were computed and thrown away, so the
        function always returned ``None``.
    """
    return [maxior(f, "Max Write") for f in x[2]]
In [88]:
# Directory whose files are listed and parsed by the ior_forjob calls below.
# NOTE(review): the name `dir` shadows the Python builtin dir().
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/tmp_has_per_write"
In [89]:
# Make that directory the kernel's working directory (IPython `cd` automagic).
cd $dir
In [68]:
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
import os
import numpy as np
def ior_forjob(dir,jobid,keywords,tt,sb=None):
    """Build a table of parsed IOR values for every log file of one job.

    Every entry of ``dir`` whose name contains ``jobid`` is parsed with
    ``maxior``. The file name is split on ``'.'``: field 1 must have the
    form ``<nodes>x<cores>`` and, when ``sb`` is given, field 2 must be an
    integer segment count.

    Parameters
    ----------
    dir : str
        Directory containing the log files. Files are opened through their
        full path, so the current working directory does not matter.
    jobid : str
        Substring that selects the job's files.
    keywords : str
        Keyword forwarded to ``maxior`` (e.g. ``"Max Write"``).
    tt : str
        Not used by this function; kept so existing calls keep working.
    sb : object, optional
        ``None`` (default) sorts the rows by ``(nodes, cores)``. Any other
        value adds a ``seg`` column and sorts by it.

    Returns
    -------
    pandas.DataFrame
        Columns ``writemax`` (float; the column keeps this name whatever the
        keyword), optional ``seg`` (int), ``nodes`` (int), ``cores`` (int)
        and ``nodecore`` (``"<nodes>-<cores>"``), with a fresh 0..n-1 index.
        Files for which ``maxior`` found no value are left out.

    Raises
    ------
    IndexError, ValueError
        If a selected file name does not follow the expected layout.
    """
    by_segment = sb is not None

    job_files = [name for name in os.listdir(dir) if jobid in name]
    print(job_files)
    print("Number of Jobs:%d" % len(job_files))

    records = []
    for name in job_files:
        fields = name.split('.')
        record = {
            'setting': fields[1],  # "<nodes>x<cores>"
            # os.listdir returns bare names; join with `dir` so the file is
            # found regardless of the current working directory.
            'writemax': maxior(os.path.join(dir, name), keywords),
        }
        if by_segment:
            record['seg'] = fields[2]
        records.append(record)

    columns = ['setting', 'writemax'] + (['seg'] if by_segment else [])
    dfwrite = pd.DataFrame(records, columns=columns)

    # Missing values (maxior returned None) become NaN in the float cast and
    # are then dropped. Comparing the float column's string form with
    # 'None'/'NaN' never matched, because NaN prints as 'nan'.
    dfwrite['writemax'] = dfwrite['writemax'].astype('float')
    dfwrite = dfwrite.dropna(subset=['writemax']).copy()
    print("Number of Completed Jobs:%d" % len(dfwrite.writemax))

    if by_segment:
        dfwrite['seg'] = dfwrite['seg'].astype('int')

    # Split "<nodes>x<cores>" in plain Python and convert to int *before*
    # sorting, so that e.g. 2 nodes sorts ahead of 16 nodes.
    pairs = [setting.split('x', 1) for setting in dfwrite.pop('setting')]
    dfwrite['nodes'] = [int(pair[0]) for pair in pairs]
    dfwrite['cores'] = [int(pair[1]) for pair in pairs]
    dfwrite['nodes'] = dfwrite['nodes'].astype('int')
    dfwrite['cores'] = dfwrite['cores'].astype('int')

    if by_segment:
        print("sort by number of segments")
        dfwrite = dfwrite.sort_values(by=['seg'], axis=0)
    else:
        print("sort by nodes, cores")
        dfwrite = dfwrite.sort_values(by=['nodes', 'cores'], axis=0)

    dfwrite['nodecore'] = dfwrite.nodes.astype('str') + '-' + dfwrite.cores.astype('str')
    return dfwrite.reset_index(drop=True)
def ior_plot(dfwrite,keywords,tt):
    """Plot the ``writemax`` column of one result frame on the current figure.

    ``dfwrite`` must provide the columns ``nodes``, ``nodecore`` and
    ``writemax`` (the layout returned by ``ior_forjob``). Points are drawn
    at positions 0..n-1 and the x ticks are labelled with ``nodecore``.
    The title is ``tt`` and ``keywords`` joined by a comma. Nothing is
    returned; the function only draws through ``plt``.
    """
    positions = range(len(dfwrite.nodes))
    tick_labels = dfwrite.nodecore
    heading = ",".join((tt, keywords))

    plt.xticks(positions, tick_labels)
    plt.plot(positions, dfwrite.writemax, '-o')
    plt.xlabel('Nodes-Cores')
    plt.ylabel('MiB/s')
    plt.title(heading)
    # Rotate the tick labels once the curve is drawn.
    plt.xticks(rotation=80)
    plt.grid(True)
def ior_plot_two(dfknl,dfhas,tt,step=None,label1=None,label2=None):
    """Overlay two result frames on the current figure.

    The first frame is drawn at positions 0..n-1 with circle markers and
    supplies the x tick labels (its ``nodecore`` column). The second frame
    is drawn with triangle markers at positions that receive an extra
    offset after every 6 points.

    Parameters
    ----------
    dfknl, dfhas : pandas.DataFrame
        Frames with ``nodes``, ``nodecore`` and ``writemax`` columns (the
        layout returned by ``ior_forjob``).
    tt : str
        Figure title.
    step : object, optional
        ``None`` uses an offset of 1 per completed group of 6 points; any
        other value uses an offset of 2. The numeric value itself is not
        used.
    label1, label2 : str, optional
        Legend labels; default to ``"KNL"`` and ``"Haswell"``.
    """
    group_size = 6  # number of points after which the second curve gets a gap
    shift = 1 if step is None else 2
    if label1 is None:
        label1 = "KNL"
    if label2 is None:
        label2 = "Haswell"

    plt.xticks(range(len(dfknl.nodes)), dfknl.nodecore)
    plt.plot(range(len(dfknl.nodecore)), dfknl.writemax, '-o', label=label1)

    # Point j is moved right once for every full group that precedes it.
    # This is the closed form of the former nested loops, which added `shift`
    # to every element from index 6, 12, ... onwards. It also avoids item
    # assignment on range(), which Python 3 does not allow.
    has_x = [j + shift * (j // group_size) for j in range(len(dfhas.nodecore))]
    plt.plot(has_x, dfhas.writemax, '-^', label=label2)

    plt.xlabel('Nodes-Cores')
    plt.ylabel('MiB/s')
    plt.title(tt)
    plt.xticks(rotation=90)
    plt.grid(True)
    plt.legend()
In [91]:
# Parse the "Max Write" values of job 4603772 (titled Haswell) from `dir`.
jobid="4603772"
keyword="Max Write"
tt='Haswell Direct IO, File per Process'
dfhas=ior_forjob(dir,jobid,keyword,tt)
#ior_plot(dfhas,keyword,tt)
In [92]:
# Same for job 4603792 (titled KNL).
jobid="4603792"
keyword="Max Write"
tt='KNL Direct IO, File per Process'
dfknl=ior_forjob(dir,jobid,keyword,tt)
#ior_plot(dfknl,keyword,tt)
In [93]:
# Overlay both frames; the first argument provides the x tick labels.
tt='Haswell vs. KNL Direct IO, File per Process, Write'
ior_plot_two(dfknl,dfhas,tt)
In [94]:
# Display the parsed KNL table.
dfknl
Out[94]:
In [95]:
# Display the parsed Haswell table.
dfhas
Out[95]:
In [8]:
# Directory used for the read measurements of the same two jobs.
dirr="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/tmp_has_default_read"
In [9]:
# Switch the kernel's working directory to `dirr`.
cd $dirr
In [10]:
# "Max Read" values of job 4603772 (titled Haswell).
# The value column of the returned frame is still called `writemax`.
jobid="4603772"
keyword="Max Read"
tt='Haswell Direct IO, File per Process'
dfhasr=ior_forjob(dirr,jobid,keyword,tt)
#ior_plot(dfhasr,keyword,tt)
In [11]:
# "Max Read" values of job 4603792 (titled KNL).
jobid="4603792"
keyword="Max Read"
tt='KNL Direct IO, File per Process'
dfknlr=ior_forjob(dirr,jobid,keyword,tt)
#ior_plot(dfknlr,keyword,tt)
In [12]:
# Overlay the two read curves.
tt='Haswell vs. KNL Direct IO, File per Process, Read'
ior_plot_two(dfknlr,dfhasr,tt)
In [13]:
# Re-point `dir` at the apr22 write results (rebinds the name used by earlier cells).
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/apr22/apr22write"
In [14]:
# Switch the kernel's working directory to `dir`.
cd $dir
In [15]:
# "Max Write" values of job 4669220 (titled KNL).
jobid="4669220"
keyword="Max Write"
tt='KNL Direct IO, File per Process'
dfknlw=ior_forjob(dir,jobid,keyword,tt)
# NOTE(review): the disabled call below names the read frame `dfknlr`, not `dfknlw`.
#ior_plot(dfknlr,keyword,tt)
In [16]:
# "Max Write" values of job 4669797 (titled Haswell).
jobid="4669797"
keyword="Max Write"
tt='HASWELL Direct IO, File per Process'
dfhasw=ior_forjob(dir,jobid,keyword,tt)
In [17]:
# Overlay both write curves; a non-None `step` selects the wider gap in the second curve.
tt='Haswell vs. KNL Direct IO, File per Process, Write'
ior_plot_two(dfknlw,dfhasw,tt,step=2)
In [18]:
# Re-point `dir` at the apr22 read results.
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/apr22/apr22read"
In [19]:
# Switch the kernel's working directory to `dir`.
cd $dir
In [20]:
# "Max Read" values of job 4669220 (titled KNL); overwrites the earlier `dfknlr`.
jobid="4669220"
keyword="Max Read"
tt='KNL Direct IO, File per Process'
dfknlr=ior_forjob(dir,jobid,keyword,tt)
#ior_plot(dfknlr,keyword,tt)
In [21]:
# "Max Read" values of job 4669797 (titled Haswell); overwrites the earlier `dfhasr`.
jobid="4669797"
keyword="Max Read"
tt='HASWELL Direct IO, File per Process'
dfhasr=ior_forjob(dir,jobid,keyword,tt)
In [22]:
# Overlay both read curves.
tt='Haswell vs. KNL Direct IO, File per Process, Read'
ior_plot_two(dfknlr,dfhasr,tt,step=2)
In [54]:
# Back to the apr22 write results, this time for the segment-count runs.
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/apr22/apr22write"
In [55]:
# Switch the kernel's working directory to `dir`.
cd $dir
In [61]:
# Passing any non-None `sb` makes ior_forjob add a `seg` column and sort by it.
jobid="4669801"
keyword="Max Write"
tt='KNL Direct IO, File per Process'
dfknlw=ior_forjob(dir,jobid,keyword,tt,sb="segment")
In [62]:
#ior_plot(dfknlr,keyword,tt)
# Same segment-sorted table for job 4669800 (titled Haswell).
jobid="4669800"
keyword="Max Write"
tt='HASWELL Direct IO, File per Process'
dfhasw=ior_forjob(dir,jobid,keyword,tt,sb="segment")
In [63]:
# Display the KNL segment table.
dfknlw
Out[63]:
In [64]:
# Display the Haswell segment table.
dfhasw
Out[64]:
In [65]:
# apr22 read results for the segment-count runs.
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/apr22/apr22read"
In [66]:
# Switch the kernel's working directory to `dir`.
cd $dir
In [67]:
# Segment-sorted "Max Read" table for job 4669801 (titled KNL).
jobid="4669801"
keyword="Max Read"
tt='KNL Direct IO, File per Process'
dfknlr=ior_forjob(dir,jobid,keyword,tt,sb="segment")
In [68]:
# Segment-sorted "Max Read" table for job 4669800 (titled Haswell).
jobid="4669800"
keyword="Max Read"
tt='HASWELL Direct IO, File per Process'
dfhasr=ior_forjob(dir,jobid,keyword,tt,sb="segment")
In [69]:
# Display the KNL segment table.
dfknlr
Out[69]:
In [70]:
# Display the Haswell segment table.
dfhasr
Out[70]:
In [64]:
# Read results of the later run.
# NOTE(review): the path mixes "apr24" and "apr22read" while the plot titles say "Apr 25" - confirm which date is right.
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/apr24/apr22read"
In [65]:
# Switch the kernel's working directory to `dir`.
cd $dir
In [69]:
# Direct IO, "Max Read", job 4685800 (titled KNL).
jobid="4685800"
keyword="Max Read"
tt='KNL Direct IO, File per Process'
dfknlr=ior_forjob(dir,jobid,keyword,tt)
In [70]:
# Direct IO, "Max Read", job 4685784 (titled Haswell).
jobid="4685784"
keyword="Max Read"
tt='HASWELL Direct IO, File per Process'
dfhasr=ior_forjob(dir,jobid,keyword,tt)
In [71]:
# Overlay the two direct-IO read curves.
tt='Haswell vs. KNL Direct IO, File per Process, Read, Apr 25'
ior_plot_two(dfknlr,dfhasr,tt,step=2)
In [72]:
# Buffered IO, job 4685871 (titled KNL), parsed twice:
# the lower-case keyword "read" makes maxior return token 1 of the first
# matching line, while "Max Read" makes it return token 2.
jobid="4685871"
keyword="read"
tt='KNL Buffered IO, File per Process,'
dfknlbr=ior_forjob(dir,jobid,keyword,tt)
keyword="Max Read"
tt='KNL Buffered IO, File per Process,'
dfknlbrmax=ior_forjob(dir,jobid,keyword,tt)
In [73]:
# Buffered IO, job 4685865 (titled Haswell), parsed with the same two keywords.
jobid="4685865"
keyword="read"
tt='HASWELL Buffered IO, File per Process,'
dfhasbr=ior_forjob(dir,jobid,keyword,tt)
keyword="Max Read"
tt='HASWELL Buffered IO, File per Process,'
dfhasbrmax=ior_forjob(dir,jobid,keyword,tt)
In [74]:
# Overlay the buffered curves parsed with the "read" keyword.
tt='Haswell vs. KNL Buffered IO, File per Process, Read, Apr 25'
ior_plot_two(dfknlbr,dfhasbr,tt,step=2)
In [75]:
# Overlay the buffered curves parsed with the "Max Read" keyword.
tt='Haswell vs. KNL Buffered IO, File per Process, Max Read (3 times), Apr 25'
ior_plot_two(dfknlbrmax,dfhasbrmax,tt,step=2)
In [33]:
# Write results of the later run.
# NOTE(review): the path mixes "apr24" and "apr22write" while the plot titles say "Apr 25" - confirm which date is right.
dir="/global/homes/j/jialin/heterogeneous-IO/cpu/script/ior/ior_test/apr24/apr22write"
In [34]:
# Switch the kernel's working directory to `dir`.
cd $dir
In [ ]:
# NOTE(review): this cell has no execution count, yet the plotting cell
# after it uses `dfknlw`, `dfhasw` and `tt` - on a fresh kernel it must be
# run first.
# Direct IO, "Max Write", jobs 4685800 (titled KNL) and 4685784 (titled Haswell).
jobid="4685800"
keyword="Max Write"
tt='KNL Direct IO, File per Process'
dfknlw=ior_forjob(dir,jobid,keyword,tt)
jobid="4685784"
keyword="Max Write"
tt='HASWELL Direct IO, File per Process'
dfhasw=ior_forjob(dir,jobid,keyword,tt)
tt='Haswell vs. KNL Direct IO, File per Process, Write, Apr 25'
In [37]:
# Overlay the two direct-IO write curves, titled with the `tt` set in the previous cell.
ior_plot_two(dfknlw,dfhasw,tt,step=2)
In [ ]:
# NOTE(review): also without an execution count; the cells below depend on
# the four frames created here.
# Buffered IO, jobs 4685871 (titled KNL) and 4685865 (titled Haswell),
# each parsed with the keywords "write" and "Max Write".
jobid="4685871"
keyword="write"
tt='KNL Buffered IO, File per Process,'
dfknlbw=ior_forjob(dir,jobid,keyword,tt)
keyword="Max Write"
tt='KNL Buffered IO, File per Process,'
dfknlbwmax=ior_forjob(dir,jobid,keyword,tt)
jobid="4685865"
keyword="write"
tt='HASWELL Buffered IO, File per Process,'
dfhasbw=ior_forjob(dir,jobid,keyword,tt)
keyword="Max Write"
tt='HASWELL Buffered IO, File per Process,'
dfhasbwmax=ior_forjob(dir,jobid,keyword,tt)
In [41]:
# Overlay the buffered curves parsed with the "write" keyword.
tt='Haswell vs. KNL Buffered IO, File per Process, Write Once, Apr 25'
ior_plot_two(dfknlbw,dfhasbw,tt,step=2)
In [42]:
# Overlay the buffered curves parsed with the "Max Write" keyword.
tt='Haswell vs. KNL Buffered IO, File per Process, Max Write (3 times), Apr 25'
ior_plot_two(dfknlbwmax,dfhasbwmax,tt,step=2)
In [48]:
# Buffered vs. direct write for the KNL frames; the legend labels replace the
# "KNL"/"Haswell" defaults of ior_plot_two.
tt='KNL Direct and Buffered IO, File per Process, Write Once, Apr 25'
ior_plot_two(dfknlbw,dfknlw,tt,label1="Buffered Write", label2="Direct Write")
In [49]:
# Buffered vs. direct write for the Haswell frames.
tt='HASWELL Direct and Buffered IO, File per Process, Write Once, Apr 25'
ior_plot_two(dfhasbw,dfhasw,tt,label1="Buffered Write", label2="Direct Write")
In [76]:
# Buffered vs. direct read for the KNL frames.
# NOTE(review): `dfknlr`/`dfhasr` are rebound several times in this notebook;
# which data is plotted depends on the cell run most recently.
tt='KNL Direct and Buffered IO, File per Process, Read Once, Apr 25'
ior_plot_two(dfknlbr,dfknlr,tt,label1="Buffered Read", label2="Direct Read")
In [77]:
# Buffered vs. direct read for the Haswell frames.
tt='HASWELL Direct and Buffered IO, File per Process, Read Once, Apr 25'
ior_plot_two(dfhasbr,dfhasr,tt,label1="Buffered Read", label2="Direct Read")
In [ ]: