In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt
sys.path.append('../')
import read_trace
import cuda_timeline
from avgblkmodel import *
import cke
from df_util import *
#from model_cke import *
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
In [3]:
# Device description object filled with per-SM resource limits.
# NOTE(review): the variable name suggests these are the limits of an NVIDIA
# GTX 950, and the sizes are presumably in bytes / registers / threads —
# confirm against the device documentation. `gtx950` is not referenced again
# in the cells visible here.
gtx950 = DeviceInfo()
gtx950.sm_num = 6                       # value stored in the `sm_num` field
gtx950.sharedmem_per_sm = 48 * 1024     # 49152
gtx950.reg_per_sm = 64 * 1024           # 65536
gtx950.maxthreads_per_sm = 2 * 1024     # 2048
In [4]:
# Load the two traces used in this notebook into dataframes.
# 10M for mem_mem: the case where the h2d transfers of different streams overlap.

# Trace that feeds the single-stream timing (`df_single_stream`) further down.
trace_file = 'trace_10M_s1.csv'
df_trace = read_trace.trace2dataframe(trace_file)

# Trace that feeds the two-stream timing (`df_2stream`) further down.
trace_file_2cke = 'trace_h2d_h2d_ovlp.csv'
df_trace_2cke = read_trace.trace2dataframe(trace_file_2cke)
In [5]:
#df_trace
In [6]:
#cuda_timeline.plot_trace(df_trace)
In [7]:
#df_trace_2cke
In [8]:
#cuda_timeline.plot_trace(df_trace_2cke)
In [9]:
# Derive the timing table from the single-stream trace.
df_single_stream = read_trace.get_timing(df_trace)
In [10]:
# Display the single-stream timing table.
df_single_stream
Out[10]:
In [11]:
# Pass the single-stream timing through reset_starting; the result (df_s1) is
# the input to cke.init_trace_list below.
# NOTE(review): presumably this rebases the start times — confirm in read_trace.
df_s1 = read_trace.reset_starting(df_single_stream)
In [12]:
#df_s1
In [13]:
# Derive the timing table from the two-stream trace.
df_2stream = read_trace.get_timing(df_trace_2cke)
In [14]:
# Display the two-stream timing table.
df_2stream
Out[14]:
In [15]:
# Total runtime computed from the two-stream timing table (df_2stream).
# NOTE(review): presumably the measured reference for the prediction made
# later in the notebook — confirm.
tot_runtime = read_trace.getTotalRuntime(df_2stream)
# Call form of print: identical output on Python 2 for a single argument and
# valid on Python 3, unlike the original print statement.
print(tot_runtime)
In [16]:
# Configuration for building the per-stream trace list.
# Threshold forwarded to init_trace_list as `h2d_ovlp_th`.
# NOTE(review): the unit of this constant is not visible here — presumably the
# same time unit as the trace; confirm.
H2D_H2D_OVLP_TH = 3.158431
stream_num = 2

# find when to start each stream and update the starting position for the trace
df_cke_list = cke.init_trace_list(
    df_s1,
    stream_num=stream_num,
    h2d_ovlp_th=H2D_H2D_OVLP_TH,
)
In [17]:
#df_cke_list[0]
In [18]:
#df_cke_list[1]
In [19]:
# Build the API table that the stepping cells below read and reassign.
# NOTE(review): going by the helper's name, this presumably merges the
# per-stream traces, sorts the calls and adds bookkeeping columns — confirm in cke.
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)
In [20]:
#df_all_api
In [21]:
#df_all_api.loc[df_all_api.stream_id == 0]
In [22]:
# print df_all_api.iloc[0]
# print df_all_api.iloc[1]
In [23]:
#df_all_api
In [24]:
# Step 1: wake the first two sleeping API calls and, when they run
# concurrently, predict their end times with two-way sharing.
#
# NOTE(review): the text export of this notebook lost all indentation, which
# left the `if` statements without bodies (invalid Python). The nesting below
# is a reconstruction: everything that consumes `cc` is placed under the
# concurrency branch, and the stream-time refresh is placed with the timing
# update. Confirm against the original .ipynb.

# pick the 1st sleeping call and wake it up
r1 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r1)

# pick another sleeping call; if it is from the same stream there is no overlap
r2 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r2)

# check concurrency between the two woken calls
conc = cke.check_cc(df_all_api, r1, r2)

# Explicit comparison kept on purpose: the return type of check_cc is not
# visible here, so truthiness could behave differently.
if conc == True:
    df_all_api = cke.update_before_conc(df_all_api, r1, r2)
    # set r2 to wake
    df_all_api = SetWake(df_all_api, r2)
    # current concurrency
    cc = 2.0
    # predict the end times of r1 and r2 under that concurrency
    df_all_api = cke.Predict_end(df_all_api, [r1, r2], ways=cc)

    # get the time range from the wake apis, to check for the next concurrent api
    rangeT = cke.Get_next_range(df_all_api)
    # check whether there is additional concurrency during rangeT
    extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
    if extra_conc == 0:
        # no extra concurrency: update timing using the predicted end
        df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways=cc)
        # check if any api is done, and update the timing for the other apis in that stream
        df_all_api = cke.UpdateStreamTime(df_all_api)
In [25]:
# Display the API table as it stands after the first stepping cell.
df_all_api
Out[25]:
In [26]:
## Select the next api call, wake it up and predict against r2.
#
# NOTE(review): the text export lost indentation, leaving `if extra_conc == 0:`
# without a body. The body is reconstructed as the timing update plus the
# stream-time refresh — confirm against the original .ipynb.

# pick another sleeping call; if it is from the same stream there is no overlap
r3 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r3)

# the current_pos is ahead of the coming call's start: move to the next start
df_all_api = cke.StartNext(df_all_api, [r2, r3])

# check the concurrency of r2 and r3, and predict accordingly
df_all_api = cke.Predict_checkCC(df_all_api, r2, r3)

# get the time range from the wake apis, to check for the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
# check whether there is additional concurrency during rangeT
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
if extra_conc == 0:
    # no extra concurrency: update timing using the predicted end
    df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways=2.0)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)
In [27]:
# Wake the next sleeping api call (r4) and predict against r3.
#
# NOTE(review): the text export lost indentation, leaving `if extra_conc == 0:`
# without a body. The body is reconstructed as the timing update plus the
# stream-time refresh — confirm against the original .ipynb.

# pick another sleeping call; if it is from the same stream there is no overlap
r4 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r4)

# the current_pos is ahead of the coming call's start: move to the next start
df_all_api = cke.StartNext(df_all_api, [r3, r4])

# check the concurrency of r3 and r4, and predict accordingly
df_all_api = cke.Predict_checkCC(df_all_api, r3, r4)

# get the time range from the wake apis, to check for the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
# check whether there is additional concurrency during rangeT
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
if extra_conc == 0:
    # no extra concurrency: update timing using the predicted end
    df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways=2.0)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)
In [28]:
# Wake the next sleeping api call (r5) and handle it together with r4,
# taking the api types into account.
#
# NOTE(review): the text export lost indentation, leaving both `if` statements
# without bodies. Reconstruction used here: the type branch holds only the
# prediction call; the `extra_conc` branch holds the timing update plus the
# stream-time refresh. Confirm against the original .ipynb.

# pick another sleeping call; if it is from the same stream there is no overlap
r5 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r5)
print(r5)  # call form: same output on Python 2, valid on Python 3

# the current_pos is ahead of the coming call's start: move to the next start
df_all_api = cke.StartNext_checktype(df_all_api, [r4, r5])

# we need to check whether r4 and r5 are the same api type;
# if not, there is no conflict and their end times can be predicted directly
whichType = cke.checkType(df_all_api, r4, r5)
if whichType is None:
    # api type is different, there is no conflict
    df_all_api = cke.Predict_noConflict(df_all_api, r4, r5)

# get the time range from the wake apis, to check for the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
# check whether there is additional concurrency during rangeT
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
if extra_conc == 0:
    # no extra concurrency: update timing using the predicted end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)
In [29]:
# Wake the next sleeping api call (r6) and handle it together with r4.
#
# NOTE(review): the text export lost indentation, leaving both `if` statements
# without bodies. Reconstruction used here: the type branch holds only the
# prediction call; the `extra_conc` branch holds the timing update plus the
# stream-time refresh. Confirm against the original .ipynb.

# pick another sleeping call; if it is from the same stream there is no overlap
r6 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r6)
print(r4)  # call form: same output on Python 2, valid on Python 3
print(r6)

# Note: we need to work on the active wake apis, so use r4 and r6 here
df_all_api = cke.StartNext_checktype(df_all_api, [r4, r6])

# we need to check whether r4 and r6 are the same api type;
# if not, there is no conflict and their end times can be predicted directly
whichType = cke.checkType(df_all_api, r4, r6)
print(whichType)
if whichType is None:
    # api type is different, there is no conflict
    df_all_api = cke.Predict_noConflict(df_all_api, r4, r6)

# get the time range from the wake apis, to check for the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
# check whether there is additional concurrency during rangeT
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
if extra_conc == 0:
    # no extra concurrency: update timing using the predicted end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)
In [30]:
# Wake the next sleeping api call (r7) and handle it together with r6.
#
# NOTE(review): the text export lost indentation, leaving both `if` statements
# without bodies. Reconstruction used here: the type branch holds only the
# prediction call; the `extra_conc` branch holds the timing update plus the
# stream-time refresh. Confirm against the original .ipynb.

# pick another sleeping call; if it is from the same stream there is no overlap
r7 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r7)
print(r7)  # call form: same output on Python 2, valid on Python 3

# the current_pos is ahead of the coming call's start: move to the next start
df_all_api = cke.StartNext_checktype(df_all_api, [r6, r7])

# we need to check whether r6 and r7 are the same api type;
# if not, there is no conflict and their end times can be predicted directly
whichType = cke.checkType(df_all_api, r6, r7)
print(whichType)
if whichType is None:
    # api type is different, there is no conflict
    df_all_api = cke.Predict_noConflict(df_all_api, r6, r7)

# get the time range from the wake apis, to check for the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
print(rangeT)
# check whether there is additional concurrency during rangeT
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
print(extra_conc)
if extra_conc == 0:
    # no extra concurrency: update timing using the predicted end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)
In [31]:
# Wake the next sleeping api call (r8) and handle it together with r6; this
# step uses the transfer-overlap variants of the predict/update helpers.
#
# NOTE(review): the text export lost indentation, leaving both `if` statements
# without bodies. Reconstruction used here: the type branch holds only the
# prediction call; the `extra_conc` branch holds the timing update plus the
# stream-time refresh. Confirm against the original .ipynb.

# pick another sleeping call; if it is from the same stream there is no overlap
r8 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r8)
print(r8)  # call form: same output on Python 2, valid on Python 3

# the current_pos is ahead of the coming call's start: move to the next start
df_all_api = cke.StartNext_checktype(df_all_api, [r6, r8])

# we need to check whether r6 and r8 are the same api type;
# if not, there is no conflict (their end times can be predicted directly);
# if they are overlapping transfers, use the conflict version
whichType = cke.checkType(df_all_api, r6, r8)
print(whichType)
if whichType in ['h2d', 'd2h']:
    df_all_api = cke.Predict_transferOvlp(df_all_api, r6, r8, ways=2.0)

# get the time range from the wake apis, to check for the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
print(rangeT)
# check whether there is additional concurrency during rangeT
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
print(extra_conc)
if extra_conc == 0:
    # no extra concurrency: update timing using the predicted end
    df_all_api = cke.Update_wake_transferOvlp(df_all_api, rangeT, ways=2.0)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)
In [32]:
# Look for one more sleeping api call; when none is left, finish the api that
# is currently awake (it is the last api call).
r9 = cke.pick_first_in_sleep(df_all_api)
print(r9)  # call form: same output on Python 2, valid on Python 3

# `is None` is the identity test for "no sleeping call left"; the body was
# re-indented because the text export had dropped the indentation.
if r9 is None:
    # work on the current wake api (this is the last api call)
    df_all_api = cke.UpdateStream_lastapi(df_all_api)
In [33]:
# Display the API table after all stepping cells have run.
df_all_api
Out[33]:
In [34]:
#
# run above
#
In [35]:
# Rows of the API table that belong to stream 0.
df_all_api.loc[df_all_api.stream_id == 0]
Out[35]:
In [36]:
# Rows of the API table that belong to stream 1.
df_all_api.loc[df_all_api.stream_id == 1]
Out[36]:
In [37]:
# Plot the per-stream trace list; savefig=True presumably also writes the
# figure to disk — confirm in cuda_timeline.
# NOTE(review): the stepping cells above reassign df_all_api, not df_cke_list;
# whether df_cke_list reflects the predicted timing is not visible here — confirm.
cuda_timeline.plot_cke_list(df_cke_list, savefig=True)
In [38]:
# Same plot restricted to the first two entries of the list (no savefig).
cuda_timeline.plot_cke_list(df_cke_list[0:2])
In [39]:
# Total runtime computed from the first two entries of the per-stream trace list.
# NOTE(review): this rebinds `tot_runtime`, which earlier held the runtime
# computed from df_2stream; that earlier call passed a single timing table,
# whereas a list slice is passed here — confirm getTotalRuntime accepts both.
tot_runtime = read_trace.getTotalRuntime(df_cke_list[0:2])
# Call form of print: identical output on Python 2 for a single argument and
# valid on Python 3, unlike the original print statement.
print(tot_runtime)