In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
In [2]:
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt
sys.path.append('../../')
import read_trace
import cuda_timeline
# from avgblkmodel import *
from ModelParam import *
import cke
from df_util import *
#from model_cke import *
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
In [3]:
# Device description object used by the model; field meanings below are read off
# the attribute names -- NOTE(review): confirm against DeviceInfo in ModelParam.
gtx950 = DeviceInfo()
gtx950.sm_num = 6  # SM count, going by the attribute name
gtx950.sharedmem_per_sm = 49152  # 48 * 1024; presumably bytes -- TODO confirm unit
gtx950.reg_per_sm = 65536  # 64 * 1024
gtx950.maxthreads_per_sm = 2048
In [4]:
# Input traces for the 10M mem_mem case:
#   trace_file      -> the "s1" trace, used further down as the single-stream baseline
#   trace_file_2cke -> the run where the h2d copies of the two streams overlap
trace_file, trace_file_2cke = 'trace_10M_s1.csv', 'trace_h2d_h2d_ovlp.csv'

# Parse each CSV trace into a dataframe (baseline first, then the overlapped run).
df_trace, df_trace_2cke = (
    read_trace.trace2dataframe(trace_file),
    read_trace.trace2dataframe(trace_file_2cke),
)
In [5]:
#df_trace
In [6]:
#cuda_timeline.plot_trace(df_trace)
In [7]:
# Show the dataframe parsed from trace_file_2cke (the overlapped two-stream trace).
df_trace_2cke
Out[7]:
In [8]:
# Draw the two-stream trace with the project's cuda_timeline plotting helper.
cuda_timeline.plot_trace(df_trace_2cke)
In [9]:
# Timing table for the single-stream trace.
# NOTE(review): the exact columns produced by read_trace.get_timing are not visible here.
df_single_stream = read_trace.get_timing(df_trace)
In [10]:
# Inspect the single-stream timing table.
df_single_stream
Out[10]:
In [11]:
# Single-stream timings after read_trace.reset_starting; presumably this re-bases
# the start times -- TODO confirm in read_trace. df_s1 is the template handed to
# cke.init_trace_list.
df_s1 = read_trace.reset_starting(df_single_stream)
In [12]:
# Inspect the re-based single-stream timings.
df_s1
Out[12]:
In [13]:
# Timing table for the measured two-stream trace, built with the same
# read_trace.get_timing helper as the single-stream table.
df_2stream = read_trace.get_timing(df_trace_2cke)
In [14]:
# Inspect the measured two-stream timing table.
df_2stream
Out[14]:
In [15]:
# Total runtime that read_trace reports for the measured two-stream timings.
tot_runtime = read_trace.getTotalRuntime(df_2stream)
# Parenthesised single-argument form prints identically under Python 2 and is a
# valid call under Python 3 (the bare `print x` statement is Python 2 only).
print(tot_runtime)
In [16]:
# Number of streams handed to cke.init_trace_list.
stream_num = 2
# find when to start the stream and update the starting pos for the trace
# NOTE(review): H2D_H2D_OVLP_TH is a hard-coded threshold forwarded as h2d_ovlp_th;
# its unit and how it was obtained are not shown in this notebook -- confirm.
H2D_H2D_OVLP_TH = 3.158431
df_cke_list = cke.init_trace_list(df_s1, stream_num = stream_num, h2d_ovlp_th = H2D_H2D_OVLP_TH)
In [17]:
# First entry of the list returned by cke.init_trace_list (presumably the trace
# prepared for stream 0 -- confirm).
df_cke_list[0]
Out[17]:
In [18]:
# Second entry of the list returned by cke.init_trace_list (presumably the trace
# prepared for stream 1 -- confirm).
df_cke_list[1]
Out[18]:
In [19]:
# Combine the per-stream list into the single table that the scheduling loop
# mutates. Judging by the helper's name the rows are sorted and gain extra
# bookkeeping columns -- TODO confirm in cke.
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)
In [20]:
# Inspect the combined API table before the scheduling loop runs.
df_all_api
Out[20]:
In [21]:
# Iteration counter. It does not influence the model; it exists so the loop can be
# cut short while debugging, e.g. by setting break_count and adding
# `if count == break_count: break` at the point of interest.
count = 1
# break_count = 7

# Keep advancing the model until cke.AllDone reports that nothing is left to do.
while not cke.AllDone(df_all_api):
    # pick two api to learn
    df_all_api, r1, r2 = cke.PickTwo(df_all_api)

    if r1 is None and r2 is None:
        # No pair was returned: go directly to updating the last wake api.
        df_all_api = cke.UpdateStream_lastapi(df_all_api)
    else:
        df_all_api = cke.StartNext_byType(df_all_api, [r1, r2])

        # check whether the two picked calls are the same api type
        whichType = cke.CheckType(df_all_api, r1, r2)

        if whichType is None:
            df_all_api = cke.Predict_noConflict(df_all_api, r1, r2)
        elif whichType in ['h2d', 'd2h']:
            # data transfer in the same direction
            df_all_api = cke.Predict_transferOvlp(df_all_api, r1, r2, ways = 2.0)
        else:
            # TODO: concurrent kernels are not modelled yet; the table is left as is.
            pass

        rangeT = cke.Get_pred_range(df_all_api)

        # check whether there is additional concurrency during rangeT
        extra_conc = cke.Check_cc_by_time(df_all_api, rangeT)

        if extra_conc == 0:
            if whichType in ['h2d', 'd2h']:
                df_all_api = cke.Update_wake_transferOvlp(df_all_api, rangeT, ways = 2.0)
            elif whichType == 'kern':
                # TODO: kernel case not handled yet.
                pass
            else:
                # no overlapping
                df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)

            # check if any api is done, and update the timing for the other apis
            # in that stream
            df_all_api = cke.UpdateStreamTime(df_all_api)
        else:
            # TODO: additional overlapping inside rangeT is not handled yet.
            pass

    # next call
    count += 1
In [22]:
# Inspect the combined API table after the scheduling loop has finished.
df_all_api
Out[22]:
In [23]:
# Rows of the final table whose stream_id is 0.
df_all_api.loc[df_all_api.stream_id == 0]
Out[23]:
In [24]:
# Rows of the final table whose stream_id is 1.
df_all_api.loc[df_all_api.stream_id == 1]
Out[24]:
In [25]:
#
# run above
#