scheme:
In [1]:
%load_ext autoreload
%autoreload 2
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt
sys.path.append('../../')
import cuda_timeline
import read_trace
import avgblk
import cke
from model_param import *
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)
In [2]:
# Describe the target GPU as a DeviceInfo record named gtx950.
# The per-SM limits below are written onto the record in the listed order.
# NOTE(review): units are not stated here (shared memory is presumably in
# bytes) -- confirm against the DeviceInfo definition.
gtx950 = DeviceInfo()
for _field, _setting in (
        ('sm_num', 6),
        ('sharedmem_per_sm', 49152),
        ('reg_per_sm', 65536),
        ('maxthreads_per_sm', 2048),
):
    setattr(gtx950, _field, _setting)
In [3]:
# Initialise the SM resources for the gtx950 device description.
# init_gpu hands back two objects, kept as SM_resList and SM_traceList;
# both are threaded through cke.update_by_range in the simulation loop.
# (presumably one entry per SM -- confirm in model_param)
SM_resList, SM_traceList = init_gpu(gtx950)
In [4]:
# Inspect the first entry of SM_resList right after initialisation.
SM_resList[0]
Out[4]:
In [5]:
# Inspect the first entry of SM_traceList right after initialisation.
SM_traceList[0]
Out[5]:
In [6]:
# Trace files used by this notebook (paths are relative to the notebook).
trace_s1, trace_s3 = 'trace_s1_5m.csv', 'trace_s3_5m.csv'

# Load each trace file into a dataframe via read_trace.Trace2dataframe.
df_trace_s1 = read_trace.Trace2dataframe(trace_s1)
df_trace_s3 = read_trace.Trace2dataframe(trace_s3)
In [7]:
#df_trace_s1
In [8]:
# Plot the timeline of the trace loaded from trace_s1_5m.csv.
cuda_timeline.plot_trace(df_trace_s1)
In [9]:
#cuda_timeline.plot_trace(df_trace_s2)
In [10]:
# Plot the timeline of the trace loaded from trace_s3_5m.csv.
cuda_timeline.plot_trace(df_trace_s3)
In [11]:
# Build the timing table for the s3 trace and report its total runtime.
# NOTE(review): the unit of tot_runtime is whatever read_trace uses -- confirm.
df_3stream = read_trace.Get_timing_from_trace(df_trace_s3)
tot_runtime = read_trace.GetTotalRuntime(df_3stream)
# Call form of print: same output as the Python 2 statement for a single
# argument, consistent with the print(...) calls used elsewhere in this
# notebook, and still valid on Python 3.
print(tot_runtime)
In [12]:
# Extract the kernel info from the s1 trace for the gtx950 device,
# then dump it for inspection.
# Warning: currently limited to one kernel.
kernel = read_trace.GetKernelInfo(df_trace_s1, gtx950)
Dump_kernel_info(kernel)
In [13]:
# Per-stream kernel tables: stream_kernel_list[sid] is a dict for stream sid,
#   key   -> the kernel order
#   value -> the kernel info
stream_num = 3
stream_kernel_list = []
for sid in range(stream_num):
    # each stream gets its own Copy_kernel_info(kernel) result, stored as kernel 0
    kern_dd = {0: Copy_kernel_info(kernel)}
    stream_kernel_list.append(kern_dd)

# Dump kernel 0 of the first stream for inspection.
Dump_kernel_info(stream_kernel_list[0][0])
In [14]:
# Timing table for the s1 trace, then passed through Reset_starting.
# (presumably this rebases the start times -- confirm in read_trace)
# df_s1 is the template that cke.init_trace_list consumes in a later cell.
df_s1_trace_timing = read_trace.Get_timing_from_trace(df_trace_s1)
df_s1 = read_trace.Reset_starting(df_s1_trace_timing)
In [15]:
# Display the s1 timing table produced by Reset_starting.
df_s1
Out[15]:
In [16]:
# Find when to start each stream and update the starting position for its trace.
# H2D_H2D_OVLP_TH is forwarded to init_trace_list as h2d_ovlp_th.
# NOTE(review): presumably a measured host-to-device overlap threshold;
# TODO document its unit and where the value 3.158431 comes from.
H2D_H2D_OVLP_TH = 3.158431
df_cke_list = cke.init_trace_list(df_s1, stream_num = stream_num, h2d_ovlp_th = H2D_H2D_OVLP_TH)
In [17]:
#df_cke_list[0]
In [18]:
#df_cke_list[1]
In [19]:
#df_cke_list[2]
In [20]:
# Combine the per-stream tables in df_cke_list into the single api table
# (df_all_api) that the simulation loop reads and updates.
# (presumably sorted, with extra bookkeeping columns, per the helper's name -- confirm in cke)
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)
In [21]:
# Display the api table as built by cke.init_sort_api_with_extra_cols.
df_all_api
Out[21]:
In [22]:
# ---------------------------------------------------------------------------
# Main simulation loop.
#
# Each round picks a base api call (r1), then walks up to `more_streams`
# following calls (r2). For every r2 the api table is advanced from the
# current simulation position (simPos) to the start of r2 (curPos). At the
# end of a round cke.check_activestream_and_update is given the active-stream
# pool and returns the updated table, pool and simPos.
#
# Debug tip: to stop after a given round, temporarily add
#     if round_count == N: break
# at the point of interest inside the while loop.
# ---------------------------------------------------------------------------
simPos = 0.0
# curPos is printed after the inner for loop but only assigned inside it.
# Seed it here so the print cannot raise NameError when the inner loop never
# assigns it (stream_num == 1, or every picked call was already running).
curPos = simPos
more_streams = stream_num - 1

# Active-stream pool: stream id -> row of the call currently active on that
# stream, or None when the stream has no active call yet.
active_stream_dd = {}
for s in range(stream_num):
    active_stream_dd[s] = None
Dump_dd(active_stream_dd)

round_count = 1
while not cke.AllDone(df_all_api):
    #
    # check whether there is any call in sleep
    # if not, all calls are awake (or done): finish them all and leave the loop
    if NoMoreSleepCalls(df_all_api):
        print('no more sleep calls')
        df_all_api = FinishRestWakeCalls(df_all_api)
        break

    df_all_api, r1, r1_stream = cke.pick_base_call(df_all_api)

    #
    # wake it up if r1 is in sleep
    if GetInfo(df_all_api, r1, 'status') == 'sleep':
        df_all_api = SetWake(df_all_api, r1)

    #
    # if the stream of r1 has no active call in the pool yet, add r1
    if active_stream_dd[r1_stream] is None:
        active_stream_dd[r1_stream] = r1

    print('\n------------\n\n new round ({}) => row {}, stream-id {}'.format(round_count, r1, r1_stream))
    Dump_dd(active_stream_dd)
    print('simPos {}'.format(simPos))

    prev_row = r1
    for i in range(0, more_streams):
        df_all_api, r2, r2_stream = cke.start_next_call(df_all_api, prev_row)
        print('=> pick row {}, stream-id {}'.format(r2, r2_stream))

        #
        # check whether r2 is already running; if yes, continue to pick the next
        if active_stream_dd[r2_stream] == r2:
            print('=> picked row {} is already running. select next'.format(r2))
            prev_row = r2
            continue

        # current position
        curPos = GetInfo(df_all_api, r2, 'start')
        print('current position : {}'.format(curPos))
        Dump_dd(active_stream_dd)

        if active_stream_dd[r2_stream] is None:
            active_stream_dd[r2_stream] = r2
            # update trace during the range
            print('simPos {} curPos {}'.format(simPos, curPos))
            df_all_api, SM_resList, SM_traceList = cke.update_by_range(df_all_api, simPos, curPos,
                                                                       gtx950,
                                                                       SM_resList, SM_traceList,
                                                                       stream_kernel_list)
            Dump_dd(active_stream_dd)
        else:
            #
            # there is a stream api ahead, find out which call and terminate it
            row_2nd = Find_prevapi_samestream(df_all_api, r2, r2_stream)
            print('end prev api call at row {}'.format(row_2nd))

            #
            # end the target row, update the bytes for other calls
            df_all_api = cke.end_target_row(df_all_api, row_2nd, simPos, curPos)

            #
            # update curPos, since r2 start has been shifted right
            curPos = GetInfo(df_all_api, r2, 'start')
            df_all_api = UpdateCell(df_all_api, r2, 'current_pos', curPos)
            print('row {}, Updated current position : {}'.format(r2, curPos))

            #
            # assume there is no overlap between row_2nd end and r2 start
            row2nd_end = GetInfo(df_all_api, row_2nd, 'end')
            df_all_api = cke.move_wake_for_coming_call(df_all_api, row2nd_end, curPos)

            #
            # update the active stream pool: remove row_2nd, add r2 to the pool
            active_stream_dd[r2_stream] = r2

        #
        # shift right: the range [simPos, curPos] has been consumed
        simPos = curPos

        #
        # update prev_row
        prev_row = r2
    #
    # end of for loop

    # dump active dd
    Dump_dd(active_stream_dd)
    print('simPos {} curPos {}'.format(simPos, curPos))

    #
    # check whether the active pool is full, if yes, terminate the api that ends soon
    df_all_api, active_stream_dd, simPos = cke.check_activestream_and_update(df_all_api, active_stream_dd, simPos)
    print(active_stream_dd)

    round_count += 1
In [23]:
# Display the api table after the simulation loop has run.
df_all_api
Out[23]:
In [24]:
#
# run above
#