In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell execution and edits to the local helper modules
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
import pandas as pd
import numpy as np
import os
import sys # needed below to extend the module search path
import operator # sorting
from math import *
import matplotlib.pyplot as plt

# Put the parent directory on the import path; the project modules imported
# below are presumably located there -- TODO confirm.
sys.path.append('../')

import read_trace
import cuda_timeline
# NOTE(review): DeviceInfo and SetWake are used unqualified later in this
# notebook and are not defined in it, so they presumably come from one of the
# star-imported project modules below -- confirm which, and prefer explicit imports.
from avgblkmodel import *
import cke
from df_util import *
#from model_cke import *

# Hide numpy's VisibleDeprecationWarning for the rest of the session.
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

GPU info (GTX 950)


In [3]:
# Describe the GTX 950 device. Units are not stated in this notebook; the
# shared-memory figure is presumably in bytes -- TODO confirm.
gtx950 = DeviceInfo()
for _field, _limit in (
        ('sm_num', 6),
        ('sharedmem_per_sm', 49152),
        ('reg_per_sm', 65536),
        ('maxthreads_per_sm', 2048),
):
    setattr(gtx950, _field, _limit)

2 stream info


In [4]:
# Traces for the 10M mem_mem case: a single-stream run, and a two-stream run
# in which the H2D copies of the two streams overlap.
trace_file = 'trace_10M_s1.csv'
trace_file_2cke = 'trace_h2d_h2d_ovlp.csv'

# Read both traces into DataFrames (single-stream first, then two-stream).
df_trace, df_trace_2cke = map(read_trace.trace2dataframe,
                              (trace_file, trace_file_2cke))

In [5]:
#df_trace

In [6]:
#cuda_timeline.plot_trace(df_trace)

In [7]:
#df_trace_2cke

In [8]:
#cuda_timeline.plot_trace(df_trace_2cke)

1cke - read trace and reset the timeline


In [9]:
# Reduce the single-stream trace to one timing row per API call
# (columns: stream, api_type, start, end, size, duration).
df_single_stream = read_trace.get_timing(df_trace)

In [10]:
# Inspect the timing rows of the single-stream trace.
df_single_stream


Out[10]:
stream api_type start end size duration
0 0.0 h2d 610.840271 617.277086 38146.973 6.436815
1 0.0 h2d 617.278302 623.708301 38146.973 6.429999
2 0.0 kern 623.719789 624.996407 0.000 1.276618
3 0.0 d2h 625.003191 631.272837 38146.973 6.269646

In [11]:
# Reset the starting time of the single-stream timeline; presumably this
# rebases it so the first call starts at 0 -- TODO confirm in read_trace.
df_s1 = read_trace.reset_starting(df_single_stream)

In [12]:
#df_s1

2cke case


In [13]:
# Timing rows of the measured two-stream trace (same columns as the
# single-stream table).
df_2stream = read_trace.get_timing(df_trace_2cke)

In [14]:
# Inspect the measured two-stream timings.
df_2stream


Out[14]:
stream api_type start end size duration
0 0.0 h2d 665.032627 675.815726 38146.973 10.783099
1 0.0 h2d 675.816942 688.730069 38146.973 12.913127
2 0.0 kern 688.741397 690.119293 0.000 1.377896
3 0.0 d2h 690.126461 700.784951 38146.973 10.658490
4 1.0 h2d 668.190628 681.089483 38146.973 12.898855
5 1.0 h2d 681.090667 691.303811 38146.973 10.213144
6 1.0 kern 691.316867 692.682987 0.000 1.366120
7 1.0 d2h 693.294222 702.817090 38146.973 9.522868

In [15]:
# Total runtime of the measured two-stream trace.
tot_runtime = read_trace.getTotalRuntime(df_2stream)
# print() with a single argument prints the same text under Python 2 and is
# also valid Python 3, unlike the bare print statement.
print(tot_runtime)


37.784463

2 cke


In [16]:
# Start offset used to decide when the next stream begins. In the table printed
# after the first simulation step, the first H2D of stream 1 starts at exactly
# this value. Units presumably match the trace timestamps -- TODO confirm.
H2D_H2D_OVLP_TH = 3.158431

# Number of streams to model.
stream_num = 2

# Build the per-stream trace list from the single-stream timeline, with each
# stream's starting position updated.
df_cke_list = cke.init_trace_list(df_s1,
                                  stream_num=stream_num,
                                  h2d_ovlp_th=H2D_H2D_OVLP_TH)

In [17]:
#df_cke_list[0]

In [18]:
#df_cke_list[1]

Sort the API calls of all streams into one table


In [19]:
# Combine the per-stream traces into one table of API calls. The tables shown
# further down carry extra bookkeeping columns (status, bw, bytes_done,
# bytes_left, current_pos, time_left, pred_end) that the later cells update.
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)

In [20]:
#df_all_api

In [21]:
#df_all_api.loc[df_all_api.stream_id == 0]

In [22]:
# print df_all_api.iloc[0]
# print df_all_api.iloc[1]

In [23]:
#df_all_api

In [24]:
# First simulation step: wake the first two sleeping API calls and, if they
# run concurrently, predict their end times under 2-way sharing.

# Pick the first call that is still asleep and wake it up.
r1 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r1)

# Pick the next sleeping call; if it is from the same stream as r1 there is
# no overlap between the two.
r2 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r2)

# Check whether r1 and r2 are concurrent.
conc = cke.check_cc(df_all_api, r1, r2)

# NOTE(review): when conc is not True nothing below runs, so no prediction is
# made and cc / rangeT / extra_conc stay undefined for this cell.
if conc == True:
    df_all_api = cke.update_before_conc(df_all_api, r1, r2)
    # Set r2 to wake again.
    # NOTE(review): r2 was already woken above; presumably update_before_conc
    # can change its status -- confirm, otherwise this call is redundant.
    df_all_api = SetWake(df_all_api, r2)
    # Current degree of concurrency (two calls share the transfer).
    cc = 2.0
    # Predict the end times of r1 and r2 under that concurrency.
    df_all_api = cke.Predict_end(df_all_api, [r1, r2], ways = cc)
    # Time range taken from the wake calls, used to look for the next
    # concurrent call.
    rangeT = cke.Get_next_range(df_all_api)

    # Check whether any further call becomes concurrent during rangeT.
    extra_conc = cke.check_cc_by_time(df_all_api, rangeT)

    if extra_conc == 0: # no extra concurrency: update timing using pred_end
        df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways = cc)
        # If a call finished, update the timing of the other calls in its stream.
        df_all_api = cke.UpdateStreamTime(df_all_api)

In [25]:
# Inspect the call table after the first simulation step.
df_all_api


Out[25]:
start end api_type size_kb stream_id status bw bytes_done bytes_left current_pos time_left pred_end
0 0.000000 9.715199 h2d 38146.973 0.0 done 5926.373991 38146.97300 0.00000 9.715199 0.0 9.715199
4 3.158431 9.595246 h2d 38146.973 1.0 wake 5926.373991 19428.92967 18718.04333 9.715199 0.0 0.000000
1 9.716415 16.146414 h2d 38146.973 0.0 sleep 5932.656133 0.00000 38146.97300 0.000000 0.0 0.000000
5 9.596462 16.026461 h2d 38146.973 1.0 sleep 5932.656133 0.00000 38146.97300 0.000000 0.0 0.000000
2 16.157902 17.434520 kern 0.000 0.0 sleep 0.000000 0.00000 0.00000 0.000000 0.0 0.000000
3 17.441304 23.710950 d2h 38146.973 0.0 sleep 6084.390251 0.00000 38146.97300 0.000000 0.0 0.000000
6 16.037949 17.314567 kern 0.000 1.0 sleep 0.000000 0.00000 0.00000 0.000000 0.0 0.000000
7 17.321351 23.590997 d2h 38146.973 1.0 sleep 6084.390251 0.00000 38146.97300 0.000000 0.0 0.000000

In [26]:
# Next step: wake one more sleeping call (r3) and advance the model.
# If r3 comes from the same stream as the active call there is no overlap.
r3 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r3)

# The current position is ahead of the coming call's start, so move on to the
# next start, then check concurrency between r2 and r3 and predict accordingly.
df_all_api = cke.StartNext(df_all_api, [r2, r3])
df_all_api = cke.Predict_checkCC(df_all_api, r2, r3)

# Time range of the wake calls, and whether another call becomes concurrent
# inside that range.
rangeT = cke.Get_next_range(df_all_api)
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)

if extra_conc == 0:
    # No extra concurrency: apply the predicted end, then update the timing of
    # the remaining calls in the stream of any call that finished.
    df_all_api = cke.UpdateStreamTime(
        cke.Update_with_pred_end(df_all_api, rangeT, ways=2.0))

In [27]:
# Next step: wake one more sleeping call (r4) and advance the model.
# If r4 comes from the same stream as the active call there is no overlap.
r4 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r4)

# The current position is ahead of the coming call's start, so move on to the
# next start, then check concurrency between r3 and r4 and predict accordingly.
df_all_api = cke.StartNext(df_all_api, [r3, r4])
df_all_api = cke.Predict_checkCC(df_all_api, r3, r4)

# Time range of the wake calls, and whether another call becomes concurrent
# inside that range.
rangeT = cke.Get_next_range(df_all_api)
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)

if extra_conc == 0:
    # No extra concurrency: apply the predicted end, then update the timing of
    # the remaining calls in the stream of any call that finished.
    df_all_api = cke.UpdateStreamTime(
        cke.Update_with_pred_end(df_all_api, rangeT, ways=2.0))

In [28]:
# Wake the next sleeping call (r5). If it comes from the same stream as the
# active call there is no overlap.
r5 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r5)
print(r5)  # print() form: same output on Python 2, also valid on Python 3

# The current position is ahead of the coming call's start: move to the next start.
df_all_api = cke.StartNext_checktype(df_all_api, [r4, r5])

# Check whether r4 and r5 are the same API type. If they are not, there is no
# conflict and their end times can be predicted directly.
whichType = cke.checkType(df_all_api, r4, r5)

# Identity test against None (PEP 8); '==' can be overridden by the operand.
if whichType is None:
    # API types differ, so there is no conflict.
    df_all_api = cke.Predict_noConflict(df_all_api, r4, r5)

# Time range taken from the wake calls, used to look for the next concurrent call.
rangeT = cke.Get_next_range(df_all_api)

# Check whether any further call becomes concurrent during rangeT.
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)

if extra_conc == 0:  # no extra concurrency: update timing using pred_end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # If a call finished, update the timing of the other calls in its stream.
    df_all_api = cke.UpdateStreamTime(df_all_api)


2

In [29]:
# Wake the next sleeping call (r6). If it comes from the same stream as the
# active call there is no overlap.
r6 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r6)
# print() form: same output on Python 2, also valid on Python 3.
print(r4)
print(r6)

# Note: this step works on the calls that are currently awake, i.e. r4 and r6
# (not r5).
df_all_api = cke.StartNext_checktype(df_all_api, [r4, r6])

# Check whether r4 and r6 are the same API type. If they are not, there is no
# conflict and their end times can be predicted directly.
whichType = cke.checkType(df_all_api, r4, r6)
print(whichType)

# Identity test against None (PEP 8); '==' can be overridden by the operand.
if whichType is None:
    # API types differ, so there is no conflict.
    df_all_api = cke.Predict_noConflict(df_all_api, r4, r6)

# Time range taken from the wake calls, used to look for the next concurrent call.
rangeT = cke.Get_next_range(df_all_api)

# Check whether any further call becomes concurrent during rangeT.
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)

if extra_conc == 0:  # no extra concurrency: update timing using pred_end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # If a call finished, update the timing of the other calls in its stream.
    df_all_api = cke.UpdateStreamTime(df_all_api)


5
3
None

In [30]:
# Wake the next sleeping call (r7). If it comes from the same stream as the
# active call there is no overlap.
r7 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r7)
print(r7)  # print() form: same output on Python 2, also valid on Python 3

# The current position is ahead of the coming call's start: move to the next start.
df_all_api = cke.StartNext_checktype(df_all_api, [r6, r7])

# Check whether r6 and r7 are the same API type. If they are not, there is no
# conflict and their end times can be predicted directly.
whichType = cke.checkType(df_all_api, r6, r7)
print(whichType)

# Identity test against None (PEP 8); '==' can be overridden by the operand.
if whichType is None:
    # API types differ, so there is no conflict.
    df_all_api = cke.Predict_noConflict(df_all_api, r6, r7)

# Time range taken from the wake calls, used to look for the next concurrent call.
rangeT = cke.Get_next_range(df_all_api)
print(rangeT)

# Check whether any further call becomes concurrent during rangeT.
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
print(extra_conc)

if extra_conc == 0:  # no extra concurrency: update timing using pred_end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # If a call finished, update the timing of the other calls in its stream.
    df_all_api = cke.UpdateStreamTime(df_all_api)


6
None
[25.745115999999939, 27.021733999999924]
0

In [31]:
# Wake the next sleeping call (r8). If it comes from the same stream as the
# active call there is no overlap.
r8 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r8)
print(r8)  # print() form: same output on Python 2, also valid on Python 3

# The current position is ahead of the coming call's start: move to the next start.
df_all_api = cke.StartNext_checktype(df_all_api, [r6, r8])

# Check whether r6 and r8 are the same API type. Different types do not
# conflict; two transfers of the same type overlap, so the transfer-overlap
# prediction is used for them.
whichType = cke.checkType(df_all_api, r6, r8)
print(whichType)

if whichType in ('h2d', 'd2h'):
    # Both calls are transfers in the same direction: predict with 2-way sharing.
    df_all_api = cke.Predict_transferOvlp(df_all_api, r6, r8, ways=2.0)

# Time range taken from the wake calls, used to look for the next concurrent call.
rangeT = cke.Get_next_range(df_all_api)
print(rangeT)

# Check whether any further call becomes concurrent during rangeT.
extra_conc = cke.check_cc_by_time(df_all_api, rangeT)
print(extra_conc)

if extra_conc == 0:  # no extra concurrency: update timing using pred_end
    df_all_api = cke.Update_wake_transferOvlp(df_all_api, rangeT, ways=2.0)
    # If a call finished, update the timing of the other calls in its stream.
    df_all_api = cke.UpdateStreamTime(df_all_api)


7
d2h
[27.028517999999963, 33.250947999999894]
0

In [32]:
# Try to pick another sleeping call; in this run none is left and the result
# is None.
r9 = cke.pick_first_in_sleep(df_all_api)
print(r9)  # print() form: same output on Python 2, also valid on Python 3

# Identity test against None (PEP 8); '==' can be overridden by the operand.
if r9 is None:
    # Nothing left to wake: finish the call that is currently awake
    # (this is the last API call).
    df_all_api = cke.UpdateStream_lastapi(df_all_api)


None

In [33]:
# Inspect the final call table once no sleeping call is left.
df_all_api


Out[33]:
start end api_type size_kb stream_id status bw bytes_done bytes_left current_pos time_left pred_end
0 0.000000 9.715199 h2d 38146.973 0.0 done 5926.373991 38146.973 0.0 9.715199 0.0 9.715199
4 3.158431 16.030845 h2d 38146.973 1.0 done 5926.373991 38146.973 0.0 16.030845 0.0 16.030845
1 9.716415 22.575197 h2d 38146.973 0.0 done 5932.656133 38146.973 0.0 22.575197 0.0 22.575197
5 16.032061 25.733628 h2d 38146.973 1.0 done 5932.656133 38146.973 0.0 25.733628 0.0 25.733628
2 22.586685 23.863303 kern 0.000 0.0 done 0.000000 0.000 0.0 23.863303 0.0 23.863303
3 23.870087 33.250948 d2h 38146.973 0.0 done 6084.390251 38146.973 0.0 33.250948 0.0 33.250948
6 25.745116 27.021734 kern 0.000 1.0 done 0.000000 0.000 0.0 27.021734 0.0 27.021734
7 27.028518 36.409379 d2h 38146.973 1.0 done 6084.390251 38146.973 0.0 36.409379 0.0 36.409379

In [34]:
#
# run above
#

In [35]:
# Modelled timeline of stream 0 only.
df_all_api[df_all_api['stream_id'] == 0]


Out[35]:
start end api_type size_kb stream_id status bw bytes_done bytes_left current_pos time_left pred_end
0 0.000000 9.715199 h2d 38146.973 0.0 done 5926.373991 38146.973 0.0 9.715199 0.0 9.715199
1 9.716415 22.575197 h2d 38146.973 0.0 done 5932.656133 38146.973 0.0 22.575197 0.0 22.575197
2 22.586685 23.863303 kern 0.000 0.0 done 0.000000 0.000 0.0 23.863303 0.0 23.863303
3 23.870087 33.250948 d2h 38146.973 0.0 done 6084.390251 38146.973 0.0 33.250948 0.0 33.250948

In [36]:
# Modelled timeline of stream 1 only.
df_all_api[df_all_api['stream_id'] == 1]


Out[36]:
start end api_type size_kb stream_id status bw bytes_done bytes_left current_pos time_left pred_end
4 3.158431 16.030845 h2d 38146.973 1.0 done 5926.373991 38146.973 0.0 16.030845 0.0 16.030845
5 16.032061 25.733628 h2d 38146.973 1.0 done 5932.656133 38146.973 0.0 25.733628 0.0 25.733628
6 25.745116 27.021734 kern 0.000 1.0 done 0.000000 0.000 0.0 27.021734 0.0 27.021734
7 27.028518 36.409379 d2h 38146.973 1.0 done 6084.390251 38146.973 0.0 36.409379 0.0 36.409379

3cke

plot all the stream timeline


In [37]:
# Plot the timeline of every stream in df_cke_list; savefig=True presumably
# also writes the figure to disk -- TODO confirm in cuda_timeline.
cuda_timeline.plot_cke_list(df_cke_list, savefig=True)


/Users/leiming/anaconda2/lib/python2.7/site-packages/matplotlib/axes/_base.py:1292: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  if aspect == 'normal':
/Users/leiming/anaconda2/lib/python2.7/site-packages/matplotlib/axes/_base.py:1297: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  elif aspect in ('equal', 'auto'):

In [38]:
# Plot only the first two stream timelines (without the savefig option).
cuda_timeline.plot_cke_list(df_cke_list[0:2])



In [39]:
# Total runtime computed from the first two per-stream traces.
# NOTE(review): the value printed in the saved output (20.432566) equals the
# span of the single-stream trace (631.272837 - 610.840271), not the modelled
# two-stream end of 36.409379 shown in df_all_api. df_cke_list is not
# reassigned by the simulation cells, and getTotalRuntime was earlier called
# with a DataFrame rather than a list -- confirm this is the intended input.
tot_runtime = read_trace.getTotalRuntime(df_cke_list[0:2])
# print() with a single argument prints the same text under Python 2 and is
# also valid Python 3, unlike the bare print statement.
print(tot_runtime)


20.432566