In [1]:

    
%load_ext autoreload
%autoreload 2



In [2]:

    
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg, add the modules
import operator # sorting
from math import *
import matplotlib.pyplot as plt

sys.path.append('../')

import read_trace
import cuda_timeline
from avgblkmodel import *
import cke
from df_util import *
#from model_cke import *

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

gpu info



In [3]:

    
gtx950 = DeviceInfo()
gtx950.sm_num = 6
gtx950.sharedmem_per_sm = 49152
gtx950.reg_per_sm = 65536
gtx950.maxthreads_per_sm = 2048

2 stream info



In [4]:

    
# 10M for mem_mem : where the h2d between streams are overlapped
trace_file = 'trace_10M_s1.csv'
trace_file_2cke = 'trace_h2d_h2d_ovlp.csv'

df_trace = read_trace.trace2dataframe(trace_file) # read the trace to the dataframe
df_trace_2cke = read_trace.trace2dataframe(trace_file_2cke)



In [5]:

    
#df_trace



In [6]:

    
#cuda_timeline.plot_trace(df_trace)



In [7]:

    
#df_trace_2cke



In [8]:

    
#cuda_timeline.plot_trace(df_trace_2cke)

1cke - read trace and reset the timeline



In [9]:

    
df_single_stream = read_trace.get_timing(df_trace)



In [10]:

    
df_single_stream



In [11]:

    
df_s1 = read_trace.reset_starting(df_single_stream)



In [12]:

    
#df_s1

2cke case



In [13]:

    
df_2stream = read_trace.get_timing(df_trace_2cke)



In [14]:

    
df_2stream



In [15]:

    
tot_runtime = read_trace.getTotalRuntime(df_2stream)
print tot_runtime

2 cke



In [16]:

    
stream_num = 2

# find when to start the stream and update the starting pos for the trace
H2D_H2D_OVLP_TH = 3.158431

df_cke_list = cke.init_trace_list(df_s1, stream_num = stream_num, h2d_ovlp_th = H2D_H2D_OVLP_TH)



In [17]:

    
#df_cke_list[0]



In [18]:

    
#df_cke_list[1]

sort



In [19]:

    
df_all_api = cke.init_sort_api_with_extra_cols(df_cke_list)



In [20]:

    
#df_all_api



In [21]:

    
#df_all_api.loc[df_all_api.stream_id == 0]



In [22]:

    
# print df_all_api.iloc[0]
# print df_all_api.iloc[1]



In [23]:

    
#df_all_api



In [24]:

    
# pick the 1st sleep call and wake up
r1 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r1)

# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r2 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r2)
#print('r2 {}'.format(r2))

# check concurrency
conc = cke.check_cc(df_all_api, r1, r2)

# concurrency
if conc == True:
    df_all_api = cke.update_before_conc(df_all_api, r1, r2)
    # set r2 to wake
    df_all_api = SetWake(df_all_api, r2)
    # current concurrency
    cc = 2.0
    # predict with concurrency for rows 0 and 1
    df_all_api = cke.Predict_end(df_all_api, [r1, r2], ways = cc)
    # get the time range from wake api, to check the next concurrent api
    rangeT = cke.Get_next_range(df_all_api)
    #print rangeT
    
    extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
    
    if extra_conc == 0: # update timing using the pred_end
        df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways = cc)
        # check if any api is done, and update the timing for the other apis in that stream
        df_all_api = cke.UpdateStreamTime(df_all_api)



In [25]:

    
df_all_api









    Out[25]:






  
    
      
      start
      end
      api_type
      size_kb
      stream_id
      status
      bw
      bytes_done
      bytes_left
      current_pos
      time_left
      pred_end
    
  
  
    
      0
      0.000000
      9.715199
      h2d
      38146.973
      0.0
      done
      5926.373991
      38146.97300
      0.00000
      9.715199
      0.0
      9.715199
    
    
      4
      3.158431
      9.595246
      h2d
      38146.973
      1.0
      wake
      5926.373991
      19428.92967
      18718.04333
      9.715199
      0.0
      0.000000
    
    
      1
      9.716415
      16.146414
      h2d
      38146.973
      0.0
      sleep
      5932.656133
      0.00000
      38146.97300
      0.000000
      0.0
      0.000000
    
    
      5
      9.596462
      16.026461
      h2d
      38146.973
      1.0
      sleep
      5932.656133
      0.00000
      38146.97300
      0.000000
      0.0
      0.000000
    
    
      2
      16.157902
      17.434520
      kern
      0.000
      0.0
      sleep
      0.000000
      0.00000
      0.00000
      0.000000
      0.0
      0.000000
    
    
      3
      17.441304
      23.710950
      d2h
      38146.973
      0.0
      sleep
      6084.390251
      0.00000
      38146.97300
      0.000000
      0.0
      0.000000
    
    
      6
      16.037949
      17.314567
      kern
      0.000
      1.0
      sleep
      0.000000
      0.00000
      0.00000
      0.000000
      0.0
      0.000000
    
    
      7
      17.321351
      23.590997
      d2h
      38146.973
      1.0
      sleep
      6084.390251
      0.00000
      38146.97300
      0.000000
      0.0
      0.000000



In [26]:

    
## select the next api/call to wake it up

# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r3 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r3)
#print r3

# the current_pos is ahead of coming call start: move to the next start
df_all_api = cke.StartNext(df_all_api, [r2, r3])

# check the concurrency, and predict accordingly
# check concurrency
df_all_api = cke.Predict_checkCC(df_all_api, r2, r3)

# get the time range from wake api, to check the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
#print rangeT

extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
#print extra_conc

if extra_conc == 0: # update timing using the pred_end
    df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways = 2.0)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)



In [27]:

    
# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r4 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r4)
#print r4

# the current_pos is ahead of coming call start: move to the next start
df_all_api = cke.StartNext(df_all_api, [r3, r4])

# check the concurrency, and predict accordingly
# check concurrency
df_all_api = cke.Predict_checkCC(df_all_api, r3, r4)

# get the time range from wake api, to check the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
#print rangeT

extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
# print extra_conc

if extra_conc == 0: # update timing using the pred_end
    df_all_api = cke.Update_with_pred_end(df_all_api, rangeT, ways = 2.0)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)



In [28]:

    
# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r5 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r5)
print r5

# the current_pos is ahead of coming call start: move to the next start
df_all_api = cke.StartNext_checktype(df_all_api, [r4, r5])

# we need to check whether r4 and r5 are the same api type
# if not, there will be no conflict (in this case, we can directly predict their end time)
whichType = cke.checkType(df_all_api, r4, r5)

if whichType == None:
    # api type is different, there is no conflict
    df_all_api = cke.Predict_noConflict(df_all_api, r4, r5)
    
# get the time range from wake api, to check the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
#print rangeT

extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
#print extra_conc

if extra_conc == 0: # update timing using the pred_end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)



In [29]:

    
# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r6 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r6)
print r4
print r6

# Noted: we need to work on active wake api, use r4 and r6
df_all_api = cke.StartNext_checktype(df_all_api, [r4, r6])

# we need to check whether r4 and r5 are the same api type
# if not, there will be no conflict (in this case, we can directly predict their end time)
whichType = cke.checkType(df_all_api, r4, r6)
print whichType

if whichType == None:
    df_all_api = cke.Predict_noConflict(df_all_api, r4, r6) # api type is different, there is no conflict
    
# get the time range from wake api, to check the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
# print rangeT

extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
# print extra_conc

if extra_conc == 0: # update timing using the pred_end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)









    



5
3
None



In [30]:

    
# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r7 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r7)
print r7

# the current_pos is ahead of coming call start: move to the next start
df_all_api = cke.StartNext_checktype(df_all_api, [r6, r7])

# we need to check whether they are the same api type
# if not, there will be no conflict (in this case, we can directly predict their end time)
whichType = cke.checkType(df_all_api, r6, r7)
print whichType

if whichType == None:
    df_all_api = cke.Predict_noConflict(df_all_api, r6, r7) # api type is different, there is no conflict
    
# get the time range from wake api, to check the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
print rangeT    

extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
print extra_conc

if extra_conc == 0: # update timing using the pred_end
    df_all_api = cke.Update_wake_noConflict(df_all_api, rangeT)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)









    



6
None
[25.745115999999939, 27.021733999999924]
0



In [31]:

    
# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r8 = cke.pick_first_in_sleep(df_all_api)
df_all_api = SetWake(df_all_api, r8)
print r8

# the current_pos is ahead of coming call start: move to the next start
df_all_api = cke.StartNext_checktype(df_all_api, [r6, r8])

# we need to check whether r6 and r8 are the same api type
# if not, there will be no conflict (in this case, we can directly predict their end time)
# if they are overlapping, use conflict version
whichType = cke.checkType(df_all_api, r6, r8)
print whichType

if whichType in ['h2d', 'd2h']:
    df_all_api = cke.Predict_transferOvlp(df_all_api, r6, r8, ways = 2.0)
    
# get the time range from wake api, to check the next concurrent api
rangeT = cke.Get_next_range(df_all_api)
print rangeT    

extra_conc = cke.check_cc_by_time(df_all_api, rangeT) # check whether there is conc during the rangeT
print extra_conc

if extra_conc == 0: # update timing using the pred_end
    df_all_api = cke.Update_wake_transferOvlp(df_all_api, rangeT, ways = 2.0)
    # check if any api is done, and update the timing for the other apis in that stream
    df_all_api = cke.UpdateStreamTime(df_all_api)









    



7
d2h
[27.028517999999963, 33.250947999999894]
0



In [32]:

    
# pick another in the sleep mode, if it is from the same stream, there is non ovlp
r9 = cke.pick_first_in_sleep(df_all_api)
print r9
# df_all_api = SetWake(df_all_api, r8)
# print r9

if r9 == None:
    # work on current wake api (this is the last api call)
    # current_wake
    df_all_api = cke.UpdateStream_lastapi(df_all_api)









    



None



In [33]:

    
df_all_api









    Out[33]:






  
    
      
      start
      end
      api_type
      size_kb
      stream_id
      status
      bw
      bytes_done
      bytes_left
      current_pos
      time_left
      pred_end
    
  
  
    
      0
      0.000000
      9.715199
      h2d
      38146.973
      0.0
      done
      5926.373991
      38146.973
      0.0
      9.715199
      0.0
      9.715199
    
    
      4
      3.158431
      16.030845
      h2d
      38146.973
      1.0
      done
      5926.373991
      38146.973
      0.0
      16.030845
      0.0
      16.030845
    
    
      1
      9.716415
      22.575197
      h2d
      38146.973
      0.0
      done
      5932.656133
      38146.973
      0.0
      22.575197
      0.0
      22.575197
    
    
      5
      16.032061
      25.733628
      h2d
      38146.973
      1.0
      done
      5932.656133
      38146.973
      0.0
      25.733628
      0.0
      25.733628
    
    
      2
      22.586685
      23.863303
      kern
      0.000
      0.0
      done
      0.000000
      0.000
      0.0
      23.863303
      0.0
      23.863303
    
    
      3
      23.870087
      33.250948
      d2h
      38146.973
      0.0
      done
      6084.390251
      38146.973
      0.0
      33.250948
      0.0
      33.250948
    
    
      6
      25.745116
      27.021734
      kern
      0.000
      1.0
      done
      0.000000
      0.000
      0.0
      27.021734
      0.0
      27.021734
    
    
      7
      27.028518
      36.409379
      d2h
      38146.973
      1.0
      done
      6084.390251
      38146.973
      0.0
      36.409379
      0.0
      36.409379



In [34]:

    
#
# run above
#



In [35]:

    
df_all_api.loc[df_all_api.stream_id == 0]









    Out[35]:






  
    
      
      start
      end
      api_type
      size_kb
      stream_id
      status
      bw
      bytes_done
      bytes_left
      current_pos
      time_left
      pred_end
    
  
  
    
      0
      0.000000
      9.715199
      h2d
      38146.973
      0.0
      done
      5926.373991
      38146.973
      0.0
      9.715199
      0.0
      9.715199
    
    
      1
      9.716415
      22.575197
      h2d
      38146.973
      0.0
      done
      5932.656133
      38146.973
      0.0
      22.575197
      0.0
      22.575197
    
    
      2
      22.586685
      23.863303
      kern
      0.000
      0.0
      done
      0.000000
      0.000
      0.0
      23.863303
      0.0
      23.863303
    
    
      3
      23.870087
      33.250948
      d2h
      38146.973
      0.0
      done
      6084.390251
      38146.973
      0.0
      33.250948
      0.0
      33.250948



In [36]:

    
df_all_api.loc[df_all_api.stream_id == 1]









    Out[36]:






  
    
      
      start
      end
      api_type
      size_kb
      stream_id
      status
      bw
      bytes_done
      bytes_left
      current_pos
      time_left
      pred_end
    
  
  
    
      4
      3.158431
      16.030845
      h2d
      38146.973
      1.0
      done
      5926.373991
      38146.973
      0.0
      16.030845
      0.0
      16.030845
    
    
      5
      16.032061
      25.733628
      h2d
      38146.973
      1.0
      done
      5932.656133
      38146.973
      0.0
      25.733628
      0.0
      25.733628
    
    
      6
      25.745116
      27.021734
      kern
      0.000
      1.0
      done
      0.000000
      0.000
      0.0
      27.021734
      0.0
      27.021734
    
    
      7
      27.028518
      36.409379
      d2h
      38146.973
      1.0
      done
      6084.390251
      38146.973
      0.0
      36.409379
      0.0
      36.409379

3cke

plot all the stream timeline



In [37]:

    
cuda_timeline.plot_cke_list(df_cke_list, savefig=True)









    



/Users/leiming/anaconda2/lib/python2.7/site-packages/matplotlib/axes/_base.py:1292: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  if aspect == 'normal':
/Users/leiming/anaconda2/lib/python2.7/site-packages/matplotlib/axes/_base.py:1297: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  elif aspect in ('equal', 'auto'):



In [38]:

    
cuda_timeline.plot_cke_list(df_cke_list[0:2])



In [39]:

    
tot_runtime = read_trace.getTotalRuntime(df_cke_list[0:2])
print tot_runtime

	api_type	start	end	size	duration
0	h2d	610.840271	617.277086	38146.973	6.436815
1	h2d	617.278302	623.708301	38146.973	6.429999
2	kern	623.719789	624.996407	0.000	1.276618
3	d2h	625.003191	631.272837	38146.973	6.269646

	stream	api_type	start	end	size	duration
0	0.0	h2d	665.032627	675.815726	38146.973	10.783099
1	0.0	h2d	675.816942	688.730069	38146.973	12.913127
2	0.0	kern	688.741397	690.119293	0.000	1.377896
3	0.0	d2h	690.126461	700.784951	38146.973	10.658490
4	1.0	h2d	668.190628	681.089483	38146.973	12.898855
5	1.0	h2d	681.090667	691.303811	38146.973	10.213144
6	1.0	kern	691.316867	692.682987	0.000	1.366120
7	1.0	d2h	693.294222	702.817090	38146.973	9.522868

	start	end	api_type	size_kb	stream_id	status	bw	bytes_done	bytes_left	current_pos	pred_end
0	0.000000	9.715199	h2d	38146.973	0.0	done	5926.373991	38146.97300	0.00000	9.715199	9.715199
4	3.158431	9.595246	h2d	38146.973	1.0	wake	5926.373991	19428.92967	18718.04333	9.715199	0.000000
1	9.716415	16.146414	h2d	38146.973	0.0	sleep	5932.656133	0.00000	38146.97300	0.000000	0.000000
5	9.596462	16.026461	h2d	38146.973	1.0	sleep	5932.656133	0.00000	38146.97300	0.000000	0.000000
2	16.157902	17.434520	kern	0.000	0.0	sleep	0.000000	0.00000	0.00000	0.000000	0.000000
3	17.441304	23.710950	d2h	38146.973	0.0	sleep	6084.390251	0.00000	38146.97300	0.000000	0.000000
6	16.037949	17.314567	kern	0.000	1.0	sleep	0.000000	0.00000	0.00000	0.000000	0.000000
7	17.321351	23.590997	d2h	38146.973	1.0	sleep	6084.390251	0.00000	38146.97300	0.000000	0.000000

	start	end	api_type	size_kb	stream_id	status	bw	bytes_done	current_pos	pred_end
4	3.158431	16.030845	h2d	38146.973	1.0	done	5926.373991	38146.973	16.030845	16.030845
5	16.032061	25.733628	h2d	38146.973	1.0	done	5932.656133	38146.973	25.733628	25.733628
6	25.745116	27.021734	kern	0.000	1.0	done	0.000000	0.000	27.021734	27.021734
7	27.028518	36.409379	d2h	38146.973	1.0	done	6084.390251	38146.973	36.409379	36.409379