In [1]:
import numpy as np
from numba import cuda
from MyML.graph.mst import boruvka_minho_seq, boruvka_minho_gpu
from MyML.graph.mst import memSet, compute_cuda_grid_dim
from MyML.graph.connected_components import connected_comps_seq as getLabels_seq,\
                                            connected_comps_gpu as getLabels_gpu
from MyML.graph.build import getGraphFromEdges_gpu

from numbapro.cudalib.sorting import RadixSort


/home/chiroptera/anaconda/lib/python2.7/site-packages/numba/cuda/decorators.py:106: UserWarning: autojit is deprecated and will be removed in a future release. Use jit instead.
  warn('autojit is deprecated and will be removed in a future release. Use jit instead.')

The RadixSort.argsort() sorts the input array and outputs an array containing the indices of the sort. In the end the input array is sorted!


In [36]:
4e7*4/1024/1024


Out[36]:
152.587890625

In [43]:
a=np.random.randint(0,1e4,1e6)
dA = cuda.to_device(a)

In [42]:
del a

In [44]:
@cuda.reduce
def argmax_gpu(a,b):
    if a >= b:
        return a
    else:
        return b

In [45]:
%time a.max()


CPU times: user 0 ns, sys: 3.43 ms, total: 3.43 ms
Wall time: 1.81 ms
Out[45]:
9999

In [47]:
%time argmax_gpu(dA)


CPU times: user 9.39 ms, sys: 142 µs, total: 9.53 ms
Wall time: 8.64 ms
Out[47]:
9999

In [3]:
sorter = RadixSort(maxcount=dA.size, dtype=dA.dtype)

In [4]:
dRes = sorter.argsort(dA)

In [5]:
res = dRes.copy_to_host()

In [6]:
a


Out[6]:
array([9370830, 9329392, 5628464, ...,  693366, 2640363, 6979016])

In [7]:
res


Out[7]:
array([32651, 27703, 86290, ..., 94277, 87817, 80868], dtype=uint32)

sl mst lifetime gpu


In [48]:
weight = dA

In [36]:
MAX_TPB = 256
myStream = 0

Compute the MST for the input graph.


In [26]:
# get MST
mst, n_edges = boruvka_minho_gpu(dest, weight, firstEdge, outDegree, MAX_TPB=MAX_TPB)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-93d7cb749dc5> in <module>()
----> 1 mst, n_edges = boruvka_minho_gpu(dest, weight, firstEdge, outDegree, MAX_TPB=MAX_TPB)

NameError: name 'boruvka_minho_gpu' is not defined

Allocate array for the mst weights.


In [ ]:
h_n_edges = n_edges.getitem(0)
mst_weights = cuda.device_array(h_n_edges, dtype = weight.dtype)

Get array with only the considered weights in the MST.


In [ ]:
mstGrid = compute_cuda_grid_dim(h_n_edges, MAX_TPB)
getWeightsOfEdges_gpu[mstGrid, MAX_TPB, myStream](mst, n_edges, weight, mst_weights)

Sort the MST weights. There are no repeted edges at this point since the output MST is like a directed graph.


In [ ]:
RadixSort.sort

In [31]:
sorter = RadixSort(maxcount = mst_weights.size, dtype = mst_weights.dtype)
sortedWeightArgs = sorter.argsort(mst_weights)

Allocate array for the lifetimes.


In [34]:
lifetimes = cuda.device_array(mst_weights.size - 1, dtype = nweight.dtype)

In [39]:
compute_lifetimes_CUDA[lifetimeGrid, MAX_TPB, myStream](nweight, lifetimes)

Between the above cell and the next, I have to get the argmax of lifetimes. I also have to sort the mst with the argsort from the weights. Don't forget to update the number of n_edges. Send those arrays to build the resulting graph.


In [ ]:
# get contracted graph from MST
ndest, nweight, nfe, nod = getGraphFromEdges_gpu(dest, weight, fe, od, edges = mst, n_edges = n_edges,\
                                                 MAX_TPB = MAX_TPB, stream = myStream)

In [ ]:


In [ ]:


In [30]:
nweight = dA

In [38]:
lifetimeGrid = compute_cuda_grid_dim(lifetimes.size, MAX_TPB)

In [49]:
@cuda.jit
def getWeightsOfEdges_gpu(edges, n_edges, weights, nweights):
    """
    This function will take a list of edges (edges), the number of edges to consider (n_edges,
    the weights of all the possible edges (weights) and the array for the weights of the list
    of edges and put the weight of each edge in the list of edges in the nweights, in the same
    position.
    """
    n_edges_sm = cuda.shared.array(1, dtype = np.int32)
    edge = cuda.grid(1)
    
    if edge == 0:
        n_edges_sm[0] = n_edges[0]
    
    if edge >= edges.size:
        return
    
    cuda.syncthreads()
    
    nweights[edge] = weights[edges[edge]]

In [35]:
@cuda.jit
def compute_lifetimes_CUDA(nweight, lifetimes):
    edge = cuda.grid(1)
    
    if edge >= lifetimes.size:
        return
    
    lifetimes[edge] = nweight[edge + 1] - nweight[edge]

In [32]:
res = sortedWeightArgs.copy_to_host()

In [33]:
res


Out[33]:
array([76114, 42708, 19995, ..., 69567, 66341, 81165], dtype=uint32)