In [1]:
include("../util.jl")
include("../io.jl")
include("../graph.jl")
include("../pr.jl")
include("../rw.jl")

using Statistics

In [2]:
@info("loading graph")
core = adjlist(UInt32, is_directed=true)
load_mgs3_graph(core, "../datasets/Arxiv_HEP-PH/Arxiv_HEP-PH_core.mgs")
#load_mgs4_graph(core, "../datasets/Arxiv_HEP-PH/Arxiv_HEP-PH_core.mgz")

In [3]:
@info("getting rcore")
rcore = get_reverse_graph(core)

@info("transforming core from adj to inc list")
core2 = get_inclist_from_adjlist(core)
#serialize_to_jld(core2, "core", "Arxiv_HEP-PH_core_inclist")


┌ Info: getting rcore
└ @ Main In[3]:1
┌ Info: transforming core from adj to inc list
└ @ Main In[3]:4
Out[3]:
Directed Graph (12711 vertices, 139981 edges)

In [4]:
@info("computing Djikstra from vertex 1")
dists = ones(num_edges(core2))
s = convert(UInt32,1)
t = convert(UInt32,100)
n = num_vertices(core)
@time r = dijkstra_shortest_paths(core2, dists, s)


┌ Info: computing Djikstra from vertex 1
└ @ Main In[4]:1
┌ Warning: `mutable_binary_minheap(::Type{T}) where T` is deprecated, use `MutableBinaryMinHeap{T}()` instead.
│   caller = create_dijkstra_states(::GenericIncidenceList{UInt32,Edge{UInt32},Array{UInt32,1},Array{Array{Edge{UInt32},1},1}}, ::Type{Float64}) at dijkstra_spath.jl:33
└ @ Graphs /home/jimmy/.julia/packages/Graphs/Oo0uj/src/dijkstra_spath.jl:33
  1.553710 seconds (1.40 M allocations: 64.550 MiB, 1.22% gc time)
Out[4]:
DijkstraStates{UInt32,Float64,MutableBinaryHeap{Graphs.DijkstraHEntry{UInt32,Float64},DataStructures.LessThan},Int64}(UInt32[0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000004, 0x00000004, 0x00000003, 0x00000005  …  0x00002f80, 0x000022c2, 0x00002eb5, 0x00000df6, 0x00001501, 0x00001194, 0x00001986, 0x00002ceb, 0x00002da2, 0x000028ad], [1, 1, 1, 1, 1, 1, 4, 4, 3, 5  …  12160, 8898, 11957, 3574, 5377, 4500, 6534, 11499, 11682, 10413], [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0  …  18.0, 19.0, 17.0, 9.0, 8.0, 15.0, 18.0, 19.0, 22.0, 8.0], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  2, 2, 2, 2, 2, 2, 2, 2, 2, 2], MutableBinaryHeap(), [0, 1, 2, 3, 4, 5, 14, 15, 12, 13  …  10855, 11267, 10286, 4423, 2875, 8515, 11033, 11426, 12138, 3542])

In [5]:
@info("mean distance from vertex 1: ", mean(r.dists))
paths = enumerate_paths(vertices(core2), r.parent_indices, [10,100])
@info("shortest paths from vertex 1 to vertices 10 and 100: ", convert(Array{Array{Int64,1},1}, paths))


┌ Info: mean distance from vertex 1: 
│   mean(r.dists) = 12.33286130123515
└ @ Main In[5]:1
┌ Info: shortest paths from vertex 1 to vertices 10 and 100: 
│   convert(Array{Array{Int64, 1}, 1}, paths) = Array{Int64,1}[[1, 5, 10], [1, 4, 7, 215, 1359, 100]]
└ @ Main In[5]:3

In [6]:
@info("computing Bellman-Ford from vertex 1")
@time r = bellman_ford_shortest_paths(core2, dists, UInt32[1])


┌ Info: computing Bellman-Ford from vertex 1
└ @ Main In[6]:1
  0.757690 seconds (436.73 k allocations: 17.399 MiB, 1.34% gc time)
Out[6]:
BellmanFordStates{UInt32,Float64}(UInt32[0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000004, 0x00000004, 0x00000003, 0x00000005  …  0x00002f80, 0x000022c2, 0x00002eb5, 0x00000df6, 0x00001501, 0x00001194, 0x00001986, 0x00002ceb, 0x00002da2, 0x000028ad], [1, 1, 1, 1, 1, 1, 4, 4, 3, 5  …  12160, 8898, 11957, 3574, 5377, 4500, 6534, 11499, 11682, 10413], [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0  …  18.0, 19.0, 17.0, 9.0, 8.0, 15.0, 18.0, 19.0, 22.0, 8.0])

In [7]:
@info("mean distance from vertex 1: ", mean(r.dists))
paths = enumerate_paths(vertices(core2), r.parent_indices, [10,100])
@info("shortest paths from vertex 1 to vertices 10 and 100: ", convert(Array{Array{Int64,1},1}, paths))


┌ Info: mean distance from vertex 1: 
│   mean(r.dists) = 12.33286130123515
└ @ Main In[7]:1
┌ Info: shortest paths from vertex 1 to vertices 10 and 100: 
│   convert(Array{Array{Int64, 1}, 1}, paths) = Array{Int64,1}[[1, 5, 10], [1, 4, 7, 215, 1359, 100]]
└ @ Main In[7]:3

In [9]:
# NB: does not work. Error is 'type UInt32 has no field index'
#@info("computing A* from node 1 to vertex 100")
#@time r = shortest_path(core2, dists, s, t)
#@info("shortest path from vertex 1 to vertex 100: ", r)

In [10]:
@info("getting P matrix for core and rcore")
P_core = get_sparse_P_matrix(core)
P_rcore = get_sparse_P_matrix(rcore)


┌ Info: getting P matrix for core and rcore
└ @ Main In[10]:1
Out[10]:
12711×12711 SparseMatrixCSC{Float64,UInt32} with 139981 stored entries:
  [2    ,     1]  =  0.125
  [3    ,     1]  =  0.0333333
  [4    ,     1]  =  0.333333
  [5    ,     1]  =  0.125
  [6    ,     1]  =  0.111111
  [11   ,     2]  =  0.0181818
  [12   ,     2]  =  0.0117647
  [13   ,     2]  =  0.0384615
  [9    ,     3]  =  0.00909091
  [2    ,     4]  =  0.125
  [7    ,     4]  =  0.0140845
  [8    ,     4]  =  1.0
  ⋮
  [5538 , 12709]  =  0.0163934
  [6579 , 12709]  =  0.0322581
  [7836 , 12709]  =  0.0769231
  [7837 , 12709]  =  0.0344828
  [7838 , 12709]  =  0.047619
  [7840 , 12709]  =  0.0526316
  [7849 , 12709]  =  0.0588235
  [7859 , 12709]  =  0.142857
  [8326 , 12709]  =  0.25
  [7367 , 12710]  =  0.0833333
  [7788 , 12710]  =  0.333333
  [8642 , 12711]  =  0.125

In [11]:
@info("computing Pagerank of core and rcore")
eps = 1e-4
#@time pr_core = PR(core, rcore, epsilon=eps)
#@time pr_rcore = PR(rcore, core, epsilon=eps)
@time pr_core = PR(P_core, epsilon=eps)
@time pr_rcore = PR(P_rcore, epsilon=eps)


┌ Info: computing Pagerank of core and rcore
└ @ Main In[11]:1
  0.645507 seconds (1.52 M allocations: 143.262 MiB, 4.24% gc time)
  0.050199 seconds (499 allocations: 62.924 MiB, 4.19% gc time)
Out[11]:
12711-element Array{Float64,1}:
 2.3506964650411283e-5 
 1.3191413443360207e-5 
 1.2708125781439715e-5 
 2.458289022013108e-5  
 1.3946375784855819e-5 
 1.585286447402359e-5  
 1.3228512296329774e-5 
 1.3202423281355177e-5 
 0.00011675083360173247
 1.4702379825133946e-5 
 1.5560337979231746e-5 
 1.3615841593588844e-5 
 3.100862587783376e-5  
 ⋮                     
 2.7115581298810245e-5 
 1.2899968839647104e-5 
 5.617321927161728e-5  
 1.2350273743955862e-5 
 1.896742702826373e-5  
 2.410345302407722e-5  
 7.770864623129471e-5  
 1.3970388111074435e-5 
 1.4415255470883909e-5 
 2.273973208510039e-5  
 1.736890154128226e-5  
 1.3206918517903125e-5 

In [12]:
@info("computing personalized Pageranks for nodes 1 and 100")
#@time pr_s = PPR(s, core, rcore, epsilon=eps)
#@time pr_t = PPR(t, rcore, core, epsilon=eps)
ppr_s = zeros(Float64,n)
ppr_s[s] = 1.
@time pr_s = PR(P_core, ppr=ppr_s, epsilon=1e-10)
ppr_t = zeros(Float64,n)
ppr_t[t] = 1.
@time pr_t = PR(P_rcore, ppr=ppr_t, epsilon=1e-10)


┌ Info: computing personalized Pageranks for nodes 1 and 100
└ @ Main In[12]:1
  0.217154 seconds (16.10 k allocations: 281.306 MiB, 7.71% gc time)
  0.240526 seconds (2.59 k allocations: 339.572 MiB, 4.65% gc time)
Out[12]:
12711-element Array{Float64,1}:
 1.1788265515358563e-8 
 8.072683281701687e-9  
 1.6580750143672115e-8 
 3.5985528437723655e-8 
 1.414443983577478e-9  
 1.2131004557771423e-9 
 2.8733066538121923e-6 
 8.577225986868396e-10 
 2.1457441361247574e-6 
 7.779520517177703e-9  
 7.310123732551845e-12 
 1.141723445703547e-13 
 2.4692564505614307e-7 
 ⋮                     
 3.686105154360519e-8  
 2.394202281468971e-9  
 1.8935007586698442e-8 
 1.3592252785873656e-13
 7.858348221012546e-11 
 2.0176700331222436e-7 
 5.20747614897535e-10  
 1.4714645441595853e-8 
 4.8877227293150425e-8 
 2.163904669296991e-9  
 9.425332763297654e-7  
 2.0628076663653054e-18

In [13]:
@info("computing diffusion fingerprints")
df = pr_s .* pr_t
pr_boost = pr_core .* pr_rcore
df_boost = df ./ pr_boost


┌ Info: computing diffusion fingerprints
└ @ Main In[13]:1
Out[13]:
12711-element Array{Float64,1}:
 6.175579439588858     
 0.4264694104968008    
 0.43029102217018755   
 1.9841322848590288    
 0.11241826120938118   
 0.0645886707977994    
 1.367708429272806     
 0.027399489696976847  
 0.4070095397477239    
 0.16677431317126673   
 6.078528099619864e-6  
 1.2244525025940415e-7 
 0.6122671009066754    
 ⋮                     
 5.3335299319671955e-21
 2.670420141079053e-22 
 5.723645350213881e-21 
 5.6003403799701095e-27
 2.2491104283117916e-21
 3.2568150151026073e-7 
 2.4002968795242596e-8 
 4.732754564957352e-17 
 1.0088513139408395e-21
 1.5691388131444544e-22
 9.594110258527804e-21 
 8.103246055303954e-16 

In [14]:
# max length
ml = log(length(vertices(core)))
# current vertex
cv = s
# current path
sp = UInt32[]

@info("finding shortest path (greedy approach)")

# greedy approach
while length(sp) < ml
	global cv
	@debug("adding vertex: ", cv)
	# add new vertex to the path
	push!(sp,cv)
	# get neighbors not already in the path
	nei = out_neighbors(cv,core)
	nnei = setdiff(nei,sp)
	# we reached a dead end...
	if length(nnei) == 0
		@info("--- exploration reached a dead end")
		@info("--- explored path: ", sp)
		break
	end
	# find the neighbor with highest DF value
	cv = nnei[findmax(df[nnei])[2]]
	#cv = nei[findmax(df_boost[nnei])[2]]

	# we found a path
	if cv == t
		@info("path found: ", sp)
		push!(sp,t)
		break
	end
end

@info("shortest path: ", convert(Array{Int64,1}, sp))


┌ Info: finding shortest path (greedy approach)
└ @ Main In[14]:8
┌ Info: path found: 
│   sp = UInt32[0x00000001, 0x00000004, 0x00000007, 0x000000d7, 0x0000054f]
└ @ Main In[14]:31
┌ Info: shortest path: 
│   convert(Array{Int64, 1}, sp) = [1, 4, 7, 215, 1359, 100]
└ @ Main In[14]:37
┌ Info: finding shortest path (probabilistic approach)
└ @ Main In[14]:38

In [15]:
@info("finding shortest path (probabilistic approach)")

# array of paths
sps = Array{Array{UInt32,1},1}()
# current path
sp = UInt32[]
cv = s

# search in a probabilistic way shortest paths between s and t
c = 0
max_iter = 1e3

while c < max_iter
	global c
	while length(sp) < ml 
		global sp, cv
		@debug("adding vertex: ", cv)
		push!(sp,cv)
		nei = out_neighbors(cv,core)
		nnei = setdiff(nei,sp)
		if length(nnei) == 0
			@debug("--- exploration reached a dead end")
			@debug("--- explored path: ", sp)
			sp = UInt32[]
			cv = s
			break
		end
		pos = get_flying_index(df[nnei] / sum(df[nnei]))
		cv = nnei[pos]

		# we found a path
		if cv == t
			@debug("path found: ", sp)
			push!(sp,t)
			if !(sp in sps)
				push!(sps,sp)
			end
			sp = UInt32[]
			cv = s
			break
		end
	end
	c += 1
end

@info("shortest paths: ", convert(Array{Array{Int64,1},1}, sps))


┌ Info: finding shortest path (probabilistic approach)
└ @ Main In[15]:1
┌ Info: shortest paths: 
│   convert(Array{Array{Int64, 1}, 1}, sps) = Array{Int64,1}[]
└ @ Main In[15]:46

In [ ]: