This notebook contains a detailed example, demonstrating the typical workflow Graft aims to support. The dataset used here was constructed by splicing together two separate datasets:
- SOCR Data MLB HeightsWeights: heights, ages and weights of baseball players (vertex data).
- Advogato Trust Network: edge weights between 0 and 1 (edge data).

The combined dataset has 6541 vertices and 51127 edges. Vertex properties: Age, Height (cm), Weight (kg). Edge properties: Trust (float).
In [1]:
## Load and summarize the graph.
using Graft
using StatsBase
import LightGraphs
# Download the baseball dataset to examples/baseball.txt under Pkg.dir("Graft")
# (this cell only fetches the file; the graph itself is loaded in the next cell)
download(
"https://raw.githubusercontent.com/pranavtbhat/Graft.jl/gh-pages/Datasets/baseball.txt",
joinpath(Pkg.dir("Graft"), "examples/baseball.txt")
);
In [2]:
# Load the graph from the dataset file (same path the download call writes to)
g = loadgraph(joinpath(Pkg.dir("Graft"), "examples/baseball.txt"))
Out[2]:
In [3]:
# Get the graph's size
# (presumably the vertex and edge counts — confirm against Graft's documentation)
size(g)
Out[3]:
In [ ]:
# Get an iterator over the graph's edges
# (items are read through .first and .second later in this notebook)
edges(g)
In [ ]:
# List the graph's vertex labels
encode(g)
In [12]:
# Destructure the graph into its vertex descriptor (V) and edge descriptor (E)
V,E = g;
In [13]:
# Display the vertex table held by the vertex descriptor V
V
Out[13]:
In [14]:
# Display the edge table held by the edge descriptor E
E
Out[14]:
In [15]:
# Average BMI of baseball players: weight (kg) over squared height in metres
# (heights are stored in cm, hence the division by 100)
mean(@query(g |> eachvertex(v.Weight / (v.Height / 100) ^ 2)))
Out[15]:
In [16]:
# Median height of players aged at least 20 and under 30,
# converted from cm to feet (1 cm = 0.0328084 ft)
median(@query(g |> filter(v.Age < 30,v.Age >= 20) |> eachvertex(v.Height * 0.0328084)))
Out[16]:
In [17]:
# Mean absolute age difference across strong relationships (edges with Trust above 0.8);
# s and t are presumably the edge's source and target vertices
mean(abs(@query(g |> filter(e.Trust > 0.8) |> eachedge(s.Age - t.Age))))
Out[17]:
In [18]:
# Find fred's 3 hop neighborhood (friends and friends-of-friends and so on);
# the result is queried like a graph in the next cell
fred_nhood = hopgraph(g, "fred", 3)
Out[18]:
In [19]:
# See how well players older than 30 in fred's neighborhood trust each other
# (mean Trust over the edges that remain after filtering on v.Age > 30)
@query(fred_nhood |> filter(v.Age > 30) |> eachedge(e.Trust)) |> mean
Out[19]:
In [20]:
# Find the 3 hop neighborhood around 2 separate seed vertices (multi seed traversal)
sg = hopgraph(g, ["nikolay", "jbert"], 3)
Out[20]:
In [22]:
# Generate an edge distance property as the inverse of trust (Dist = 1 / Trust),
# so weakly trusted edges become long and strongly trusted edges become short
# NOTE(review): an edge with Trust == 0 would get an infinite distance — confirm
# that this is acceptable for the dataset
dists = @query(sg |> eachedge(1 / e.Trust ));
# Attach the computed distances to sg as the :Dist edge property
seteprop!(sg, :, dists, :Dist);
In [23]:
# Trim edges of very high distance: keep only edges with Dist < 10
# (with Dist = 1 / Trust this corresponds to Trust above 0.1)
sg = @query(sg |> filter(e.Dist < 10))
Out[23]:
In [24]:
# Export the graph's adjacency matrix
M = export_adjacency(sg)
# Build a LightGraphs directed graph from the exported adjacency matrix
lg = LightGraphs.DiGraph(M)
Out[24]:
In [26]:
# Export the :Dist edge property as D; it is passed to the shortest-path
# computation later in this notebook
D = export_edge_property(sg, :Dist);
In [ ]:
# Compute betweenness centrality on the LightGraphs graph
centrality = LightGraphs.betweenness_centrality(lg)
In [29]:
# Set the centrality scores as the :Centrality vertex property on sg
# (the `:` argument presumably selects every vertex — confirm against Graft's docs)
setvprop!(sg, :, centrality, :Centrality);
In [30]:
# Compute all-pairs shortest paths with Floyd-Warshall on lg, passing D as the
# edge distances, and keep only the resulting distance matrix (.dists)
apsp = LightGraphs.floyd_warshall_shortest_paths(lg, D).dists;
In [31]:
# Add the new shortest paths as a property to the graph:
# for every edge of sg, look up the shortest distance between its endpoints
eit = edges(sg);
# NOTE(review): the lookup is apsp[e.second, e.first]; confirm this index order
# matches the orientation of the matrices exported by Graft
seteprop!(sg, :, [apsp[e.second,e.first] for e in eit], :Shortest_Dists);
In [32]:
# Show the new vertex descriptor (a :Centrality property was set on sg above)
VertexDescriptor(sg)
Out[32]:
In [33]:
# Show the new edge descriptor (:Dist and :Shortest_Dists properties were set on sg above)
EdgeDescriptor(sg)
Out[33]: