In [4]:
using MetadataTools
using Interact
In [5]:
# Load the metadata for every known package; `pkgs` is reused by later cells.
pkgs = get_all_pkg() # Returns a Dict{String,PkgMeta}
# Interactive lookup: for the selected name, show that package's entry in `pkgs`.
# NOTE(review): Interact's @manipulate presumably builds the selection widget from
# the literal list below — confirm against the Interact documentation.
@manipulate for pkg_name in ["JuMP","DataArrays","BinDeps"]
pkgs[pkg_name]
end
Out[5]:
We can check the maximum supported Julia version using get_upper_limit
- useful for checking if a package is deprecated.
In [6]:
# Look up the Monads metadata entry and ask for its upper Julia version limit.
get_pkg("Monads") |> get_upper_limit
Out[6]:
In [10]:
# Same query for DataFrames: metadata entry first, then its upper version limit.
get_pkg("DataFrames") |> get_upper_limit
Out[10]:
We can also request information about a package from GitHub (or wherever it is hosted - only GitHub is needed right now!)
In [11]:
# Fetch the hosted-repository information for Gadfly; its `contributors` field is
# what gets ranked below.
gadfly_info = get_pkg_info(get_pkg("Gadfly"))

# Order Contributor values alphabetically by username so that `sort` can compare
# contributor entries.
# NOTE(review): the entries look like (commits, contributor) pairs, in which case
# this method only decides ties between equal commit counts — confirm.
Base.isless(a::MetadataTools.Contributor, b::MetadataTools.Contributor) =
    isless(a.username, b.username)

# Sort once, up front, instead of re-sorting on every widget update.
gadfly_ranked = sort(gadfly_info.contributors, rev=true)

@manipulate for top_x in 1:20
    # Clamp the upper bound so that fewer than `top_x` contributors yields a
    # shorter list rather than a BoundsError.
    gadfly_ranked[1:min(top_x, length(gadfly_ranked))]
end
Out[11]:
I pulled all the data a week or so ago and serialized it for later use.
In [12]:
# Load the previously serialized package information. The do-block form of `open`
# closes the file even when `deserialize` throws, so the handle cannot leak.
pkg_info = open("20140904_metadatatools.jldata", "r") do io
    deserialize(io)
end
# Display the entry for one package as a quick check of the loaded data.
pkg_info["Dates"]
Out[12]:
In [13]:
# Calculate commit stats: for each username, the total number of commits and the
# number of packages that user appears in as a contributor.
commits_by_user = Dict()
pkgs_by_user = Dict()
for pkg in values(pkg_info)
    # Each contributor entry unpacks into (commit count, contributor record).
    for (commits, c) in pkg.contributors
        commits_by_user[c.username] = get(commits_by_user, c.username, 0) + commits
        pkgs_by_user[c.username] = get(pkgs_by_user, c.username, 0) + 1
    end
end

# Turn the dicts into (num, username) vectors sorted in descending order. The
# dicts keep their own names so no variable changes from a Dict to a Vector.
total_pkgs = sort([(num, user) for (user, num) in pkgs_by_user], rev=true)
total_coms = sort([(num, user) for (user, num) in commits_by_user], rev=true)

# Print the top 20 of each ranking. The upper bound is clamped so that fewer than
# 20 users does not raise a BoundsError, and plain loops replace `map(println, ...)`,
# which built a throwaway array just for the printing side effect.
println("Number of packages contributed to")
for entry in total_pkgs[1:min(20, length(total_pkgs))]
    println(entry)
end
println("Number of commits across all packages")
for entry in total_coms[1:min(20, length(total_coms))]
    println(entry)
end
In [14]:
using Graphs
# Get a directed graph where PkgA -> PkgB iff
# PkgA directly requires PkgB.
# Reuse the metadata already loaded into `pkgs` rather than calling
# get_all_pkg() a second time; the reversed graph is built from `pkgs` as well.
g = get_pkgs_dep_graph(pkgs)
Out[14]:
In [15]:
# Extract Gadfly's own dependency graph from the full graph `g`.
g_gadfly = let gadfly_meta = get_pkg("Gadfly")
    get_pkg_dep_graph(gadfly_meta, g)
end
Out[15]:
To plot the dependency graph for a package, we can use my GraphLayout.jl package which uses Compose.jl internally for drawing. I haven't got around to adding Graphs.jl support to GraphLayout.jl just yet though...
In [18]:
using GraphLayout
# Lay out and draw the dependency graph of a few example packages.
for pkg_name in ["Gadfly", "QuantEcon", "JuMP", "Twitter"]
    # Dependency graph of this package, taken from the full graph `g`
    dep_graph = get_pkg_dep_graph(get_pkg(pkg_name), g)
    # Its adjacency matrix is what the layout and drawing routines consume
    adjacency = adjacency_matrix(dep_graph)
    # Spring layout returns the x and y locations
    xs, ys = layout_spring_adj(adjacency)
    # Label every vertex with the name of the package it represents
    vertex_labels = map(v -> v.name, vertices(dep_graph))
    # Draw as an SVG
    draw_layout_adj(adjacency, xs, ys, labels=vertex_labels)
end
We can also look at which packages depend on the most packages
In [17]:
# Pair the vertex count of each package's dependency graph with the package name.
num_pkg_req = [
    (num_vertices(get_pkg_dep_graph(pkg, g)), pkg.name)
    for pkg in values(pkgs)]
sort!(num_pkg_req, rev=true)  # Largest graphs first
println("Top 10 packages by number of packages depended on:")
for rank in 1:10
    n_vertices, name = num_pkg_req[rank]
    # One is subtracted from the vertex count — presumably to exclude the
    # package's own vertex; confirm against get_pkg_dep_graph.
    println(rpad(name, 20, " "), n_vertices - 1)
end
We can also reverse the graph - now an arc from PkgA to PkgB means PkgB requires PkgA
In [19]:
# Build the dependency graph with every arc reversed.
g_rev = get_pkgs_dep_graph(pkgs, reverse=true)
# Pair the vertex count of each package's graph within the reversed graph with
# the package name.
num_pkg_req = [
    (num_vertices(get_pkg_dep_graph(pkg, g_rev)), pkg.name)
    for pkg in values(pkgs)]
sort!(num_pkg_req, rev=true)  # Largest graphs first
println("Top 10 packages by number of packages that depend on them:")
for rank in 1:10
    n_vertices, name = num_pkg_req[rank]
    # One is subtracted from the vertex count — presumably to exclude the
    # package's own vertex; confirm against get_pkg_dep_graph.
    println(rpad(name, 20, " "), n_vertices - 1)
end
In [ ]: