projx with Neo4j and the Flickr group membership graph.


In [1]:
%load_ext cypher

In [2]:
import projx as px

Load the Flickr group membership graph from an edge list into a Neo4j database.


In [4]:
# Flickr group-membership bipartite network, from
# http://konect.uni-koblenz.de/networks/flickr-groupmemberships.
# 395,979 users, 103,631 groups, 8,545,307 edges (memberships).

# Extract stage: a space-delimited edge list file whose rows are matched
# against a node "n" - edge - node "m" pattern.
edgelist_extractor = {
    "filename": "data/flickr-groupmemberships/out.flickr-groupmemberships",
    "delim": " ",
    "pattern": [
        {"node": {"alias": "n"}},
        {"edge": {}},
        {"node": {"alias": "m"}},
    ],
}

# Transform stage: the same n/m aliases, now carrying the labels
# User -[IN]-> Group (presumably applied to every extracted row -- confirm
# against the projx documentation).
edge_labeller = {
    "pattern": [
        {"node": {"alias": "n", "label": "User"}},
        {"edge": {"label": "IN"}},
        {"node": {"alias": "m", "label": "Group"}},
    ],
}

# Load stage: target Neo4j REST endpoint, batching settings, and one index
# per node label on the UniqueId attribute.
# NOTE(review): the "indicies" key spelling is kept exactly as written; it is
# presumably the key the loader looks up -- verify before "correcting" it.
neo4j_loader = {
    "uri": "http://localhost:7474/db/data",
    "stmt_per_req": 500,
    "req_per_tx": 25,
    "indicies": [
        {"label": "User", "attr": "UniqueId"},
        {"label": "Group", "attr": "UniqueId"},
    ],
}

# Full ETL description: extractor -> transformers -> loader.
flickergroup_etl = {
    "extractor": {"edgelist": edgelist_extractor},
    "transformers": [{"edge": edge_labeller}],
    "loader": {"edgelist2neo4j": neo4j_loader},
}

In [5]:
# Run the ETL described by flickergroup_etl.
# NOTE: long-running cell -- the recorded run below reports the load
# completing after roughly 1h33m.
px.execute_etl(flickergroup_etl)


Statements per request: 500
Requests per transactions: 25
Created index: CREATE INDEX ON :User(UniqueId);
Created index: CREATE INDEX ON :Group(UniqueId);
Merged 1000000 edges in 0:12:19.132064
Merged 2000000 edges in 0:23:51.167390
Merged 3000000 edges in 0:34:52.905085
Merged 4000000 edges in 0:45:31.490539
Merged 5000000 edges in 0:56:09.081776
Merged 6000000 edges in 1:06:26.836744
Merged 7000000 edges in 1:17:13.032958
Merged 8000000 edges in 1:27:56.693161
Load complete: merged 8545000 edges in 1:33:23.530656

In [6]:
# This uses ipython-cypher by @versae.
# Sanity check after the load: count the User nodes, the Group nodes and the
# IN relationships between them. Each %cypher line magic runs one query
# (presumably against the default local Neo4j instance -- confirm the
# connection settings) and its result is bound to a Python name so it can be
# printed in a later cell.
num_users = %cypher match (user:User) return count(user)
num_groups = %cypher match (group:Group) return count(group)
num_rels = %cypher match (user:User)-[rels:IN]->(group:Group) return count(rels)


1 rows affected.
1 rows affected.
1 rows affected.

In [7]:
# Print the three count tables one after another, separated by a single space.
# The Python 2 print statement is a SyntaxError on Python 3; joining the
# string forms and passing one argument to print() yields the same output
# on Python 2 and also runs on Python 3.
print(" ".join(str(result) for result in (num_users, num_groups, num_rels)))


+-------------+
| count(user) |
+-------------+
|    395979   |
+-------------+ +--------------+
| count(group) |
+--------------+
|    103631    |
+--------------+ +-------------+
| count(rels) |
+-------------+
|   8545307   |
+-------------+