Importing an edge list into Titan 0.9.0-M1 (Berkeley DB) with TinkerPop 3 Gremlin Server 3.0.0-M7 and Python


In [1]:
from datetime import datetime
from gizmo import AsyncGremlinClient

In [2]:
def build_schema():
    """Submit the schema-management script to the Gremlin server.

    The embedded script opens a management transaction, declares an
    Integer-typed ``uniqueId`` property key, builds a unique composite
    vertex index named ``byId`` over that key, declares the ``collabs``
    edge label and commits. The status code of the server response is
    printed by the consumer callback.
    """
    schema_script = """
        mgmt = g.openManagement();
        uniqueId = mgmt.makePropertyKey('uniqueId').dataType(Integer.class).make();
        mgmt.buildIndex('byId', Vertex.class).addKey(uniqueId).unique().buildCompositeIndex();
        mgmt.makeEdgeLabel('collabs').make();
        mgmt.commit();"""

    def report_status(response):
        # Invoked by the client with the response to the submitted script.
        print("Commited tx with response code: {}".format(response.status_code))

    client = AsyncGremlinClient()
    task = client.s(schema_script, consumer=report_status)
    task.execute()
    

def load_edges():
    """Submit the edge-list import script and print how long it took.

    The embedded script defines a ``getOrCreate`` closure that looks a
    vertex up by ``uniqueId`` and adds it when absent, then walks
    ``social_net.txt`` line by line, splits each line on a tab into a
    source and a target id, connects the two vertices with a ``collabs``
    edge, and finally commits the transaction. The file path is resolved
    wherever the script is evaluated (presumably the Gremlin Server
    process -- confirm for your deployment).

    Prints the response status code via the consumer callback, followed
    by the elapsed wall-clock time measured from function entry.
    """
    started_at = datetime.now()
    import_script = """     
        getOrCreate = { id ->
            def n = g.V().has('uniqueId', id)
            if (n.hasNext()) {n.next()} else {g.addVertex("uniqueId", id)}
        }

        new File('social_net.txt').eachLine {  
            (source, target) = it.split('\t').collect(getOrCreate)
            source.addEdge('collabs', target)
        }

        g.tx().commit()"""

    def report_status(response):
        # Invoked by the client with the response to the submitted script.
        print("Commited tx with response code: {}".format(response.status_code))

    client = AsyncGremlinClient()
    task = client.s(import_script, consumer=report_status)
    task.execute()

    elapsed = datetime.now() - started_at
    print("Loaded in {}".format(elapsed))


def count_nodes(gc):
    """Run ``g.V().count()`` through ``gc`` and print what the consumer receives.

    ``gc`` is any client exposing ``s(script, collect=..., consumer=...)``
    that returns an object with an ``execute()`` method.
    """
    def show(result):
        print(result)

    vertex_query = gc.s("g.V().count()", consumer=show, collect=False)
    vertex_query.execute()


def count_edges(gc):
    """Run ``g.E().count()`` through ``gc`` and print what the consumer receives.

    ``gc`` is any client exposing ``s(script, collect=..., consumer=...)``
    that returns an object with an ``execute()`` method.
    """
    def show(result):
        print(result)

    edge_query = gc.s("g.E().count()", consumer=show, collect=False)
    edge_query.execute()

In [3]:
# Create the property key, unique index and edge label before loading any data.
build_schema()


Commited tx with response code: 200

In [4]:
# Import the edge list; prints the response status and the elapsed wall-clock time.
load_edges()


Commited tx with response code: 200
Loaded in 0:00:17.949163

In [5]:
# Client instance passed to the count helpers in the following cells.
gc = AsyncGremlinClient()

In [6]:
# Print the number of vertices now in the graph.
count_nodes(gc)


[16264]

In [7]:
# Print the number of edges now in the graph.
count_edges(gc)


[47594]

In [9]:
# Load throughput. Both figures are copied by hand from the outputs recorded
# earlier in this notebook and must be updated after a re-run:
#   EDGES_LOADED - the edge count printed by count_edges(gc)
#   LOAD_SECONDS - the "Loaded in 0:00:17.949163" duration, truncated to ms
EDGES_LOADED = 47594
LOAD_SECONDS = 17.949
print("{} edges per second".format(EDGES_LOADED / LOAD_SECONDS))


2651.62404590785 edges per second