In [94]:
import urllib2
import StringIO
import gzip
import timeit

from collections import defaultdict

# --- configuration -----------------------------------------------------------
baseURL = "http://www.pathwaycommons.org/archives/PC2/current/"
filename = "PathwayCommons.8.All.BINARY_SIF.hgnc.txt.sif.gz"
outFilePath = "pc.sif"
# Interaction types that are counted as protein-protein interactions below.
interaction_types_ppi = set(["interacts-with","in-complex-with","neighbor-of"])

# Time the whole download + parse step.
start_time = timeit.default_timer()

# Read the compressed SIF data into memory.  The HTTP response is closed
# explicitly (even if the read fails) so the underlying socket is not leaked.
response = urllib2.urlopen(baseURL + filename)
try:
    compressedFile = StringIO.StringIO(response.read())
finally:
    response.close()
decompressedFile = gzip.GzipFile(fileobj=compressedFile)

# Counters and accumulators filled in by the parsing loop.
intctr = 0                      # number of PPI-type rows (duplicates included)
linectr = 0                     # number of lines read from the SIF file
interactions = set()            # distinct unordered PPI pairs, keyed "A-B"
proteins = set()                # distinct names that take part in a PPI row
intnamectr = defaultdict(int)   # row count per interaction type

# Go through the SIF file data, line by line.
for line in decompressedFile:

    # Echo the first few raw lines as a sanity check of the file format.
    if linectr < 6:
        print(line)
    linectr += 1

    # Each data row is expected to be "<name1>\t<interaction type>\t<name2>".
    # Strip both "\r" and "\n" so CRLF files do not leave "\r" on the second
    # name, and skip blank/malformed rows instead of raising ValueError.
    fields = line.rstrip("\r\n").split("\t")
    if len(fields) != 3:
        continue
    prot1, interaction_type, prot2 = fields

    intnamectr[interaction_type] += 1
    if interaction_type in interaction_types_ppi:
        intctr += 1
        proteins.update((prot1, prot2))
        # Order-independent key so that A/B and B/A collapse into one entry.
        # NOTE: the "-" separator is ambiguous if a name itself contains "-";
        # the string form is kept so existing consumers of `interactions`
        # keep working.
        interactions.add(min(prot1, prot2) + "-" + max(prot1, prot2))

elapsed = timeit.default_timer() - start_time

print(elapsed)


A1BG	controls-expression-of	A2M

A1BG	interacts-with	ABCC6

A1BG	controls-phosphorylation-of	AKT1

A1BG	controls-state-change-of	AKT1

A1BG	interacts-with	ANXA7

A1BG	interacts-with	CDKN1A

6.16754293442

In [95]:
# Total number of PPI-type rows counted while parsing (duplicates included).
print(intctr)


523498

In [96]:
# Number of distinct names that appear in at least one PPI-type row.
len(proteins)


Out[96]:
17020

In [97]:
# Number of distinct unordered PPI pairs (A/B and B/A share one key).
len(interactions)


Out[97]:
491784

In [98]:
from operator import itemgetter

# Interaction types ranked by how many rows carry them, most frequent first.
sorted(
    intnamectr.items(),
    key=lambda type_and_count: type_and_count[1],
    reverse=True,
)


Out[98]:
[('interacts-with', 369895),
 ('in-complex-with', 153603),
 ('chemical-affects', 135268),
 ('catalysis-precedes', 120948),
 ('controls-expression-of', 110013),
 ('controls-state-change-of', 106156),
 ('controls-production-of', 18482),
 ('consumption-controlled-by', 16816),
 ('controls-phosphorylation-of', 15636),
 ('used-to-produce', 13705),
 ('controls-transport-of', 6960),
 ('reacts-with', 3607),
 ('controls-transport-of-chemical', 2847)]

In [ ]: