In [94]:
# Download the Pathway Commons binary SIF archive, decompress it in memory and
# tally the protein-protein interactions it contains.
#
# Names left defined for later cells:
#   linectr      -- number of lines read from the decompressed file
#   intctr       -- number of lines whose type is in interaction_types_ppi
#   intnamectr   -- per-type line counts, for every interaction type seen
#   proteins     -- names appearing on either side of a counted interaction
#   interactions -- one "A-B" key per distinct unordered pair that was counted
#   elapsed      -- wall-clock seconds for download plus parsing
import urllib2
import StringIO
import gzip
import timeit
from collections import defaultdict
from contextlib import closing

baseURL = "http://www.pathwaycommons.org/archives/PC2/current/"
filename = "PathwayCommons.8.All.BINARY_SIF.hgnc.txt.sif.gz"
outFilePath = "pc.sif"
interaction_types_ppi = {"interacts-with", "in-complex-with", "neighbor-of"}

start_time = timeit.default_timer()

# Read the compressed SIF data into memory. urllib2 responses are not context
# managers on Python 2, so closing() guarantees the connection is released
# even if the read fails part-way through.
with closing(urllib2.urlopen(baseURL + filename)) as response:
    compressedFile = StringIO.StringIO(response.read())

# Initialize the line and interaction counters and the result containers.
intctr = 0
linectr = 0
interactions = set()
proteins = set()
intnamectr = defaultdict(int)

# Go through the SIF file data, line by line.
with gzip.GzipFile(fileobj=compressedFile) as decompressedFile:
    for line in decompressedFile:
        # Echo the first six raw lines as a quick sanity check of the format.
        if linectr < 6:
            print(line)
        linectr += 1

        # Strip "\r" as well so a CRLF file cannot leave it glued to prot2.
        record = line.rstrip("\r\n")
        if not record:
            # A blank line carries no interaction; skip it rather than crash.
            continue

        fields = record.split("\t")
        if len(fields) != 3:
            # Same exception type the bare tuple-unpack raised, but it now
            # says where the problem is.
            raise ValueError(
                "line %d: expected 3 tab-separated fields, got %d: %r"
                % (linectr, len(fields), record))
        prot1, interaction_type, prot2 = fields

        intnamectr[interaction_type] += 1
        if interaction_type in interaction_types_ppi:
            intctr += 1
            proteins.update((prot1, prot2))
            # Order the two names so that A/B and B/A map to the same key.
            # NOTE(review): names that themselves contain "-" could in
            # principle make two different pairs yield the same key; the
            # string form is kept because other cells may depend on it.
            interactions.add(min(prot1, prot2) + "-" + max(prot1, prot2))

elapsed = timeit.default_timer() - start_time
print(elapsed)
In [95]:
# Number of SIF lines whose interaction type is in interaction_types_ppi.
print intctr
In [96]:
# Number of distinct names seen on either side of a counted interaction.
len(proteins)
Out[96]:
In [97]:
# Number of distinct unordered pairs among the counted interactions.
len(interactions)
Out[97]:
In [98]:
from operator import itemgetter

# Rank the interaction types by how many SIF lines carried each one, most
# frequent first. Each item is a (type, count) pair, so the count is field 1.
count_of = itemgetter(1)
sorted(intnamectr.items(), key=count_of, reverse=True)
Out[98]:
In [ ]: