Load the target labels (one integer class label per line)
In [1]:
    
import requests
import numpy as np
# Read the class labels: one integer per line of the target file.
# The previous loop rebuilt `y` on every iteration, so only the label of the
# last line survived (as a 0-d array); here all labels are collected first.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
# `target_path` is (despite its name) the file handle; the `with` block closes it.
with open('/home/alsheikm/GitDir/EeDN_work/StoExamples/output/data.target', 'r') as target_path:
    targets = [line.strip() for line in target_path]
# Blank lines (e.g. a trailing newline at end of file) are skipped instead of
# raising ValueError from int('').
y = np.array([int(target) for target in targets if target])
    
load data and convert it to graphs
In [2]:
    
# Load the data set and convert it to graphs with EDeN's gSpan converter.
from eden.converter.graph.gspan import gspan_to_eden
# The argument is a remote URL, so this cell presumably needs network access.
# NOTE(review): if gspan_to_eden returns a lazy iterator, `graphs` can only be
# consumed once (it is passed to vectorizer.transform later) — confirm.
graphs = gspan_to_eden('http://www.bioinf.uni-freiburg.de/~costa/bursi.gspan')
    
Set up the vectorizer
In [3]:
    
from eden.graph import Vectorizer
# Vectorizer whose transform() is applied to `graphs` in a later cell to build
# the data matrix.
# NOTE(review): complexity=3 presumably bounds the size of the extracted graph
# features — confirm against the EDeN documentation before tuning.
vectorizer = Vectorizer(complexity=3)
    
extract features and build data matrix
In [4]:
    
%%time
# Turn the graphs into the data matrix X (one row per instance).
# X is expected to expose .shape and .getnnz(), i.e. behave like a scipy sparse
# matrix — TODO confirm against Vectorizer.transform.
X = vectorizer.transform(graphs)
n_instances, n_features = X.shape
# Floor division keeps the average an integer on both Python 2 and 3; the guard
# avoids a ZeroDivisionError when no instances were loaded.
avg_features = X.getnnz() // n_instances if n_instances else 0
# print(...) with a single pre-formatted string works on Python 2 and 3 and
# matches the print call used in the evaluation cell.
print('Instances: %d Features: %d with an avg of %d features per instance' % (n_instances, n_features, avg_features))
    
    
induce a predictor and evaluate its performance
In [5]:
    
%%time
# Induce a predictive model and estimate its performance with 10-fold
# cross-validation, reporting the mean and standard deviation of the ROC AUC.
from sklearn.linear_model import SGDClassifier
try:
    # scikit-learn >= 0.18: cross-validation helpers live in model_selection.
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Older releases only ship the (since removed) cross_validation module.
    from sklearn.cross_validation import cross_val_score
import numpy as np

# shuffle=True makes training stochastic; a fixed random_state keeps the
# reported scores reproducible across Restart & Run All.
predictor = SGDClassifier(average=True, class_weight='balanced', shuffle=True, n_jobs=-1, random_state=42)
# X and y come from the feature-extraction and target-loading cells above;
# y must hold one label per row of X.
scores = cross_val_score(predictor, X, y, cv=10, scoring='roc_auc')
print('AUC ROC: %.4f +- %.4f' % (np.mean(scores), np.std(scores)))
    
    
In [ ]: