In [1]:
%matplotlib inline
from sklearn.datasets.samples_generator import make_blobs
X, y = make_blobs(n_samples=400, centers=8, n_features=20,random_state=0)
from eden.util.display import plot_embeddings
plot_embeddings(X,y, knn=14, knn_density=4)
In [2]:
%matplotlib inline
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target
from sklearn.preprocessing import normalize,scale
X = normalize(X)
labels=[str(y_val)+'_'+str(i) for i,y_val in enumerate(y)]
from eden.util.display import plot_embeddings
plot_embeddings(X,y, labels=None, knn=14, knn_density=8)
In [3]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [4]:
%%time
%matplotlib inline
from sklearn import datasets
digits = datasets.load_digits(n_class=6)
X = digits.data
y = digits.target
from sklearn.preprocessing import normalize,scale
X = normalize(X)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [5]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [6]:
def rfam_to_matrix(rfam_id, n_max=50):
    """Build a feature matrix from the first sequences of one Rfam family.

    The family's alignment is requested from rfam.xfam.org (format
    ``fastau``), read with eden's ``fasta_to_sequence``, truncated to at most
    ``n_max`` sequences, converted to graphs with ``rnafold_to_eden`` and
    vectorized with ``Vectorizer(complexity=1)``.

    Args:
        rfam_id: Rfam family accession, e.g. ``'RF00005'``; it is substituted
            twice into the download URL.
        n_max: maximum number of sequences taken from the alignment.

    Returns:
        Whatever ``Vectorizer.transform`` returns for the graphs; the caller
        in this notebook treats it as a matrix with ``.shape`` that can be
        stacked with ``scipy.sparse.vstack`` (presumably one row per
        sequence -- confirm against eden).
    """
    from itertools import islice
    from eden.converter.fasta import fasta_to_sequence
    from eden.converter.rna.rnafold import rnafold_to_eden
    from eden.graph import Vectorizer

    alignment_url = 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'%(rfam_id,rfam_id)
    first_seqs = islice(fasta_to_sequence(alignment_url), n_max)
    folded_graphs = rnafold_to_eden(first_seqs)
    return Vectorizer( complexity=1 ).transform(folded_graphs)
def rfam_data(rfam_ids, n_max=50):
    """Assemble a labelled data set from several Rfam families.

    Each family is converted with ``rfam_to_matrix`` and its rows are
    labelled with the family's position in ``rfam_ids`` (0, 1, 2, ...).

    Args:
        rfam_ids: sequence of Rfam family accessions; must not be empty.
        n_max: maximum number of sequences taken per family (forwarded to
            ``rfam_to_matrix``).

    Returns:
        ``(X, y)``: ``X`` is the row-wise stack of the per-family matrices
        (CSR format when more than one family is given, the single family's
        matrix unchanged otherwise) and ``y`` is a numpy array with one
        integer class index per row of ``X``.

    Raises:
        ValueError: if ``rfam_ids`` is empty (previously this surfaced as an
            unbound-variable error at the return statement).
    """
    import numpy as np
    from scipy.sparse import vstack

    blocks = []
    targets = []
    for i, rfam_id in enumerate(rfam_ids):
        X_ = rfam_to_matrix(rfam_id, n_max=n_max)
        blocks.append(X_)
        targets.extend([i] * X_.shape[0])

    if not blocks:
        raise ValueError('rfam_ids must contain at least one Rfam family id')

    # Stack once at the end instead of re-copying the growing matrix on every
    # iteration; a single family is returned as-is, as before.
    if len(blocks) == 1:
        X = blocks[0]
    else:
        X = vstack(blocks, format="csr")
    y = np.array(targets)
    return X,y
In [7]:
%%time
%matplotlib inline
rfam_ids=['RF00004','RF00005','RF00015','RF00020','RF00026','RF00169',
'RF00380','RF00386','RF01051','RF01055','RF01234','RF01699',
'RF01701','RF01705','RF01731','RF01734','RF01745','RF01750',
'RF01942','RF01998','RF02005','RF02012','RF02034']
X,y = rfam_data(rfam_ids[2:7],n_max=100)
from eden.util.display import plot_embeddings
plot_embeddings(X,y, save_image_file_name='RNA_2D_embedding.pdf')
In [11]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [8]:
%%time
%matplotlib inline
pos = 'bursi.pos.gspan'
neg = 'bursi.neg.gspan'
from eden.converter.graph.gspan import gspan_to_eden
from itertools import islice
n_max=500
iterable_pos = islice(gspan_to_eden( pos ),n_max)
iterable_neg = islice(gspan_to_eden( neg ),n_max)
from eden.graph import Vectorizer
vectorizer = Vectorizer( complexity=1 )
import numpy as np
from scipy.sparse import vstack
Xpos = vectorizer.transform(iterable_pos)
Xneg = vectorizer.transform(iterable_neg)
yp = [1] * Xpos.shape[0]
yn = [-1] * Xneg.shape[0]
y = np.array(yp + yn)
X = vstack([Xpos, Xneg], format="csr")
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [9]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [10]:
%%time
%matplotlib inline
#breast-cancer-wisconsin
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'
from eden.util import read
M=[]
labels=[]
for line in read(uri):
line = line.strip()
if line:
items = line.split(',')
label = str(items[1])
labels.append(label)
data = [float(x) for x in items[2:]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = scale(np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [11]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [12]:
%matplotlib inline
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
from eden.util import read
M=[]
labels=[]
for line in read(uri):
line = line.strip()
if line:
items = line.split(',')
label = int(items[0])
labels.append(label)
data = [float(x) for x in items[1:]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = scale(np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [13]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [14]:
%%time
%matplotlib inline
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
n_max=700
from eden.util import read
M=[]
labels=[]
counter = 0
for line in read(uri):
counter += 1
if counter > n_max:
break
line = line.strip()
if line:
items = line.split(',')
label = int(items[-1])//7
labels.append(label)
data = [float(x) for x in items[1:-1]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = scale(np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [15]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [16]:
%%time
%matplotlib inline
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data'
n_max=700
from eden.util import read
M=[]
labels=[]
counter = 0
for line in read(uri):
counter += 1
if counter > n_max:
break
line = line.strip()
if line:
items = line.split(',')
label = hash(items[-1])
labels.append(label)
data = [float(x) for x in items[:-1]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = scale(np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [17]:
%matplotlib inline
from eden.util.display import KernelQuickShiftTreeEmbedding
Xemb=KernelQuickShiftTreeEmbedding(X, knn=16, k_threshold=0.9, metric='rbf', gamma=1e-4)
from sklearn.cluster import KMeans
km = KMeans(init='k-means++', n_clusters=len(set(y)), n_init=10)
yp = km.fit_predict(Xemb)
from sklearn.metrics import adjusted_rand_score
print 'ARS:', adjusted_rand_score(y,yp)
from eden.util.display import plot_embeddings
plot_embeddings(X,yp)
In [18]:
%%time
%matplotlib inline
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data'
n_max=700
from eden.util import read
M=[]
labels=[]
counter = 0
for line in read(uri):
counter += 1
if counter > n_max:
break
line = line.strip()
if line:
items = line.split(',')
label = hash(items[-1])
labels.append(label)
data = [float(x) for x in items[:-1]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = (np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [19]:
%%time
%matplotlib inline
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00254/biodeg.csv'
n_max=700
from eden.util import read
M=[]
labels=[]
counter = 0
for line in read(uri):
counter += 1
if counter > n_max:
break
line = line.strip()
if line:
items = line.split(';')
label = hash(items[-1])
labels.append(label)
data = [float(x) for x in items[:-1]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = (np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [20]:
%%time
%matplotlib inline
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00230/plrx.txt'
n_max=700
from eden.util import read
M=[]
labels=[]
counter = 0
for line in read(uri):
counter += 1
if counter > n_max:
break
line = line.strip()
if line:
items = line.split('\t')
label = hash(items[-1])
labels.append(label)
data = [float(x) for x in items[:-1]]
M.append(data)
import numpy as np
from sklearn.preprocessing import normalize,scale
X = normalize(np.array(M))
y=np.array(labels)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)
In [21]:
%%time
%matplotlib inline
import numpy as np
def load_data(uri):
from eden.util import read
M=[]
labels=[]
counter = 0
for line in read(uri):
counter += 1
if counter > n_max:
break
line = line.strip()
if line:
items = line.split(' ')
label = hash(items[-1])
labels.append(label)
data = [float(x) for x in items[:-1]]
M.append(data)
X = np.array(M)
y=np.array(labels)
return X,y
for i,c in enumerate('abcdefghi'):
uri = 'http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/vehicle/xa%s.dat'%c
X_,y_ = load_data(uri)
if i==0:
X=X_
y=y_
else:
X=np.vstack((X,X_))
y=np.vstack((y,y_))
from sklearn.preprocessing import normalize,scale
#X = scale(X)
from eden.util.display import plot_embeddings
plot_embeddings(X,y)