In [1]:
%matplotlib inline
import os
import sys
# Append the absolute path of the project's Python sources to sys.path (only
# once), presumably so the local `word2vec` module imported below resolves.
# NOTE(review): 'src/python' is resolved against the current working
# directory, while later cells read files via '../../data/...' — confirm which
# directory this notebook is meant to be launched from.
module_path = os.path.abspath(os.path.join('src', 'python'))
if module_path not in sys.path:
    sys.path.append(module_path)
# Echo the interpreter version and the module search path for debugging.
print(sys.version)
print(sys.path)
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import word2vec
from word2vec import Word2VecImpl
from word2vec import Word2VecAPI
# Short aliases for the word2vec helpers called by the cells below.
c_stats, stats = word2vec.clstr_stats, word2vec.nn_stats
plot, pca = word2vec.plot, word2vec.pca

# Value passed as the second argument of c_stats in every later cell.
n_clstr = 5
In [2]:
# Restore the `w2v_sg_best` checkpoint (presumably the skip-gram variant,
# judging by the `sg` in the filename — confirm) and report on its embeddings.
w2v = Word2VecImpl(5)
# map_location leaves every tensor on the CPU, so the checkpoint also loads on
# a machine without the GPU it may have been saved from; this mirrors how the
# CNN checkpoint is loaded later in this notebook.
checkpoint = torch.load(
    '../../data/trained/w2v_sg_best.tar',
    map_location=lambda storage, loc: storage,
)
w2v.load_state_dict(checkpoint['state_dict'])
api = Word2VecAPI(w2v)
# Print the outputs of the two stats helpers, then plot the PCA of the embeddings.
print(stats(api))
print(c_stats(api, n_clstr))
plot(pca(api.embeddings))
In [3]:
# Restore the `w2v_cbow_best` checkpoint (presumably the CBOW variant, judging
# by the filename — confirm) and report on its embeddings.
w2v = Word2VecImpl(5)
# map_location leaves every tensor on the CPU, so the checkpoint also loads on
# a machine without the GPU it may have been saved from; this mirrors how the
# CNN checkpoint is loaded later in this notebook.
checkpoint = torch.load(
    '../../data/trained/w2v_cbow_best.tar',
    map_location=lambda storage, loc: storage,
)
w2v.load_state_dict(checkpoint['state_dict'])
api = Word2VecAPI(w2v)
# Print the outputs of the two stats helpers, then plot the PCA of the embeddings.
print(stats(api))
print(c_stats(api, n_clstr))
plot(pca(api.embeddings))
In [4]:
# Restore the `w2v_sf_best` checkpoint and report on its embeddings.
w2v = Word2VecImpl(5)
# map_location leaves every tensor on the CPU, so the checkpoint also loads on
# a machine without the GPU it may have been saved from; this mirrors how the
# CNN checkpoint is loaded later in this notebook.
checkpoint = torch.load(
    '../../data/trained/w2v_sf_best.tar',
    map_location=lambda storage, loc: storage,
)
w2v.load_state_dict(checkpoint['state_dict'])
api = Word2VecAPI(w2v)
# Print the outputs of the two stats helpers, then plot the PCA of the embeddings.
print(stats(api))
print(c_stats(api, n_clstr))
plot(pca(api.embeddings))
In [7]:
from src.python.aa_predict import GoodOldCNN

# Restore the CNN checkpoint and wrap the model in the same API object as the
# word2vec cells, so the same reports can be produced for it.
w2v = GoodOldCNN(5, 10)
checkpoint = torch.load(
    '../../data/trained/aapred_cnn_latest.tar',
    map_location=lambda storage, loc: storage,  # leave tensors on the CPU
)
state_dict = checkpoint['state_dict']
w2v.load_state_dict(state_dict)
api = Word2VecAPI(w2v)

# Same reporting sequence as the other cells: two printed summaries, then the plot.
print(stats(api))
print(c_stats(api, n_clstr))
projected = pca(api.embeddings)
plot(projected)
In [10]:
from gensim.models.word2vec import Word2Vec

# Token pairs whose similarity is printed for each model (single-letter
# symbols; presumably amino-acid codes given the uniprot 1-mer models — confirm).
token_pairs = [('A', 'V'), ('R', 'L'), ('F', 'D'), ('H', 'C')]

# The two saved models differ only in the `win5` / `win10` part of the
# filename; the same report is produced for each, in this order.
for model_filename in (
    "../../models/kmer/uniprot_1-mer_dim20_win5_mc2.emb",
    "../../models/kmer/uniprot_1-mer_dim20_win10_mc2.emb",
):
    w2v = Word2Vec.load(model_filename)
    print(c_stats(w2v.wv, n_clstr))
    for first, second in token_pairs:
        # Query the KeyedVectors (`.wv`) directly: the model-level
        # `similarity` forwarder is deprecated and absent from gensim 4.
        print(w2v.wv.similarity(first, second))
In [11]:
import numpy as np

# Pad a 2x2 nested list with two rows of zeros at the bottom and no extra columns.
a = [[1, 2], [3, 4]]
# np.pad is the public entry point; the np.lib.pad alias is not available in
# NumPy 2.x.
padded = np.pad(a, ((0, 2), (0, 0)), mode='constant', constant_values=0.)
padded
Out[11]:
In [ ]: