In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
from gensim import corpora, models, similarities
from gensim.utils import simple_preprocess, lemmatize
from gensim.models.doc2vec import TaggedDocument,Doc2Vec
from sklearn.multiclass import OneVsRestClassifier,OneVsOneClassifier
from sklearn.cross_validation import cross_val_score, train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from gensim.models.ldamodel import LdaModel
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords
from os import listdir
import pandas as pd
import numpy as np
import codecs
import re
from sklearn.learning_curve import learning_curve
In [ ]:
# TF-IDF, LDA, LSI, RPM, HDP
class MyCorpus(object):
    def __init__(self, path):
        self.path = path
        self.text, self.klasses = self.get_klass_ids()
        self.bigram = models.Phrases(self.text.values())
        self.dictionary = corpora.Dictionary(self.bigram[self.text.get(key)] for key in self.klasses.keys())
    def __iter__(self):
        for sent in self.klasses.keys():
            #yield self.dictionary.doc2bow(self.text.get(sent))
            yield self.bigram[self.text.get(sent)]
    def text_bow(self, sent):
        return self.dictionary.doc2bow(self.text.get(sent))
    def get_data(self, fname):
        data = open(self.path + fname).readlines()
        for sent in data:
            yield self.pre_process(sent)
    def get_klass_ids(self):
        ids_text = {}
        ids_klasses = {}
        i = 0
        for klass in listdir(self.path):
            for row in self.get_data(klass):
                if row is not None and len(row) != 0:
                    ids_text['SENT_%s' % i] = row
                    ids_klasses['SENT_%s' % i] = klass.replace('.txt', '')
                    i = i + 1
        return ids_text, ids_klasses
    def pre_process(self, text):
        sentence = re.sub('[.,"]', '', text)
        #sentence = sentence.lower().decode('ISO-8859-7').split()
        # REMOVING STOPWORDS ~~~~~~~~~~~~~~~~~~~~
        #stopCached = set(stopwords.words('english'))
        #sentence = [word for word in sentence.lower().decode('ISO-8859-7').split() if word not in stopCached]
        ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # LEMMATIZE TEXT ~~~~~~~~~~~~~~~~~~~~~~~~
        sentence = lemmatize(sentence.decode('ISO-8859-7'))
        ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        return sentence
        #return simple_preprocess(text)
In [ ]:
#DOC2VEC
class MyCorpus(object):
    def __init__(self, path):
        self.path = path
        self.text, self.klasses = self.get_klass_ids()
    def __iter__(self):
        for sent in self.klasses.keys():
            yield TaggedDocument(words=self.text.get(sent), tags=[sent])
    def get_data(self, fname):
        data = pd.read_csv(self.path + fname, header=None, sep='"\n', engine='python')
        for sent in data[0]:
            yield self.pre_process(sent)
    def get_klass_ids(self):
        ids_text = {}
        ids_klasses = {}
        i = 0
        for klass in listdir(self.path):
            for row in self.get_data(klass):
                if len(row) >= 2:
                    ids_text['SENT_%s' % i] = row
                    ids_klasses['SENT_%s' % i] = klass.replace('.txt', '')
                    i = i + 1
        return ids_text, ids_klasses
    def pre_process(self, text):
        sentence = re.sub('[.,"]', '', text)
        #sentence = sentence.lower().decode('ISO-8859-7').split()
        # REMOVING STOPWORDS ~~~~~~~~~~~~~~~~~~~~
        #stopCached = set(stopwords.words('english'))
        #sentence = [word for word in sentence.lower().decode('ISO-8859-7').split() if word not in stopCached]
        ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # LEMMATIZE TEXT ~~~~~~~~~~~~~~~~~~~~~~~~
        sentence = lemmatize(sentence.decode('ISO-8859-7'))
        ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        return sentence
        #return simple_preprocess(text)
In [ ]:
mc = MyCorpus('Bases/nltk/abc/')
In [ ]:
import numpy as np
cont = []
cortados = []
i = 0
# iterate over a copy of the keys so that empty sentences can be removed safely
for sent in list(mc.klasses.keys()):
    temp = mc.text.get(sent)
    if temp is None or len(temp) == 0:
        cortados.append(sent)
        mc.text.pop(sent)
        mc.klasses.pop(sent)
        i = i + 1
    else:
        cont.append(len(temp))
print('%s empty sentences were removed' % i)
print('Number of documents: %s' % len(cont))
print('Total number of terms: %s' % np.sum(cont))
print('Number of terms in the largest document: %s' % np.max(cont))
print('Number of terms in the smallest document: %s' % np.min(cont))
print('Average terms per document: %8.6f' % np.mean(cont))
print('Standard deviation: %8.5f' % np.std(cont))
print('Skewness of the number of terms per document: %6.4f' % skew(cont))
print('Kurtosis of the number of terms per document: %6.4f' % kurtosis(cont))
In [ ]:
dictionary = corpora.Dictionary(mc)
corpus = [dictionary.doc2bow(text) for text in mc]
print('Dictionary size: %s' % len(dictionary.keys()))
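In [ ]:
# Optional, hedged sketch: gensim's Dictionary.filter_extremes can prune very
# rare and very frequent tokens before the models are built. The thresholds
# below are illustrative assumptions, not values used elsewhere in this
# notebook; if this cell is run, the corpus must be rebuilt as shown.
dictionary.filter_extremes(no_below=5, no_above=0.5)
corpus = [dictionary.doc2bow(text) for text in mc]
print('Dictionary size after pruning: %s' % len(dictionary.keys()))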
In [ ]:
len(mc.text.get('SENT_5480'))
In [ ]:
cortados
In [ ]:
# build the TF-IDF model
tfidf = models.TfidfModel(corpus)
In [ ]:
# build the LDA model
lda = models.LdaModel(corpus, id2word=dictionary, passes=10)
print lda.show_topics(2)
In [ ]:
# build the LSI model
lsi = models.LsiModel(corpus, id2word=dictionary)
print lsi.show_topics(1)
In [ ]:
# build the LSI model on top of TF-IDF
lsi = models.LsiModel(tfidf[corpus], id2word=dictionary)
print lsi.show_topics(2)
In [ ]:
import platform
print 'Version :', platform.python_version()
print 'Compiler :', platform.python_compiler()
print 'Build :', platform.python_build()
In [ ]:
# build the Doc2Vec model
model = Doc2Vec(mc, size=300, hs=1, iter=100, min_count=0, workers=4)
In [ ]:
model = Doc2Vec(dm=0, dbow_words=1, alpha=0.025, min_alpha=0.025, hs=1, size=300, min_count=0, workers=5, iter=20)  # use a fixed learning rate
model.build_vocab(mc)
model.train_words = True
model.train_labels = True
for epoch in range(7):
    model.train(mc)
    model.alpha -= 0.003  # decrease the learning rate
    model.min_alpha = model.alpha  # fix the learning rate, no decay
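In [ ]:
# Hedged sketch: the trained Doc2Vec model can embed sentences it has not seen
# during training via infer_vector, and can be persisted with save()/load().
# 'SENT_15' and the file name are illustrative assumptions.
new_vec = model.infer_vector(mc.text.get('SENT_15'))
print 'inferred vector dimension:', new_vec.shape[0]
#model.save('doc2vec_abc.model')
#model = Doc2Vec.load('doc2vec_abc.model')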
In [ ]:
# create a directed graph (DiGraph)
import networkx as nx
G = nx.DiGraph()
# each text is a node of the graph
# the text's class is a node attribute
for k, v in mc.klasses.items():
    G.add_node(k, klass=v)
In [ ]:
# for the tf-idf model
# add the edges to the graph
# file names...
# auxiliary variable...
names = mc.klasses.keys()
# build the similarity index
# used to find the k nearest neighbours of each node
# num_best is the number of neighbours + 1 (each node is its own neighbour)
# num_best=11 yields a graph with 10 neighbours per node
#index = similarities.Similarity(None, tfidf[corpus], num_features=len(mc.dictionary.keys()), num_best=11)
# for LDA, LSI
index = similarities.Similarity('temp', lda[corpus], num_features=len(dictionary.keys()), num_best=7)
# for RPM, HDP
#index = similarities.MatrixSimilarity(rpm[corpus], num_features=len(dictionary.keys()), num_best=7)
for k in names:
    for nn in index[lda[mc.text_bow(k)]]:
        #gap = set(dictionary.doc2bow(mc.text.get(k)))
        #for nn in index[lsi[tfidf[mc.text_bow(k)]]]:
        if not k == names[nn[0]]:
            G.add_edge(k, names[nn[0]], weight=nn[1])
In [ ]:
# for the Doc2Vec model
names = mc.klasses.keys()
for k in names:
    for nn in model.docvecs.most_similar(k, topn=7):
        G.add_edge(k, nn[0], weight=nn[1])
In [ ]:
# compute the degree distribution of the nodes
from collections import Counter
# since the out_degree is always 10 (by construction),
# the in_degree is all we need
degree = G.in_degree().values()
cdegree = Counter(degree)
In [ ]:
# skewness and kurtosis measure how non-uniform the distribution is
print skew(degree), kurtosis(degree)
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(cdegree.keys(),cdegree.values(),'bo-')
#plt.savefig('Pictures/Doc2Vec-DBOW_SKIPGRAM/Com Stemming/abc-GoodDegrees-k11')
In [ ]:
good_bad_edges = {}
for k in names:
    good_bad_edges[k] = {}
    good_bad_edges[k]['good'] = 0
    good_bad_edges[k]['bad'] = 0
    good_bad_edges[k]['all'] = 0
    for edge in G.in_edges(k):
        if G.node[edge[0]]['klass'] == G.node[edge[1]]['klass']:
            good_bad_edges[k]['good'] += 1
        else:
            good_bad_edges[k]['bad'] += 1
        good_bad_edges[k]['all'] += 1
In [ ]:
baddegree = [d['bad'] for d in good_bad_edges.values()]
CBad = Counter(baddegree)
plt.plot(cdegree.keys(),cdegree.values(),'bo-')
plt.plot(CBad.keys(),CBad.values(),'ro-')
#plt.savefig('Pictures/Doc2Vec-DBOW_SKIPGRAM/Com Stemming/abc-GoodBadDegrees-k11')
In [ ]:
print skew(baddegree), kurtosis(baddegree)
In [ ]:
from scipy.stats import spearmanr,pearsonr
import numpy as np
corr = np.array([[d['bad'], d['all']] for d in good_bad_edges.values()])
print('Spearman Correlation: %8.6f, %s'% spearmanr(corr[:,0],corr[:,1]))
print('Pearson Correlation: %8.6f, %s'%pearsonr(corr[:,0],corr[:,1]))
In [ ]:
print spearmanr(corr[:,0],corr[:,1])
print pearsonr(corr[:,0],corr[:,1])
In [ ]:
############# ---------------------- TRAINING MODEL ------------------##################
In [ ]:
## MATRIX FOR LDA AND TFIDF MODELS
def addMatrix(A, B):
    """Add two matrices, where B's rows may be shorter than A's and are zero-padded on the right."""
    sizeL = len(A)
    sizeC = len(A[0])
    s = (sizeL, sizeC)
    C = np.zeros(s, dtype=np.float32)
    # element-wise sum over the columns B actually has
    for i in range(sizeL):
        for j in range(len(B[i])):
            C[i][j] = A[i][j] + B[i][j]
    return C
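In [ ]:
# Hedged sketch: for the way addMatrix is used below (A is always a zero
# matrix), the same zero-padding can be written directly with NumPy slicing.
# pad_rows is a hypothetical helper and is not referenced by the other cells.
def pad_rows(B, width):
    """Stack variable-length rows of B into a zero-padded (len(B), width) array."""
    C = np.zeros((len(B), width), dtype=np.float32)
    for i, row in enumerate(B):
        C[i, :len(row)] = row
    return C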
In [ ]:
## FOR TFIDF MODEL (SCIKIT-LEARN)
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
def split_mc_corpus(corpus):
    # documents are already tokenised, so the analyzer just passes the tokens through
    return [word for word in corpus]
bow_transformer = CountVectorizer(analyzer=split_mc_corpus).fit(mc)
messages_bow = bow_transformer.transform(mc)
tfidf_transformer = TfidfTransformer().fit(messages_bow)
print 'sparse matrix shape:', messages_bow.shape
print 'number of non-zeros:', messages_bow.nnz
print 'sparsity: %.2f%%' % (100.0 * messages_bow.nnz / (messages_bow.shape[0] * messages_bow.shape[1]))
vectors = tfidf_transformer.transform(messages_bow)
klasses = np.array(mc.klasses.values())
tag = np.array(mc.klasses.keys())
In [ ]:
## FOR TFIDF MODEL (GENSIM)
'''
TF-IDF: take the vectors from the model and store them
'''
vectors = []
klasses = []
temp = 0
q = 0
for key in mc.klasses.keys():
    try:
        vecs = np.array(tfidf[mc.text_bow(key)], dtype=np.float32)
        s = (1, int(vecs[:, 0].max() + 1))
        A = np.zeros(s)
        sizeL = len(A)
        sizeC = len(A[0])
        img = (sizeL, sizeC)
        C = np.zeros(img, dtype=np.float32)
        # mark the term ids that occur in this document
        for row in vecs[:, 0]:
            dim = int(row) + 1
            for i in range(sizeL):
                for j in range(dim):
                    if j == dim - 1:
                        C[i][j] = 1
            # track the largest term id seen so far, for padding below
            if temp < dim:
                temp = dim
        vectors.append(np.hstack(C))
        klasses.append(mc.klasses.get(key))
    except IndexError:
        q = q + 1
        continue
s = (len(vectors), temp)
A = np.zeros(s)
B = np.array(vectors)
klasses = np.array(klasses)
vectors = addMatrix(A, B)
print "%s sentences had no TF-IDF weights" % q
print vectors.shape
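In [ ]:
# Hedged alternative: the loop above only records the presence of each term id.
# gensim's matutils.corpus2dense builds a dense (documents x terms) matrix that
# keeps the TF-IDF weights themselves; row order follows mc.klasses.keys(), as
# in the loop above. Sketch only, not used by the cells below.
from gensim import matutils
keys = list(mc.klasses.keys())
dense_tfidf = matutils.corpus2dense((tfidf[mc.text_bow(k)] for k in keys),
                                    num_terms=len(dictionary)).T
print dense_tfidf.shape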
In [ ]:
## FOR DOC2VEC MODEL
'''
Doc2Vec: take the vectors from the model and store them
'''
vectors = []
klasses = []
tag = []
for key in mc.klasses.keys():
    vectors.append(model.docvecs[key])
    klasses.append(mc.klasses.get(key))
    tag.append(key)
vectors = np.array(vectors)
klasses = np.array(klasses)
tag = np.array(tag)
In [ ]:
'''
LSI: take the vectors from the model and store them
'''
vectorsX = []
vecs = []
tag = []
klasses = []
k = 0
for key in mc.klasses.keys():
    vecs = np.array(lsi[mc.text_bow(key)], dtype=np.float32).T
    #vecs = np.array(lsi[tfidf[mc.text_bow(key)]], dtype=np.float32).T
    try:
        # keep only documents that received the full 200 LSI dimensions (the model default)
        if len(vecs[1]) == 200:
            vectorsX.append(vecs[1])
            klasses.append(mc.klasses.get(key))
            tag.append(key)
        else:
            k = k + 1
    except IndexError:
        k = k + 1
        continue
vectors = np.array(vectorsX)
klasses = np.array(klasses)
tag = np.array(tag)
print "%s sentences had no LSI weights" % k
print vectors.shape
In [ ]:
'''
LDA: take the vectors from the model and store them
'''
vectors = []
klasses = []
names = []
tag = []
vecs = []
temp = 0
q = 0
for key in mc.klasses.keys():
    try:
        vecs = np.array(lda[mc.text_bow(key)], dtype=np.float32)
        s = (1, int(vecs[:, 0].max() + 1))
        A = np.zeros(s)
        sizeL = len(A)
        sizeC = len(A[0])
        d = (sizeL, sizeC)
        C = np.zeros(d, dtype=np.float32)
        # mark the topic ids assigned to this document
        for row in vecs[:, 0]:
            dim = int(row) + 1
            for i in range(sizeL):
                for j in range(dim):
                    if j == dim - 1:
                        C[i][j] = 1
            # track the largest topic id seen so far, for padding below
            if temp < dim:
                temp = dim
        vectors.append(np.hstack(C))
        names.append(key)
        klasses.append(mc.klasses.get(key))
        tag.append(key)
    except IndexError:
        q = q + 1
        continue
s = (len(vectors), temp)
A = np.zeros(s)
B = np.array(vectors)
klasses = np.array(klasses)
vectors = addMatrix(A, B)
tag = np.array(tag)
print "%s sentences had no LDA weights" % q
print vectors.shape
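In [ ]:
# Hedged alternative for LDA: matutils.sparse2full pads each topic distribution
# with zeros up to num_topics, giving fixed-length topic-proportion vectors
# instead of the binary id matrix built above. Sketch only, not used below.
from gensim import matutils
keys = list(mc.klasses.keys())
topic_vecs = np.array([matutils.sparse2full(lda[mc.text_bow(k)], lda.num_topics) for k in keys])
print topic_vecs.shape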
In [ ]:
# APPLY DECOMPOSITION MATRIX CCA
from sklearn.cross_decomposition import CCA
vectorsCCA = CCA(n_components=300).fit(vectors, bin_klasses).transform(vectors)
In [ ]:
# FOR EVERY MODEL
# SET ASIDE A TEST SPLIT
vecs_train, vecs_test, label_train, label_test = \
train_test_split(mc.text.values(), mc.klasses.values(), test_size=0.4)
print len(vecs_train), len(vecs_test), len(vecs_train) + len(vecs_test)
In [ ]:
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
abc_detector = svm.LinearSVC().fit(vectors, klasses)
In [ ]:
# FOR THE MULTICLASS TASK (one-vs-one)
from sklearn.svm import LinearSVC
abc_detector = OneVsOneClassifier(LinearSVC(random_state=0)).fit(vectors, bin_klasses)
In [ ]:
## QUERY A SINGLE SENTENCE
phrase = mc.text.get('SENT_15')
it = lda.id2word.doc2bow(tx for tx in phrase)
tvec = np.array(lda[it]).T
tvec = tvec[1, ]
print 'Class predicted:', abc_detector.predict([tvec])[0]
In [ ]:
print 'predicted:', abc_detector.predict(vectors)[60]
print 'expected:', klasses[60]
In [ ]:
all_predictions = abc_detector.predict(vectors)
print all_predictions[0:20]
In [ ]:
# ENCODE KLASSES AS INTEGERS
# DEFINE MCC
def multiclass_matthews_corrcoef(y_true, y_pred):
    # approximation: Pearson correlation between the integer-encoded labels
    cov_mat = np.cov(y_true, y_pred)
    mcc = cov_mat[0][1] / np.sqrt(cov_mat[0][0] * cov_mat[1][1])
    return mcc
pe = LabelEncoder()
#pe.fit(all_predictions)
le = LabelEncoder()
le.fit(klasses)
bin_klasses = le.transform(klasses)
#bin_predictions = pe.transform(all_predictions)
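In [ ]:
# Hedged cross-check: multiclass_matthews_corrcoef above is a covariance-based
# approximation over integer-encoded labels. If a recent scikit-learn is
# available (assumption: >= 0.19, where matthews_corrcoef supports multiclass),
# it can be compared directly against the approximation.
from sklearn.metrics import matthews_corrcoef
print 'sklearn MCC:', matthews_corrcoef(klasses, all_predictions)
print 'covariance-based MCC:', multiclass_matthews_corrcoef(le.transform(klasses), le.transform(all_predictions))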
In [ ]:
modelo = 'LSI'
data = 'abc'
process = 'ComStop'
In [ ]:
import sys
temp = sys.stdout
sys.stdout = open('Logs/log3.txt', 'a')
print 'Confusion Matrix '+modelo+' Model >['+data+' dataset]< --'+process+'--'
print 'accuracy', accuracy_score(klasses, all_predictions)
print 'confusion matrix\n', confusion_matrix(klasses, all_predictions)
print '(row=expected, col=predicted)'
print 'Classification Report'
print classification_report(klasses, all_predictions)
sys.stdout.close()
sys.stdout = temp # restore print commands to interactive prompt
## Back to Normal
print 'accuracy', accuracy_score(klasses, all_predictions)
print 'confusion matrix\n', confusion_matrix(klasses, all_predictions)
print '(row=expected, col=predicted)'
In [ ]:
plt.matshow(confusion_matrix(klasses, all_predictions), cmap=plt.cm.binary, interpolation='nearest')
plt.title('confusion matrix')
plt.colorbar()
plt.ylabel('expected label')
plt.xlabel('predicted label')
#plt.savefig('Pictures/'+modelo+'/CMatrix/ConfusionMatrix-'+data+'-'+process+'')
In [ ]:
print classification_report(klasses, all_predictions)
In [ ]:
def Hubness(sent_vec, dic_hub, percent):
    # keep only positions whose tag appears in the top `percent` of the hub ranking
    fatia = len(dic_hub) * percent / 100
    pos = []
    i = 0
    for row in sent_vec:
        for nn in range(fatia):
            if row == dic_hub[nn][0]:
                pos.append(i)
        i = i + 1
    return pos
def Hubness_ale(sent_vec, dic_hub):
    # same as Hubness, but against a flat (randomly chosen) list of tags
    pos = []
    i = 0
    for row in sent_vec:
        for nn in range(len(dic_hub)):
            if row == dic_hub[nn]:
                pos.append(i)
        i = i + 1
    return pos
In [ ]:
import scipy.sparse as sps
def delete_rows_csr(mat, indices):
    """
    Remove the rows denoted by ``indices`` from the CSR sparse matrix ``mat``.
    """
    if not isinstance(mat, sps.csr_matrix):
        raise ValueError("works only for CSR format -- use .tocsr() first")
    indices = list(indices)
    mask = np.ones(mat.shape[0], dtype=bool)
    mask[indices] = False
    return mat[mask]
In [ ]:
# Doc2Vec-DM_CBOW
# Doc2Vec-DBOW_SKIPGRAM
modelo = 'Doc2Vec-DBOW_SKIPGRAM'
data = 'abc'
process = 'ComStemming'
log = 'log3'
import sys
temp = sys.stdout
sys.stdout = open('Logs/'+log+'.txt', 'a')
print 'Matrix '+modelo+' Model >['+data+' dataset]< --'+process+'--'
In [ ]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import cross_validation
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn import svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.metrics import f1_score
import time
X = vectors
y = bin_klasses
s = tag
skf = StratifiedKFold(n_splits=2)
skf.get_n_splits(X, y)
sort_hub = []
per = 0
#all_hub = sorted(good_bad_edges.items(), key=lambda t: t[1]['all'], reverse=True)
#bad_hub = sorted(good_bad_edges.items(), key=lambda t: t[1]['bad'], reverse=True)
#for n in range(len(good_bad_edges)*per/100): sort_hub.append(choice(good_bad_edges.keys()))
classifiers = {'SVM Linear': LinearSVC(),
               #'RBF SVM': SVC(gamma=2, C=1),
               '3-NN': KNeighborsClassifier(n_neighbors=3),
               '5-NN': KNeighborsClassifier(n_neighbors=5),
               'AdaBoost': AdaBoostClassifier(),
               'Logistic': LogisticRegression(),
               'BernoulliNB': BernoulliNB(),
               'RF': RandomForestClassifier(max_depth=100, max_features='auto'),
               }
for name, clf in classifiers.items():
    mccs = []
    accs = []
    f1s = []
    for train_index, test_index in skf.split(X, y, s):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        s_train, s_test = s[train_index], s[test_index]
        # RANDOM hub removal *********************
        #pos_train = Hubness_ale(s_train, sort_hub)
        #pos_test = Hubness_ale(s_test, sort_hub)
        #*****************************************
        #pos_train = Hubness(s_train, bad_hub, per)
        #pos_test = Hubness(s_test, bad_hub, per)
        #pos_train_rev = sorted(pos_train, reverse=True)
        #pos_test_rev = sorted(pos_test, reverse=True)
        #for idxi in pos_train_rev:
        #    #X_train = delete_rows_csr(X_train, [idxi])
        #    X_train = np.delete(X_train, idxi, 0)
        #    y_train = np.delete(y_train, idxi, 0)
        #    s_train = np.delete(s_train, idxi, 0)
        #for idx in pos_test_rev:
        #    #X_test = delete_rows_csr(X_test, [idx])
        #    X_test = np.delete(X_test, idx, 0)
        #    y_test = np.delete(y_test, idx, 0)
        #    s_test = np.delete(s_test, idx, 0)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        mccs.append(multiclass_matthews_corrcoef(y_test, preds))
        accs.append(accuracy_score(y_test, preds))
        f1s.append(f1_score(y_test, preds, average=None))
    print name, "Accuracy: %0.3f" % np.mean(accs)
    print name, "F1: %0.3f" % np.mean(f1s)
    print name, "MCC: %0.3f" % np.mean(mccs)
print '=================================================================================================='
sys.stdout.close()
sys.stdout = temp  # restore print commands to the interactive prompt
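In [ ]:
# Hedged sketch: without the (commented-out) hub-removal logic, the fold loop
# above collapses to cross_val_score with a custom scorer. This uses
# sklearn.model_selection (an assumption, consistent with the StratifiedKFold
# import above) and is not meant to replace the logged run.
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
mcc_scorer = make_scorer(multiclass_matthews_corrcoef)
for name, clf in classifiers.items():
    print name, "CV MCC: %0.3f" % cross_val_score(clf, X, y, cv=skf, scoring=mcc_scorer).mean()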
In [ ]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=2)
testsvd = svd.fit_transform(X_test)
In [ ]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
testpca = pca.fit_transform(X_test.todense())
In [ ]:
testsvd.shape
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors
n_neighbors = 5
# use the 2-D SVD projection of the test vectors computed above
X = testsvd
y = y_test
h = .02  # step size in the mesh
# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
for weights in ['uniform', 'distance']:
    # create an instance of the k-NN classifier and fit the data
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("Classification on the 2-D projection (k = %i, weights = '%s')"
              % (n_neighbors, weights))
    plt.show()
In [ ]: