In [2]:
%matplotlib inline
# NOTE(review): several imports (configparser, os, requests, tqdm, librosa,
# ipd, json, IMDb, tmdb, sparse/stats/spatial, decomposition) are not used in
# this notebook section — presumably leftovers from a shared template; confirm
# before pruning.
import configparser
import os
import requests
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse, stats, spatial
import scipy.sparse.linalg
from sklearn import preprocessing, decomposition
import librosa
import IPython.display as ipd
import json
from imdb import IMDb
import tmdbsimple as tmdb
from pygsp import graphs, filters, plotting
# Wide default figure size for the spy/histogram plots below.
plt.rcParams['figure.figsize'] = (17, 5)
# Render pygsp graph plots through matplotlib so they appear inline.
plotting.BACKEND = 'matplotlib'
In [3]:
# Movie-level dataset with engineered features, saved by an earlier notebook.
dataset = pd.read_csv('Saved_Datasets/NewFeaturesDataset.csv', encoding='utf-8')
In [3]:
# Quick sanity check of the loaded movie features.
dataset.head(5)
Out[3]:
In [4]:
# Actor-level dataset (v4), saved by a companion notebook.
Actors = pd.read_csv('Saved_Datasets/Actorsv4Dataset.csv')
In [5]:
# Quick sanity check of the actor dataset.
Actors.head(3)
Out[5]:
In [82]:
# Build the pairwise |difference| matrix of the movies' actor-profitability
# signal: W_diff[i, j] = |prof[i] - prof[j]| (int-truncated, as in the
# original int-dtype matrix).
# NOTE(review): `movies_actor_prof` is not defined anywhere in this notebook —
# it presumably comes from an earlier/companion notebook; confirm it exists
# (and has one entry per row of `dataset`) before a fresh Restart & Run All.
prof = np.asarray(movies_actor_prof)[:len(dataset)]
# Broadcasting replaces the original O(n^2) Python double loop and yields the
# full symmetric matrix directly, so no transpose-merge step is needed.
W_diff = np.abs(prof[:, None] - prof[None, :]).astype(int)
# A movie's distance to itself carries no information for the graph.
np.fill_diagonal(W_diff, 0)
In [83]:
# Sparsity pattern of the raw difference matrix (expected to be dense).
plt.spy(W_diff)
Out[83]:
In [84]:
# Distribution of pairwise differences, used to choose the cutoff below.
plt.hist(W_diff.reshape(-1),bins=50);
In [85]:
# 75th percentile of all pairwise differences; differences above this cutoff
# are mapped to zero weight during normalisation below.
val_75 = np.percentile(W_diff,75)
print(val_75)
In [94]:
# Map raw differences to edge weights in [0, 1]: identical profitability -> 1,
# linear decay up to the 75th-percentile cutoff, anything beyond -> 0.
# Vectorised masks replace the original O(n^2) Python double loop; since
# W_diff is symmetric here, the result is symmetric too (the symmetrisation
# cell that follows is then a no-op apart from zeroing the diagonal).
W_diff_norm = np.zeros(shape=(len(dataset), len(dataset)), dtype=float)
# Zero difference gets full weight; handled separately so a degenerate
# val_75 == 0 cannot cause a division by zero.
W_diff_norm[W_diff == 0] = 1.0
# Linear decay for differences inside the cutoff.
in_cutoff = (W_diff > 0) & (W_diff <= val_75)
W_diff_norm[in_cutoff] = 1.0 - W_diff[in_cutoff] / val_75
In [97]:
# Symmetrise by keeping the larger of the two directed weights (elementwise
# maximum of the matrix and its transpose), then drop self-loops so the
# adjacency matrix is valid for an undirected graph.
W_diff_norm = np.maximum(W_diff_norm, W_diff_norm.transpose())
np.fill_diagonal(W_diff_norm, 0)
In [98]:
# Sparsity pattern after thresholding at the 75th percentile.
plt.spy(W_diff_norm)
Out[98]:
In [99]:
# Distribution of the normalised edge weights (all in [0, 1]).
plt.hist(W_diff_norm.reshape(-1),bins=50);
In [100]:
# Wrap the normalised weight matrix in a DataFrame for saving/inspection.
DiffNormW = pd.DataFrame(W_diff_norm)
DiffNormW.head()
Out[100]:
In [101]:
# Persist the dense normalised weight matrix for downstream notebooks.
DiffNormW.to_csv('Saved_Datasets/DiffNormActProfW.csv', index=False)
In [104]:
# Build the (dense) similarity graph and inspect its Laplacian spectrum.
G = graphs.Graph(W_diff_norm)
G.compute_laplacian('normalized')
# recompute=True forces a fresh eigendecomposition even if one is cached.
G.compute_fourier_basis(recompute=True)
# First 10 eigenvalues: the count of near-zero values hints at the number of
# (weakly) connected components.
plt.plot(G.e[0:10]);
In [105]:
# Encode the categorical 'success' column as integers for colouring vertices.
labels = preprocessing.LabelEncoder().fit_transform(dataset['success'])
# Use Laplacian eigenvectors 1 and 2 as a 2-D spectral layout.
G.set_coordinates(G.U[:,1:3])
G.plot_signal(labels, vertex_size=20)
In [128]:
# Sparsify the graph: for every node keep only its NEIGHBORS strongest edges,
# then re-symmetrise. Vectorised fancy indexing replaces the original O(n^2)
# Python double loop.
NEIGHBORS = 400

n = len(W_diff_norm)
# Column indices of each row sorted by ascending weight; the last NEIGHBORS
# entries per row are that node's strongest edges. max(..., 0) keeps the
# "keep everything" behaviour if NEIGHBORS ever exceeds n.
sort_order = np.argsort(W_diff_norm, axis=1)
top = sort_order[:, max(n - NEIGHBORS, 0):]
# Scatter the kept weights into an otherwise-zero matrix.
sorted_weights = np.zeros((n, n))
rows = np.arange(n)[:, None]
sorted_weights[rows, top] = W_diff_norm[rows, top]
# k-NN selection is not symmetric; keep the larger directed weight so the
# result is a valid undirected adjacency matrix again.
sorted_weights = np.maximum(sorted_weights, sorted_weights.transpose())
In [135]:
# Wrap the sparsified weight matrix in a DataFrame for saving/inspection.
DiffNormSparsW = pd.DataFrame(sorted_weights)
DiffNormSparsW.head()
Out[135]:
In [136]:
# Persist the sparsified weight matrix for downstream notebooks.
DiffNormSparsW.to_csv('Saved_Datasets/DiffNormActProfSparsW.csv', index=False)
In [129]:
# Sparsity pattern after the k-nearest-neighbour sparsification.
plt.spy(sorted_weights)
Out[129]:
In [130]:
# Weight distribution after sparsification (large spike at zero expected).
plt.hist(sorted_weights.reshape(-1),bins=50);
In [131]:
# Rebuild the graph from the sparsified weights and re-inspect the spectrum.
G = graphs.Graph(sorted_weights)
G.compute_laplacian('normalized')
# recompute=True forces a fresh eigendecomposition even if one is cached.
G.compute_fourier_basis(recompute=True)
plt.plot(G.e[0:10]);
In [139]:
# Re-plot the 'success' signal on the sparsified graph's spectral layout.
labels = preprocessing.LabelEncoder().fit_transform(dataset['success'])
G.set_coordinates(G.U[:,1:3])
G.plot_signal(labels, vertex_size=20)
In [140]:
# NOTE(review): 'Normed_ROI' looks like a continuous regression target;
# LabelEncoder assigns an arbitrary integer to every distinct value, so the
# colour scale below is not ordered by ROI magnitude unless the distinct
# values happen to sort that way — confirm this is intended.
labels_reg = preprocessing.LabelEncoder().fit_transform(dataset['Normed_ROI'])
G.plot_signal(labels_reg, vertex_size=20)
In [ ]: