In [1]:
from __future__ import print_function
import lasagne
import numpy as np
import pandas as pd
import _pickle as pickle
In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
%matplotlib inline
In [4]:
npz_file = "../model-mlp_n100-e100.txt.npz"
In [5]:
# Load network params
with np.load(npz_file) as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
In [6]:
print_statement = "Number of params: %s" % len(param_values)
print(print_statement)
print("="*len(print_statement))
for i in param_values:
    print(i.shape)
In [7]:
# save to variable
w_l1 = pd.DataFrame(param_values[0])
w_l2 = pd.DataFrame(param_values[2])
In [8]:
w_l1.head()
Out[8]:
In [9]:
w_l2.head()
Out[9]:
Quick visualization of the weights
In [10]:
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(w_l1, yticklabels=False)
plt.ylabel("Input feature")
plt.xlabel("Hidden layer node")
Out[10]:
In [11]:
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(w_l2, yticklabels=False)
plt.xlabel("Output class")
plt.ylabel("Hidden layer node")
Out[11]:
In [12]:
n = 150
f, ax = plt.subplots(figsize=(25, 6))
sns.boxplot(data=w_l1[:n].transpose())
plt.xlabel("Input feature")
plt.ylabel("Input-hidden weight")
plt.title("Input-hidden node weights of the first {} features".format(n))
Out[12]:
The weights for some input regions are more variable than others
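To back up that observation, a minimal sketch (assuming `w_l1` is still the input-hidden weight DataFrame loaded above) ranks the input features by how much their outgoing weights vary across the hidden layer:

# Standard deviation of each input feature's weights across the hidden nodes
w_std = w_l1.std(axis=1)
# Features whose weights vary the most / the least across the hidden layer
print(w_std.sort_values(ascending=False).head(10))
print(w_std.sort_values(ascending=True).head(10))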
In [13]:
def garson(A, B):
    """
    Computes Garson's algorithm
    A = matrix of input-hidden weights (rows = input features, cols = hidden nodes)
    B = vector of hidden-output weights for a single output class
    """
    B = np.diag(B)
    # connection weights through each hidden node (input-hidden * hidden-output)
    cw = np.dot(A, B)
    # total absolute connection weight per hidden node (sum over input features, axis=0)
    cw_h = abs(cw).sum(axis=0)
    # relative contribution of each input to the outgoing signal of each hidden node,
    # then sum over hidden nodes to get each input's overall contribution
    rc = np.divide(abs(cw), cw_h)
    rc = rc.sum(axis=1)
    # normalize so the relative importances sum to 1
    ri = rc / rc.sum()
    return ri
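As a quick sanity check of the function above (a hypothetical toy example, not taken from the model weights), the relative importances should be non-negative and sum to 1 for any input-hidden matrix and hidden-output vector:

# Toy example: 3 input features, 2 hidden nodes
A_toy = np.array([[1.0, -2.0],
                  [0.5,  0.5],
                  [-1.0, 1.0]])
B_toy = np.array([0.3, -0.7])
ri_toy = garson(A_toy, B_toy)
print(ri_toy)        # one relative importance per input feature
print(ri_toy.sum())  # should be 1.0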
In [14]:
# Run Garson's algorithm
df = {}
for i in range(w_l2.shape[1]):
    df[i] = garson(w_l1, w_l2[i])
# Reformat
df = pd.DataFrame(df)
df.head()
Out[14]:
In [15]:
f, ax = plt.subplots(figsize=(25, 6))
df.plot(kind="line", ax=ax)
plt.xlabel("Input feature")
plt.ylabel("Relative importance")
Out[15]:
In [16]:
## Index of the top 10 features & their relative importance
df_ri = pd.DataFrame({"relative_importance": df[0]}).sort_values(by="relative_importance", ascending=False)
df_ri.head(10)
Out[16]:
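As a possible follow-up (a sketch assuming `df_ri` from the previous cell), the same top-10 ranking can be shown as a bar chart instead of a table:

f, ax = plt.subplots(figsize=(12, 6))
# Bar chart of the 10 most important input features for output class 0
df_ri.head(10).plot(kind="bar", legend=False, ax=ax)
plt.xlabel("Input feature index")
plt.ylabel("Relative importance")
plt.title("Top 10 input features by Garson relative importance (class 0)")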