In [1]:
from __future__ import print_function
import lasagne
import numpy as np
import pandas as pd
import _pickle as pickle
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
%matplotlib inline
Today, I want to implement another metric for extracting feature importance: the connection weights algorithm described in:
Ibrahim, OM. 2013. A comparison of methods for assessing the relative importance of input variables in artificial neural networks. Journal of Applied Sciences Research, 9(11): 5692-5700.
In [3]:
# Load the trained network parameters from the saved .npz archive.
# np.savez stores arrays under the keys 'arr_0', 'arr_1', ... in order.
npz_file = "/Users/csiu/repo/predictTissue/src/model-mlp_n100-e100.txt.npz"
with np.load(npz_file) as f:
    param_values = []
    for i in range(len(f.files)):
        param_values.append(f['arr_%d' % i])

# Keep the two weight matrices we need as DataFrames:
# param_values[0] = input->hidden weights, param_values[2] = hidden->output weights
# (odd indices hold the bias vectors, which this analysis does not use)
w_l1 = pd.DataFrame(param_values[0])
w_l2 = pd.DataFrame(param_values[2])
In [4]:
def connection_weights(A, B):
    """Relative feature importance via the Connection Weights algorithm.

    Parameters
    ----------
    A : matrix of weights of the input-hidden layer (rows = input, cols = hidden)
    B : matrix of weights of the hidden-output layer (rows = hidden, cols = output)

    Returns
    -------
    The matrix product A.B rescaled so that all entries sum to 1,
    i.e. each input-output connection weight expressed as a share of
    the overall total ("relative importance").
    """
    product = np.dot(A, B)
    # Normalize by the grand total so the importances sum to 100%
    return product / product.sum()
In [5]:
# Relative importance of each (input feature, output class) connection,
# wrapped in a DataFrame for convenient plotting/inspection below
df = pd.DataFrame(connection_weights(w_l1, w_l2))
df.head()
Out[5]:
In [6]:
# Distribution of input->hidden weights for the first n input features
# (transpose so each feature becomes one box along the x-axis)
n = 150
f, ax = plt.subplots(figsize=(25, 6))
sns.boxplot(w_l1[:n].transpose())
ax.set_xlabel("Input feature")
ax.set_ylabel("weight of input-hidden node")
ax.set_title("Input-hidden node weights of the first {} features".format(n))
Out[6]:
Again, the weights of some regions are more variable than those of others.
In [7]:
# Relative importance of every input feature; one line per output class
f, ax = plt.subplots(figsize=(25, 6))
df.plot(kind="line", ax=ax)
ax.set_xlabel("Input feature")
ax.set_ylabel("Relative importance")
Out[7]:
In [8]:
# Assign each input feature to the output class where it carries the
# largest relative importance, then count how many features land in each class
class_assignments = df.idxmax(axis=1)
class_assignments.value_counts(sort=False).plot(kind="bar")
plt.xlabel("Index of output class")
plt.ylabel("Count")
plt.title("Input features assigned to output class by maximizing importance")
Out[8]: