In [1]:
from autograd import grad
from memorytools import summarize_objects

import autograd.numpy as np
import pickle as pkl
import json
import pandas as pd
import matplotlib.pyplot as plt
import tracemalloc
import objgraph

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
tracemalloc.start()
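
tracemalloc traces allocations from this point on; its basic workflow is
snapshot-and-diff. A minimal sketch of the pattern (hypothetical snapshot
names), which the pre_grad/post_grad comparison near the end of this
notebook follows:

    pre = tracemalloc.take_snapshot()
    # ... run the operation under investigation ...
    post = tracemalloc.take_snapshot()
    for stat in post.compare_to(pre, 'lineno')[:10]:
        print(stat)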

In [3]:
# Load the HIV protease data (sequences plus drug-resistance measurements).

df = pd.read_csv('../data/hiv_data/hiv-protease-data-expanded.csv', index_col=0)
df = df.dropna(subset=['FPV'])
df.head()


Out[3]:
ATV DRV FPV IDV LPV NFV SQV SeqID TPV seqid sequence sequence_object weight
0 NaN NaN 2.5 16.3 NaN 38.6 16.1 2996 NaN 2996-0 PQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDVNLPGRWKPKM... ID: 2996-0\nName: <unknown name>\nDescription:... 0.50
1 NaN NaN 2.5 16.3 NaN 38.6 16.1 2996 NaN 2996-1 PQITLWQRPIVTIKIGGQLKEALLDTGADDTVLEDVNLPGRWKPKM... ID: 2996-1\nName: <unknown name>\nDescription:... 0.50
2 NaN NaN 0.7 0.8 NaN 0.8 1.1 4387 NaN 4387-0 PQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMELPGRWKPKM... ID: 4387-0\nName: <unknown name>\nDescription:... 0.25
3 NaN NaN 0.7 0.8 NaN 0.8 1.1 4387 NaN 4387-1 PQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMELPGRWKPKM... ID: 4387-1\nName: <unknown name>\nDescription:... 0.25
4 NaN NaN 0.7 0.8 NaN 0.8 1.1 4387 NaN 4387-2 PQITLWQRPLVTIKVGGQLKEALLDTGADDTVLEDMELPGRWKPKM... ID: 4387-2\nName: <unknown name>\nDescription:... 0.25

In [4]:
# Load the numpy array of node features for all graphs.
graph_arr = np.load('../data/feat_array.npy')

In [5]:
# Load the pickles that contain the graph and node-neighbor information.
def unpickle_data(path):
    with open(path, 'rb') as f:
        data = pkl.load(f)
    return data

graph_idxs = unpickle_data('../data/graph_idxs.pkl')
graph_nodes = unpickle_data('../data/graph_nodes.pkl')
nodes_nbrs = unpickle_data('../data/nodes_nbrs.pkl')

In [6]:
list(graph_idxs.keys())[0:5]
# len(graph_idxs.keys())


Out[6]:
['109452-3', '68072-3', '50656-1', '28212-1', '148081-7']

In [7]:
list(graph_nodes.items())[0]


Out[7]:
('109452-3',
 {277190: 'A70LYS',
  277191: 'A34GLU',
  277192: 'B73GLY',
  277193: 'A36MET',
  277194: 'B49GLY',
  277195: 'A12THR',
  277196: 'A24LEU',
  277197: 'B68GLY',
  277198: 'B4THR',
  277199: 'A88ASN',
  277200: 'A63LEU',
  277201: 'B2GLN',
  277202: 'A29ASP',
  277203: 'B90LEU',
  277204: 'A6TRP',
  277205: 'B93LEU',
  277206: 'A32VAL',
  277207: 'A3ILE',
  277208: 'B16GLY',
  277209: 'B75VAL',
  277210: 'A42TRP',
  277211: 'B48GLY',
  277212: 'A53PHE',
  277213: 'A85ILE',
  277214: 'B6TRP',
  277215: 'A94GLY',
  277216: 'B30ASP',
  277217: 'A52GLY',
  277218: 'B66ILE',
  277219: 'B63LEU',
  277220: 'B98ASN',
  277221: 'B70LYS',
  277222: 'B32VAL',
  277223: 'A2GLN',
  277224: 'B79PRO',
  277225: 'A44PRO',
  277226: 'A22ALA',
  277227: 'A82VAL',
  277228: 'B28ALA',
  277229: 'B94GLY',
  277230: 'B45ARG',
  277231: 'B22ALA',
  277232: 'B40GLY',
  277233: 'A19ILE',
  277234: 'A33VAL',
  277235: 'B76LEU',
  277236: 'B15ILE',
  277237: 'A28ALA',
  277238: 'A71VAL',
  277239: 'B57ARG',
  277240: 'B71VAL',
  277241: 'B78GLY',
  277242: 'A65LYS',
  277243: 'A20LYS',
  277244: 'A59TYR',
  277245: 'B14GLU',
  277246: 'A13ILE',
  277247: 'B18GLN',
  277248: 'A21GLU',
  277249: 'A96THR',
  277250: 'B13ILE',
  277251: 'A39PRO',
  277252: 'B55LYS',
  277253: 'A25ASP',
  277254: 'B64VAL',
  277255: 'A90LEU',
  277256: 'B9PRO',
  277257: 'A55LYS',
  277258: 'B7GLN',
  277259: 'B10LEU',
  277260: 'B38LEU',
  277261: 'A17GLY',
  277262: 'B42TRP',
  277263: 'B54ILE',
  277264: 'A43LYS',
  277265: 'A5LEU',
  277266: 'B58GLN',
  277267: 'A64VAL',
  277268: 'A73GLY',
  277269: 'B24LEU',
  277270: 'B21GLU',
  277271: 'A35GLU',
  277272: 'B19ILE',
  277273: 'A66ILE',
  277274: 'B51GLY',
  277275: 'B84ILE',
  277276: 'A46MET',
  277277: 'A18GLN',
  277278: 'B81PRO',
  277279: 'A89LEU',
  277280: 'A56VAL',
  277281: 'A78GLY',
  277282: 'B89LEU',
  277283: 'A77VAL',
  277284: 'B61GLN',
  277285: 'B17GLY',
  277286: 'B41LYS',
  277287: 'A60ASP',
  277288: 'B74THR',
  277289: 'B43LYS',
  277290: 'B65LYS',
  277291: 'B72ILE',
  277292: 'B3ILE',
  277293: 'A23LEU',
  277294: 'A68GLY',
  277295: 'B39PRO',
  277296: 'A9PRO',
  277297: 'B96THR',
  277298: 'A61GLN',
  277299: 'A1PRO',
  277300: 'A83ASN',
  277301: 'A11VAL',
  277302: 'A27GLY',
  277303: 'A10LEU',
  277304: 'B23LEU',
  277305: 'B77VAL',
  277306: 'B44PRO',
  277307: 'A7GLN',
  277308: 'A45ARG',
  277309: 'B95CYS',
  277310: 'A62VAL',
  277311: 'B67CYS',
  277312: 'A91THR',
  277313: 'A92GLN',
  277314: 'A8ARG',
  277315: 'A79PRO',
  277316: 'A69TYR',
  277317: 'B86GLY',
  277318: 'A47ILE',
  277319: 'A84ILE',
  277320: 'B37CYS',
  277321: 'A75VAL',
  277322: 'A50ILE',
  277323: 'B36MET',
  277324: 'A16GLY',
  277325: 'B34GLU',
  277326: 'A93LEU',
  277327: 'A87ARG',
  277328: 'A98ASN',
  277329: 'B47ILE',
  277330: 'B11VAL',
  277331: 'B87ARG',
  277332: 'A67CYS',
  277333: 'A72ILE',
  277334: 'A14GLU',
  277335: 'B80THR',
  277336: 'A57ARG',
  277337: 'B97LEU',
  277338: 'B60ASP',
  277339: 'B82VAL',
  277340: 'A31THR',
  277341: 'A99PHE',
  277342: 'B69TYR',
  277343: 'B83ASN',
  277344: 'B33VAL',
  277345: 'B46MET',
  277346: 'A74THR',
  277347: 'B20LYS',
  277348: 'B35GLU',
  277349: 'B12THR',
  277350: 'A37CYS',
  277351: 'A54ILE',
  277352: 'B53PHE',
  277353: 'A4THR',
  277354: 'B85ILE',
  277355: 'A76LEU',
  277356: 'B59TYR',
  277357: 'A58GLN',
  277358: 'B27GLY',
  277359: 'A49GLY',
  277360: 'B52GLY',
  277361: 'A41LYS',
  277362: 'A26THR',
  277363: 'A15ILE',
  277364: 'B62VAL',
  277365: 'B31THR',
  277366: 'B1PRO',
  277367: 'A95CYS',
  277368: 'B26THR',
  277369: 'A40GLY',
  277370: 'A30ASP',
  277371: 'B29ASP',
  277372: 'B99PHE',
  277373: 'A51GLY',
  277374: 'A80THR',
  277375: 'A86GLY',
  277376: 'A38LEU',
  277377: 'B5LEU',
  277378: 'A81PRO',
  277379: 'B92GLN',
  277380: 'B91THR',
  277381: 'B25ASP',
  277382: 'A97LEU',
  277383: 'B88ASN',
  277384: 'B50ILE',
  277385: 'A48GLY',
  277386: 'B56VAL',
  277387: 'B8ARG'})

In [8]:
summarize_objects()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-c46dbac041c7> in <module>()
----> 1 summarize_objects()

/Users/ericmjl/Documents/github/memory-tools/src/memorytools/__init__.py in summarize_objects(objs, echo, limit)
     90     objs = gc.get_objects()
     91 
---> 92   objs_dict = _summarize_objects(objs)
     93   size_summary = ['{0:>10s} {1:>5s} {2}'.format('Size', 'Count', 'Type')]
     94   count_summary = ['{0:>5s} {1:>10s} {2}'.format('Count', 'Size', 'Type')]

/Users/ericmjl/Documents/github/memory-tools/src/memorytools/__init__.py in _summarize_objects(objs)
    133   for obj in objs:
    134     _incr(objs_dict, type(obj), 'count')
--> 135     _incr(objs_dict, type(obj), 'size', sys.getsizeof(obj))
    136 
    137   return objs_dict

/Users/ericmjl/anaconda/lib/python3.5/site-packages/pandas/core/base.py in __sizeof__(self)
    128         # no memory_usage attribute, so fall back to
    129         # object's 'sizeof'
--> 130         return super(self, PandasObject).__sizeof__()
    131 
    132 

TypeError: must be type, not FrozenList

In [10]:
list(nodes_nbrs.items())[0]


Out[10]:
(0, [0, 23, 46])

In [11]:
# Keep only the sequence IDs present in both graph_idxs and df['seqid'].
intersect = set(df['seqid'].values).intersection(graph_idxs.keys())
len(intersect)


Out[11]:
3200

In [12]:
# Get a reduced list of graph_idxs.
graph_idxs_red = dict()
graph_nodes_red = dict()
for g in intersect:
    graph_idxs_red[g] = graph_idxs[g]
    graph_nodes_red[g] = graph_nodes[g]

In [13]:
graph_idxs_red['46213-0']


Out[13]:
[535066,
 535067,
 535068,
 ...
 535261,
 535262,
 535263]

In [14]:
graph_arr.shape


Out[14]:
(659895, 36)

In [15]:
# Make one pass over the data to get the old/new index mapping, and
# make the final graph_array that gets passed in as an input.

def reindex_data_matrix(graph_idxs_red, graph_arr):
    """
    Parameters:
    ===========
    - graph_idxs_red: reduced graph indices
    - graph_arr: the original matrix of (nodes by node_features)
    
    Returns:
    ========
    - graph_arr_fin: a reduced matrix of (nodes by node_features)
    - nodes_oldnew, nodes_newold: mapping of new and old indices.
    """
    # Initialize a zero-matrix. 
    idxs = np.concatenate([i for i in graph_idxs_red.values()])
    graph_arr_fin = np.zeros(shape=graph_arr[idxs].shape)

    # Initialize empty maps of graph indices from the old to the new.
    nodes_oldnew = dict()  # {old_idx: new_idx}.
    nodes_newold = dict()  # {new_idx: old_idx}

    # Re-assign reduced graphs to the zero-matrix.
    curr_idx = 0
    for seqid, idxs in sorted(graph_idxs_red.items()):
        for idx in idxs:
            nodes_oldnew[idx] = curr_idx
            nodes_newold[curr_idx] = idx
            graph_arr_fin[curr_idx] = graph_arr[idx]
            curr_idx += 1
    return graph_arr_fin, nodes_oldnew, nodes_newold

graph_arr_fin, nodes_oldnew, nodes_newold = reindex_data_matrix(graph_idxs_red, graph_arr)
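
To make the mapping logic concrete, a toy example with made-up indices:
graphs are processed in sorted seqid order, so 'g0' fills the first rows
of the output matrix.

    toy_idxs = {'g1': [5, 7], 'g0': [2]}
    toy_arr = np.arange(30).reshape(10, 3)
    fin, oldnew, newold = reindex_data_matrix(toy_idxs, toy_arr)
    # oldnew == {2: 0, 5: 1, 7: 2}, and fin[0] is toy_arr[2]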

In [16]:
graph_arr_fin.shape


Out[16]:
(622671, 36)

In [17]:
len(nodes_oldnew)


Out[17]:
622671

In [18]:
len(nodes_newold)


Out[18]:
622671

In [19]:
# Spot-check a random sample of indices to make sure the rows were reindexed correctly.
from random import sample

n_samples = 10000
rnd_idxs = sample(range(graph_arr_fin.shape[0]), n_samples)
for new_idx in rnd_idxs:
    assert np.all(np.equal(graph_arr_fin[new_idx], graph_arr[nodes_newold[new_idx]]))

In [20]:
objgraph.most_common_types(limit=5)


Out[20]:
[('list', 666814),
 ('function', 26707),
 ('dict', 10797),
 ('tuple', 9310),
 ('weakref', 4117)]

In [21]:
graph_arr_fin.shape


Out[21]:
(622671, 36)

In [22]:
# Finally, rework the nodes_nbrs, graph_idxs, and graph_nodes dictionaries with the corrected idxs.
# This is the key step: everything downstream assumes the new, contiguous indexing.
from collections import defaultdict

def filter_and_reindex_nodes_and_neighbors(nodes_nbrs, nodes_oldnew):
    """
    - nodes_nbrs: a dictionary of nodes and their neighbors.
    - nodes_oldnew: a dictionary mapping old node indices to their new node indices.
    """
    nodes_nbrs_fin = defaultdict(list)
    
    for node, nbrs in sorted(nodes_nbrs.items()):
        if node in nodes_oldnew:  # keep only nodes that survived the filtering
            for nbr in nbrs:
                nodes_nbrs_fin[nodes_oldnew[node]].append(nodes_oldnew[nbr])
    return nodes_nbrs_fin

nodes_nbrs_fin = filter_and_reindex_nodes_and_neighbors(nodes_nbrs, nodes_oldnew)
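
To see what this does concretely, a toy example with made-up indices:
node 99 belongs to a filtered-out graph, so it (and its adjacency) drops away.

    toy_nbrs = {10: [10, 12], 12: [12, 10], 99: [99]}
    toy_oldnew = {10: 0, 12: 1}
    filter_and_reindex_nodes_and_neighbors(toy_nbrs, toy_oldnew)
    # -> defaultdict(list, {0: [0, 1], 1: [1, 0]})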

In [23]:
objgraph.most_common_types(limit=5)


Out[23]:
[('list', 1289467),
 ('function', 26706),
 ('dict', 10751),
 ('tuple', 9283),
 ('weakref', 4121)]

In [24]:
def filter_and_reindex_graph_idxs(graph_idxs, nodes_oldnew):
    """
    - graph_idxs: a dictionary of graphs and their original indices.
    - nodes_oldnew: a dictionary mapping old node indices to their new node indices.
    """
    graph_idxs_fin = defaultdict(list)
    for seqid, nodes in sorted(graph_idxs.items()):
        for node in nodes:
            if node in nodes_oldnew:
                graph_idxs_fin[seqid].append(nodes_oldnew[node])
    return graph_idxs_fin

graph_idxs_fin = filter_and_reindex_graph_idxs(graph_idxs, nodes_oldnew)

In [25]:
def filter_and_reindex_graph_nodes(graph_nodes, nodes_oldnew):
    """
    - graph_nodes: a dictionary mapping graphs to their dictionary mapping indices to node names.
    - nodes_oldnew: a dictionary mapping old node indices to their new node indices.
    """    
    graph_nodes_fin = defaultdict(dict)
    for seqid, idx_node in sorted(graph_nodes.items()):
        for old_idx, node_name in idx_node.items():
            if old_idx in nodes_oldnew:
                graph_nodes_fin[seqid][nodes_oldnew[old_idx]] = node_name
    return graph_nodes_fin

graph_nodes_fin = filter_and_reindex_graph_nodes(graph_nodes, nodes_oldnew)

In [26]:
objgraph.most_common_types(limit=5)


Out[26]:
[('list', 1292671),
 ('function', 26708),
 ('dict', 10751),
 ('tuple', 9286),
 ('weakref', 4124)]

In [27]:
from graphfp.layers import FingerprintLayer, LinearRegressionLayer, GraphConvLayer
from graphfp.utils import initialize_network
from pyflatten import flatten

layers = [GraphConvLayer(weights_shape=(36, 36), biases_shape=(1, 36)),
          FingerprintLayer(weights_shape=(36, 36), biases_shape=(1, 36)),
          LinearRegressionLayer(weights_shape=(36, 1), biases_shape=(1, 1)),
]

wb = initialize_network(layers_spec=layers)
wb_vect, unflattener = flatten(wb)
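
flatten packs the nested weights-and-biases structure into a single flat
parameter vector and returns a function that inverts the packing. A toy
sketch of the idea (not graphfp-specific):

    toy = {'w': np.ones((2, 2)), 'b': np.zeros((1, 2))}
    vect, unflat = flatten(toy)
    assert vect.shape == (6,)
    assert np.allclose(unflat(vect)['w'], toy['w'])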

In [28]:
objgraph.most_common_types(limit=5)


Out[28]:
[('list', 1293151),
 ('function', 30607),
 ('dict', 12794),
 ('tuple', 10839),
 ('weakref', 4550)]

In [29]:
# from random import sample

# def batch_sample(inputs, nodes_nbrs, graph_idxs, n_graphs):
#     """
#     Randomly samples n_graphs from all of the graphs, returns new inputs,
#     node_nbr dictionary, and graph_idx dictionary.
#     """
#     samp_graph_idxs = dict(sample(graph_idxs.items(), n_graphs))
#     assert len(samp_graph_idxs) == n_graphs, "There was an error in sampling."
#     concat_samp_idxs = np.concatenate([v for k, v in sorted(samp_graph_idxs.items())])
#     # print('Samp Idxs Shape')
#     # print(concat_samp_idxs.shape)
#     samp_nodes_nbrs = {i: nodes_nbrs[i] for i in concat_samp_idxs}
#     assert len(samp_nodes_nbrs) == len(concat_samp_idxs)

#     samp_inputs, samp_nodes_oldnew, samp_nodes_newold = reindex_data_matrix(samp_graph_idxs, inputs)
    
#     samp_nodes_nbrs = filter_and_reindex_nodes_and_neighbors(samp_nodes_nbrs, samp_nodes_oldnew)
#     samp_graph_idxs = filter_and_reindex_graph_idxs(samp_graph_idxs, samp_nodes_oldnew)
    
#     return samp_inputs, samp_nodes_nbrs, samp_graph_idxs

# n_sampled_graphs = 100
# samp_inputs, samp_nodes_nbrs, samp_graph_idxs = batch_sample(graph_arr_fin, nodes_nbrs_fin, graph_idxs_fin, n_sampled_graphs)

# assert samp_inputs.shape[1] == 36
# assert len(samp_nodes_nbrs) == samp_inputs.shape[0]
# assert len(samp_graph_idxs) == n_sampled_graphs

In [30]:
# # Scratchpad cell
# samp_graph_idxs = dict(sample(graph_idxs.items(), 10))
# assert len(samp_graph_idxs) == 10
# concat_samp_idxs = np.concatenate([i for i in samp_graph_idxs.values()])
# samp_nodes_nbrs = {i: nodes_nbrs[i] for i in concat_samp_idxs}
# assert len(samp_nodes_nbrs) == len(concat_samp_idxs)

In [31]:
# Used in conjunction with train_loss function in cell below.
from graphfp.binary_matrix_utils import to_sparse_format, to_scipy_csr_matrix
# samp_graph_arr, samp_node_nbrs, samp_graph_idx = batch_sample(graph_arr_fin, nodes_nbrs_fin, graph_idxs_fin, 10)
node_rows, node_cols, ones = to_sparse_format(nodes_nbrs_fin)
# nodes_nbrs_sparse = to_scipy_csr_matrix(nodes_nbrs_fin)

In [32]:
objgraph.most_common_types(limit=5)


Out[32]:
[('list', 1293156),
 ('function', 30612),
 ('dict', 12781),
 ('tuple', 10676),
 ('weakref', 4554)]

In [33]:
len(nodes_nbrs_fin)


Out[33]:
622671

In [34]:
from scipy.sparse import csr_matrix 
nodes_nbrs_compressed = csr_matrix((ones, (node_rows, node_cols)), shape=(len(nodes_nbrs_fin), len(nodes_nbrs_fin)))
nodes_nbrs_compressed


Out[34]:
<622671x622671 sparse matrix of type '<class 'numpy.int32'>'
	with 2990113 stored elements in Compressed Sparse Row format>
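
For reference, the same sparse adjacency matrix can be assembled straight
from the neighbor dictionary without graphfp's helpers; a minimal
equivalent sketch (it relies on the dict iterating in a consistent order
across the two comprehensions):

    rows = [node for node, nbrs in nodes_nbrs_fin.items() for _ in nbrs]
    cols = [nbr for nbrs in nodes_nbrs_fin.values() for nbr in nbrs]
    data = np.ones(len(rows), dtype=np.int32)
    adj = csr_matrix((data, (rows, cols)),
                     shape=(len(nodes_nbrs_fin), len(nodes_nbrs_fin)))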

In [35]:
objgraph.most_common_types(limit=5)


Out[35]:
[('list', 1293160),
 ('function', 30612),
 ('dict', 12782),
 ('tuple', 10685),
 ('weakref', 4557)]

In [36]:
# %%prun
def predict(wb_struct, inputs, nodes_nbrs_compressed, graph_idxs, layers):
    """Propagate the inputs forward through each layer in turn."""
    curr_inputs = inputs
    for i, layer in enumerate(layers):
        wb = wb_struct['layer{0}_{1}'.format(i, layer)]
        curr_inputs = layer.forward_pass(wb, curr_inputs, nodes_nbrs_compressed, graph_idxs)
    return curr_inputs


predict(wb, graph_arr_fin, nodes_nbrs_compressed, graph_idxs_fin, layers).shape  # one row per graph: 3200 matches len(intersect)


Out[36]:
(3200, 1)

In [37]:
# %%prun

# Prototype train_loss function
wb_vect, unflattener = flatten(wb)

def get_actual(graph_idxs, df, preds):
    """Return measured FPV resistance values, ordered to match the predictions."""
    sorted_graphs = sorted(graph_idxs.keys())
    sorted_resistances = df[df['seqid'].isin(sorted_graphs)].set_index('seqid').loc[sorted_graphs, 'FPV'].values
    return sorted_resistances.reshape(preds.shape)

def train_loss(wb_vect, unflattener):
    """Mean squared error over the full dataset.

    An earlier version trained on random batches drawn with batch_sample
    (see the commented-out cell above); this version uses all graphs at once.
    """
    wb_struct = unflattener(wb_vect)
    preds = predict(wb_struct, graph_arr_fin, nodes_nbrs_compressed, graph_idxs_fin, layers)
    graph_scores = get_actual(graph_idxs_fin, df, preds)
    mse = np.mean(np.power(preds - graph_scores, 2))
    return mse

train_loss(wb_vect, unflattener)


Out[37]:
2372.9289541595585

In [38]:
objgraph.most_common_types(limit=5)


Out[38]:
[('list', 1293173),
 ('function', 30619),
 ('dict', 12794),
 ('tuple', 10687),
 ('weakref', 4560)]

In [39]:
gradfunc = grad(train_loss)
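
grad returns a function that evaluates the gradient of train_loss with
respect to its first argument, the flat weight vector. A toy illustration
of the same pattern:

    f = lambda w: np.sum(w ** 2)
    df = grad(f)
    df(np.array([1.0, 2.0]))  # -> array([2., 4.])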

In [40]:
gradfunc(wb_vect, unflattener)


Out[40]:
array([ -1.99004175e+00,  -9.95647841e-03,   4.87130438e-01, ...,
        -5.24873985e+02,  -7.65882729e+02,  -1.23648463e-01])

In [41]:
objgraph.most_common_types(limit=5)


Out[41]:
[('list', 1315651),
 ('function', 43455),
 ('tuple', 36361),
 ('cell', 23731),
 ('dict', 15995)]

In [44]:
from pympler import muppy, summary

In [43]:
all_objects = muppy.get_objects()

In [45]:
sum1 = summary.summarize(all_objects)
summary.print_(sum1)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-45-9ac36a350e08> in <module>()
----> 1 sum1 = summary.summarize(all_objects)
      2 summary.print_(sum1)

/Users/ericmjl/anaconda/lib/python3.5/site-packages/pympler/summary.py in summarize(objects)
    129         else:
    130             count[otype] = 1
--> 131             total_size[otype] = _getsizeof(o)
    132     rows = []
    133     for otype in count:

/Users/ericmjl/anaconda/lib/python3.5/site-packages/pandas/core/base.py in __sizeof__(self)
    128         # no memory_usage attribute, so fall back to
    129         # object's 'sizeof'
--> 130         return super(self, PandasObject).__sizeof__()
    131 
    132 

TypeError: must be type, not FrozenList

In [44]:
# pre_grad and post_grad are tracemalloc snapshots taken immediately before and
# after the gradfunc call above (snapshot cells not shown), e.g.:
#     pre_grad = tracemalloc.take_snapshot()
top_stats = post_grad.compare_to(pre_grad, 'traceback')

In [45]:
print("[ Top 10 differences ]")
for stat in top_stats[:10]:
    print(stat)


[ Top 10 differences ]
/Users/ericmjl/Documents/github/autograd/autograd/core.py:121: size=808 KiB (+806 KiB), count=12928 (+12892), average=64 B
/Users/ericmjl/Documents/github/autograd/autograd/numpy/numpy_extra.py:11: size=700 KiB (+700 KiB), count=12798 (+12798), average=56 B
/Users/ericmjl/Documents/github/autograd/autograd/core.py:226: size=677 KiB (+677 KiB), count=9634 (+9634), average=72 B
/Users/ericmjl/Documents/github/autograd/autograd/numpy/numpy_extra.py:154: size=487 KiB (+487 KiB), count=6399 (+6399), average=78 B
/Users/ericmjl/Documents/github/autograd/autograd/core.py:99: size=459 KiB (+459 KiB), count=9657 (+9657), average=49 B
/Users/ericmjl/Documents/github/autograd/autograd/numpy/numpy_grads.py:442: size=425 KiB (+425 KiB), count=3200 (+3200), average=136 B
/Users/ericmjl/Documents/github/autograd/autograd/numpy/numpy_grads.py:402: size=425 KiB (+425 KiB), count=3200 (+3200), average=136 B
/Users/ericmjl/Documents/github/autograd/autograd/numpy/numpy_grads.py:263: size=425 KiB (+425 KiB), count=3200 (+3200), average=136 B
/Users/ericmjl/Documents/github/autograd/autograd/numpy/numpy_extra.py:13: size=425 KiB (+425 KiB), count=3200 (+3200), average=136 B
/Users/ericmjl/Documents/github/autograd/autograd/core.py:211: size=401 KiB (+401 KiB), count=6414 (+6414), average=64 B

In [31]:
from graphfp.optimizers import adam

In [32]:
import gc
from time import time
training_losses = []
def callback(wb, i):
    """Record and print the training loss at every iteration."""
    start = time()
    tl = train_loss(*flatten(wb))
    print(tl, time() - start)
    training_losses.append(tl)
    gc.collect()  # force a collection each iteration to keep memory in check

In [ ]:
# adam(gradfunc, wb, callback=callback, num_iters=10)
wb_vect, unflattener = adam(gradfunc, wb, callback=callback, num_iters=5)

In [ ]:
from autograd.core import getval

plt.plot([getval(tl) for tl in training_losses])
plt.yscale('log')
