In [1]:
from autograd import grad
from memorytools import summarize_objects
import autograd.numpy as np
import pickle as pkl
import json
import pandas as pd
import matplotlib.pyplot as plt
import tracemalloc
import objgraph
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
tracemalloc.start()
In [3]:
# Load the HIV protease data, keeping only rows with an FPV resistance measurement.
df = pd.read_csv('../data/hiv_data/hiv-protease-data-expanded.csv', index_col=0)
df = df.dropna(subset=['FPV'])
df.head()
Out[3]:
In [4]:
# Open the numpy array of all graphs' data.
graph_arr = np.load('../data/feat_array.npy')
In [5]:
# Open the pickles that contain the graph information and node-neighbour information.
def unpickle_data(path):
    """Load and return a pickled object from `path`."""
    with open(path, 'rb') as f:
        data = pkl.load(f)
    return data

graph_idxs = unpickle_data('../data/graph_idxs.pkl')
graph_nodes = unpickle_data('../data/graph_nodes.pkl')
nodes_nbrs = unpickle_data('../data/nodes_nbrs.pkl')
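In [ ]:
# Added sanity check (a sketch, not part of the original run): every node index
# referenced by graph_idxs should point at a valid row of graph_arr.
max(idx for idxs in graph_idxs.values() for idx in idxs) < graph_arr.shape[0]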
In [6]:
list(graph_idxs.keys())[0:5]
# len(graph_idxs.keys())
Out[6]:
In [7]:
list(graph_nodes.items())[0]
Out[7]:
In [8]:
summarize_objects()
In [10]:
list(nodes_nbrs.items())[0]
Out[10]:
In [11]:
# Keep only the sequence IDs that appear both in graph_idxs and in df['seqid'].
intersect = set(df['seqid'].values).intersection(graph_idxs.keys())
len(intersect)
Out[11]:
In [12]:
# Reduce graph_idxs and graph_nodes to just the sequences in the intersection.
graph_idxs_red = dict()
graph_nodes_red = dict()
for g in intersect:
    graph_idxs_red[g] = graph_idxs[g]
    graph_nodes_red[g] = graph_nodes[g]
In [13]:
graph_idxs_red['46213-0']
Out[13]:
In [14]:
graph_arr.shape
Out[14]:
In [15]:
# Make one pass over the data to get the old/new index mapping, and
# build the final graph array that gets passed in as an input.
def reindex_data_matrix(graph_idxs_red, graph_arr):
    """
    Parameters:
    ===========
    - graph_idxs_red: reduced graph indices
    - graph_arr: the original matrix of (nodes by node_features)

    Returns:
    ========
    - graph_arr_fin: a reduced matrix of (nodes by node_features)
    - nodes_oldnew, nodes_newold: mappings between old and new indices
    """
    # Initialize a zero matrix sized to hold only the rows that are kept.
    all_idxs = np.concatenate(list(graph_idxs_red.values()))
    graph_arr_fin = np.zeros(shape=graph_arr[all_idxs].shape)

    # Initialize empty maps of node indices from the old to the new numbering.
    nodes_oldnew = dict()  # {old_idx: new_idx}
    nodes_newold = dict()  # {new_idx: old_idx}

    # Re-assign the rows of the reduced graphs to the zero matrix,
    # packing them densely in sorted-seqid order.
    curr_idx = 0
    for seqid, idxs in sorted(graph_idxs_red.items()):
        for idx in idxs:
            nodes_oldnew[idx] = curr_idx
            nodes_newold[curr_idx] = idx
            graph_arr_fin[curr_idx] = graph_arr[idx]
            curr_idx += 1
    return graph_arr_fin, nodes_oldnew, nodes_newold

graph_arr_fin, nodes_oldnew, nodes_newold = reindex_data_matrix(graph_idxs_red, graph_arr)
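In [ ]:
# Illustrative sketch (added; not part of the original run): a toy example of how
# `reindex_data_matrix` packs the kept rows densely and records the old/new index maps.
# The graph names 'g1'/'g2' and the 10x3 feature matrix are made up for illustration.
toy_arr = np.arange(30).reshape(10, 3)
toy_idxs = {'g1': np.array([5, 7]), 'g2': np.array([2])}
toy_fin, toy_oldnew, toy_newold = reindex_data_matrix(toy_idxs, toy_arr)
assert toy_fin.shape == (3, 3)
assert np.all(toy_fin[toy_oldnew[5]] == toy_arr[5])
toy_oldnew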
In [16]:
graph_arr_fin.shape
Out[16]:
In [17]:
len(nodes_oldnew)
Out[17]:
In [18]:
len(nodes_newold)
Out[18]:
In [19]:
# Check a random sample of the indices to make sure the rows were copied correctly.
from random import sample
n_samples = 10000
rnd_idxs = sample(range(graph_arr_fin.shape[0]), n_samples)
for new_idx in rnd_idxs:
    assert np.all(np.equal(graph_arr_fin[new_idx], graph_arr[nodes_newold[new_idx]]))
In [20]:
objgraph.most_common_types(limit=5)
Out[20]:
In [21]:
graph_arr_fin.shape
Out[21]:
In [22]:
# Finally, rework the nodes_nbrs, graph_idxs, and graph_nodes dictionaries with the corrected idxs.
# This is the key step; it is encapsulated in functions so it can be reused (e.g. for batch sampling).
from collections import defaultdict

def filter_and_reindex_nodes_and_neighbors(nodes_nbrs, nodes_oldnew):
    """
    - nodes_nbrs: a dictionary of nodes and their neighbors.
    - nodes_oldnew: a dictionary mapping old node indices to their new node indices.
    """
    nodes_nbrs_fin = defaultdict(list)
    for node, nbrs in sorted(nodes_nbrs.items()):
        if node in nodes_oldnew:
            for nbr in nbrs:
                nodes_nbrs_fin[nodes_oldnew[node]].append(nodes_oldnew[nbr])
    return nodes_nbrs_fin

nodes_nbrs_fin = filter_and_reindex_nodes_and_neighbors(nodes_nbrs, nodes_oldnew)
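In [ ]:
# Added sanity check (a sketch, not from the original run): every reindexed node should
# keep the same number of neighbours it had under its old index.
for new_node, new_nbrs in list(nodes_nbrs_fin.items())[:100]:
    assert len(new_nbrs) == len(nodes_nbrs[nodes_newold[new_node]])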
In [23]:
objgraph.most_common_types(limit=5)
Out[23]:
In [24]:
def filter_and_reindex_graph_idxs(graph_idxs, nodes_oldnew):
    """
    - graph_idxs: a dictionary of graphs and their original node indices.
    - nodes_oldnew: a dictionary mapping old node indices to their new node indices.
    """
    graph_idxs_fin = defaultdict(list)
    for seqid, nodes in sorted(graph_idxs.items()):
        for node in nodes:
            if node in nodes_oldnew:
                graph_idxs_fin[seqid].append(nodes_oldnew[node])
    return graph_idxs_fin

graph_idxs_fin = filter_and_reindex_graph_idxs(graph_idxs, nodes_oldnew)
In [25]:
def filter_and_reindex_graph_nodes(graph_nodes, nodes_oldnew):
    """
    - graph_nodes: a dictionary mapping each graph to a dictionary of node indices to node names.
    - nodes_oldnew: a dictionary mapping old node indices to their new node indices.
    """
    graph_nodes_fin = defaultdict(dict)
    for seqid, idx_node in sorted(graph_nodes.items()):
        for old_idx, node_name in idx_node.items():
            if old_idx in nodes_oldnew:
                graph_nodes_fin[seqid][nodes_oldnew[old_idx]] = node_name
    return graph_nodes_fin

graph_nodes_fin = filter_and_reindex_graph_nodes(graph_nodes, nodes_oldnew)
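In [ ]:
# Quick look (added sketch): the filtered-and-reindexed dictionaries should cover the
# same sequences that survived the dataframe/graph intersection.
len(intersect), len(graph_idxs_fin), len(graph_nodes_fin)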
In [26]:
objgraph.most_common_types(limit=5)
Out[26]:
In [27]:
from graphfp.layers import FingerprintLayer, LinearRegressionLayer, GraphConvLayer
from graphfp.utils import initialize_network
from pyflatten import flatten

# Stack a graph convolution layer, a fingerprint layer, and a linear regression layer,
# then flatten the initialized weights/biases into a single parameter vector.
layers = [GraphConvLayer(weights_shape=(36, 36), biases_shape=(1, 36)),
          FingerprintLayer(weights_shape=(36, 36), biases_shape=(1, 36)),
          LinearRegressionLayer(weights_shape=(36, 1), biases_shape=(1, 1)),
          ]
wb = initialize_network(layers_spec=layers)
wb_vect, unflattener = flatten(wb)
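In [ ]:
# Illustrative note (added): the optimizer works on the flat parameter vector `wb_vect`,
# while the layers index into the nested structure; `unflattener` maps the former back to
# the latter. A round trip should reproduce the per-layer keys (assumption: the structure
# returned by `initialize_network` is a dict keyed by layer).
list(unflattener(wb_vect).keys())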
In [28]:
objgraph.most_common_types(limit=5)
Out[28]:
In [29]:
# from random import sample

# def batch_sample(inputs, nodes_nbrs, graph_idxs, n_graphs):
#     """
#     Randomly samples n_graphs from all of the graphs, returns new inputs,
#     node_nbr dictionary, and graph_idx dictionary.
#     """
#     samp_graph_idxs = dict(sample(graph_idxs.items(), n_graphs))
#     assert len(samp_graph_idxs) == n_graphs, "There was an error in sampling."
#     concat_samp_idxs = np.concatenate([v for k, v in sorted(samp_graph_idxs.items())])
#     # print('Samp Idxs Shape')
#     # print(concat_samp_idxs.shape)
#     samp_nodes_nbrs = {i: nodes_nbrs[i] for i in concat_samp_idxs}
#     assert len(samp_nodes_nbrs) == len(concat_samp_idxs)
#     samp_inputs, samp_nodes_oldnew, samp_nodes_newold = reindex_data_matrix(samp_graph_idxs, inputs)
#     samp_nodes_nbrs = filter_and_reindex_nodes_and_neighbors(samp_nodes_nbrs, samp_nodes_oldnew)
#     samp_graph_idxs = filter_and_reindex_graph_idxs(samp_graph_idxs, samp_nodes_oldnew)
#     return samp_inputs, samp_nodes_nbrs, samp_graph_idxs

# n_sampled_graphs = 100
# samp_inputs, samp_nodes_nbrs, samp_graph_idxs = batch_sample(graph_arr_fin, nodes_nbrs_fin, graph_idxs_fin, n_sampled_graphs)
# assert samp_inputs.shape[1] == 36
# assert len(samp_nodes_nbrs) == samp_inputs.shape[0]
# assert len(samp_graph_idxs) == n_sampled_graphs
In [30]:
# # Scratchpad cell
# samp_graph_idxs = dict(sample(graph_idxs.items(), 10))
# assert len(samp_graph_idxs) == 10
# concat_samp_idxs = np.concatenate([i for i in samp_graph_idxs.values()])
# samp_nodes_nbrs = {i: nodes_nbrs[i] for i in concat_samp_idxs}
# assert len(samp_nodes_nbrs) == len(concat_samp_idxs)
In [31]:
# Used in conjunction with train_loss function in cell below.
from graphfp.binary_matrix_utils import to_sparse_format, to_scipy_csr_matrix
# samp_graph_arr, samp_node_nbrs, samp_graph_idx = batch_sample(graph_arr_fin, nodes_nbrs_fin, graph_idxs_fin, 10)
node_rows, node_cols, ones = to_sparse_format(nodes_nbrs_fin)
# nodes_nbrs_sparse = to_scipy_csr_matrix(nodes_nbrs_fin)
In [32]:
objgraph.most_common_types(limit=5)
Out[32]:
In [33]:
len(nodes_nbrs_fin)
Out[33]:
In [34]:
from scipy.sparse import csr_matrix

# Build a sparse (n_nodes x n_nodes) adjacency matrix from the node/neighbour pairs.
nodes_nbrs_compressed = csr_matrix((ones, (node_rows, node_cols)),
                                   shape=(len(nodes_nbrs_fin), len(nodes_nbrs_fin)))
nodes_nbrs_compressed
Out[34]:
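In [ ]:
# Toy sketch (added): a CSR adjacency matrix like `nodes_nbrs_compressed` lets neighbour
# features be summed with a single sparse matrix product; presumably this is why the
# node/neighbour dictionary is converted to this form before the forward pass.
toy_adj = csr_matrix((np.ones(4), ([0, 0, 1, 2], [1, 2, 0, 0])), shape=(3, 3))
toy_feats = np.arange(6.).reshape(3, 2)
toy_adj.dot(toy_feats)  # row i is the sum of the feature rows of node i's neighbours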
In [35]:
objgraph.most_common_types(limit=5)
Out[35]:
In [36]:
# %%prun
def predict(wb_struct, inputs, nodes_nbrs_compressed, graph_idxs, layers):
    """Feed the inputs forward through each layer, returning the final layer's outputs."""
    curr_inputs = inputs
    for i, layer in enumerate(layers):
        wb = wb_struct['layer{0}_{1}'.format(i, layer)]
        curr_inputs = layer.forward_pass(wb, curr_inputs, nodes_nbrs_compressed, graph_idxs)
    return curr_inputs

predict(wb, graph_arr_fin, nodes_nbrs_compressed, graph_idxs_fin, layers).shape
Out[36]:
In [37]:
# %%prun
# Prototype train_loss function.
wb_vect, unflattener = flatten(wb)

def get_actual(graph_idxs, df, preds):
    """Return the measured FPV resistances, sorted by seqid and shaped like `preds`."""
    sorted_graphs = sorted(graph_idxs.keys())
    # print(sorted_graphs)
    sorted_resistances = df[df['seqid'].isin(sorted_graphs)].set_index('seqid').loc[sorted_graphs]['FPV'].values
    # print(sorted_resistances)
    actual = sorted_resistances.reshape(preds.shape)
    return actual

train_losses = []
preds_iter = []
actual_iter = []

def train_loss(wb_vect, unflattener):
    # Old version - sample one random graph each time.
    # ------------------------------------------------
    # samp_graph_arr, samp_nodes_nbrs, samp_graph_idxs = batch_sample(graph_arr_fin, nodes_nbrs_fin, graph_idxs_fin, 1)
    # wb_struct = unflattener(wb_vect)
    # preds = predict(wb_struct, samp_graph_arr, samp_nodes_nbrs, samp_graph_idxs, layers)

    # New version - train on the full reduced dataset (no batch sampling).
    # Uses code in the cells above.
    # --------------------------------------------------------------------
    wb_struct = unflattener(wb_vect)
    preds = predict(wb_struct, graph_arr_fin, nodes_nbrs_compressed, graph_idxs_fin, layers)
    graph_scores = get_actual(graph_idxs_fin, df, preds)
    mse = np.mean(np.power(preds - graph_scores, 2))
    # train_losses.append(mse)
    # preds_iter.append(preds)
    # actual_iter.append(graph_scores)
    # print(mse)
    return mse

train_loss(wb_vect, unflattener)
Out[37]:
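In [ ]:
# Added alignment check (a sketch): `get_actual` should return one FPV value per surviving
# graph, in the same sorted-seqid order used by `predict`. The zero array here is only a
# shape stand-in for the predictions.
actual_check = get_actual(graph_idxs_fin, df, np.zeros((len(graph_idxs_fin), 1)))
actual_check.shape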
In [38]:
objgraph.most_common_types(limit=5)
Out[38]:
In [39]:
gradfunc = grad(train_loss)
In [40]:
# Take a memory snapshot before the gradient call, for the tracemalloc comparison below.
pre_grad = tracemalloc.take_snapshot()
gradfunc(wb_vect, unflattener)
Out[40]:
In [41]:
# Memory snapshot after the gradient call, for the tracemalloc comparison below.
post_grad = tracemalloc.take_snapshot()
objgraph.most_common_types(limit=5)
Out[41]:
In [44]:
from pympler import muppy, summary
In [43]:
all_objects = muppy.get_objects()
In [45]:
sum1 = summary.summarize(all_objects)
summary.print_(sum1)
In [44]:
# Compare allocations before and after the gradient computation, grouped by traceback.
top_stats = post_grad.compare_to(pre_grad, 'traceback')
In [45]:
print("[ Top 10 differences ]")
for stat in top_stats[:10]:
print(stat)
In [31]:
from graphfp.optimizers import adam
In [32]:
import gc
from time import time

training_losses = []

def callback(wb, i):
    """Record the training loss (and wall-clock time per evaluation) at each iteration."""
    start = time()
    tl = train_loss(*flatten(wb))
    if i % 1 == 0:  # modulus of 1 means: print on every iteration
        print(tl, time() - start)
    training_losses.append(tl)
    gc.collect()
In [ ]:
# adam(gradfunc, wb, callback=callback, num_iters=10)
wb_vect, unflattener = adam(gradfunc, wb, callback=callback, num_iters=5)
In [ ]:
# %matplotlib inline
import matplotlib.pyplot as plt
from autograd.core import getval

plt.plot([getval(i) for i in training_losses])
plt.yscale('log')