Digit Classification with Neural Networks

Interest in neural nets, and in particular those with more than than one hidden layer, has been surging in recent years. In this notebook we will be revisiting the problem of digit classification on the MNIST data. We will be introducing a new Python library, Theano, for working with neural nets. Theano is a popular choice as the same code can be run on either CPUs or GPUs. GPUs greatly speed up the training and prediction, and easily available (Amazon even offers GPU machines on EC2).

Part 1: Basics

In part 1, we will review the basics of neural nets, and introduce Theano. In part 2, we will investigate more advanced topics in neural nets, including deep learning. I'd encourage you to read this paper as well as a supplementary explanation of Theano (http://www.iro.umontreal.ca/~lisa/pointeurs/theano_scipy2010.pdf).


In [1]:
%matplotlib inline

# Familiar libraries.
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
import time

# Take a moment to install Theano.  We will use it for building neural networks.
import theano 
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
print theano.config.device # We're using CPUs (for now)
print theano.config.floatX # Should be 64 bit for CPUs

np.random.seed(0)


WARNING (theano.configdefaults): g++ not detected ! Theano will be unable to execute optimized C-implementations (for both CPU and GPU) and will default to Python implementations. Performance will be severely degraded. To remove this warning, set Theano flags cxx to an empty string.
WARNING:theano.configdefaults:g++ not detected ! Theano will be unable to execute optimized C-implementations (for both CPU and GPU) and will default to Python implementations. Performance will be severely degraded. To remove this warning, set Theano flags cxx to an empty string.
===============================
===============================
'g++' is not recognized as an internal or external command,
operable program or batch file.

00001	#include <Python.h>
00002	#include "structmember.h"
00003	#include <sys/time.h>
00004	
00005	// Old Python compatibility from here:
00006	// http://www.python.org/dev/peps/pep-0353/
00007	#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
00008	typedef int Py_ssize_t;
00009	#define PY_SSIZE_T_MAX INT_MAX
00010	#define PY_SSIZE_T_MIN INT_MIN
00011	// This one was taken from:
00012	// http://svn.python.org/projects/python/trunk/Modules/_ctypes/ctypes.h
00013	#define PyNumber_AsSsize_t(ob, exc) PyInt_AsLong(ob)
00014	#endif
00015	
00016	#if PY_VERSION_HEX >= 0x03000000
00017	#include "numpy/npy_3kcompat.h"
00018	#define PyCObject_AsVoidPtr  NpyCapsule_AsVoidPtr
00019	#define PyCObject_GetDesc  NpyCapsule_GetDesc
00020	#define PyCObject_Check NpyCapsule_Check
00021	#endif
00022	
00023	#ifndef Py_TYPE
00024	#define Py_TYPE(obj) obj->ob_type
00025	#endif
00026	
00027	/**
00028	
00029	TODO: 
00030	- Check max supported depth of recursion
00031	- CLazyLinker should add context information to errors caught during evaluation. Say what node we were on, add the traceback attached to the node.
00032	- Clear containers of fully-useed intermediate results if allow_gc is 1
00033	- Add timers for profiling
00034	- Add support for profiling space used.
00035	
00036	
00037	  */
00038	static double pytime(const struct timeval * tv)
00039	{
00040	  struct timeval t;
00041	  if (!tv)
00042	    {
00043	      tv = &t;
00044	      gettimeofday(&t, NULL);
00045	    }
00046	  return (double) tv->tv_sec + (double) tv->tv_usec / 1000000.0;
00047	}
00048	
00049	/**
00050	  Helper routine to convert a PyList of integers to a c array of integers.
00051	  */
00052	static int unpack_list_of_ssize_t(PyObject * pylist, Py_ssize_t **dst, Py_ssize_t *len,
00053	                                  const char* kwname)
00054	{
00055	  Py_ssize_t buflen, *buf;
00056	  if (!PyList_Check(pylist))
00057	    {
00058	      PyErr_Format(PyExc_TypeError, "%s must be list", kwname);
00059	      return -1;
00060	    }
00061	  assert (NULL == *dst);
00062	  *len = buflen = PyList_Size(pylist);
00063	  *dst = buf = (Py_ssize_t*)calloc(buflen, sizeof(Py_ssize_t));
00064	  assert(buf);
00065	  for (int ii = 0; ii < buflen; ++ii)
00066	    {
00067	      PyObject * el_i = PyList_GetItem(pylist, ii);
00068	      Py_ssize_t n_i = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
00069	      if (PyErr_Occurred())
00070	        {
00071	          free(buf);
00072	          *dst = NULL;
00073	          return -1;
00074	        }
00075	      buf[ii] = n_i;
00076	    }
00077	  return 0;
00078	}
00079	
00080	/**
00081	
00082	  CLazyLinker
00083	
00084	
00085	  */
00086	typedef struct {
00087	    PyObject_HEAD
00088	    /* Type-specific fields go here. */
00089	    PyObject * nodes; // the python list of nodes
00090	    PyObject * thunks; // python list of thunks
00091	    PyObject * pre_call_clear; //list of cells to clear on call.
00092	    int allow_gc;
00093	    Py_ssize_t n_applies;
00094	    int n_vars;    // number of variables in the graph
00095	    int * var_computed; // 1 or 0 for every variable
00096	    PyObject ** var_computed_cells;
00097	    PyObject ** var_value_cells;
00098	    Py_ssize_t **dependencies; // list of vars dependencies for GC
00099	    Py_ssize_t *n_dependencies;
00100	
00101	    Py_ssize_t n_output_vars;
00102	    Py_ssize_t * output_vars; // variables that *must* be evaluated by call
00103	
00104	    int * is_lazy; // 1 or 0 for every thunk
00105	
00106	    Py_ssize_t * var_owner; // nodes[[var_owner[var_idx]]] is var[var_idx]->owner
00107	    int * var_has_owner; //  1 or 0
00108	
00109	    Py_ssize_t * node_n_inputs;
00110	    Py_ssize_t * node_n_outputs;
00111	    Py_ssize_t ** node_inputs;
00112	    Py_ssize_t ** node_outputs;
00113	    Py_ssize_t * node_inputs_outputs_base; // node_inputs and node_outputs point into this
00114	    Py_ssize_t * node_n_prereqs;
00115	    Py_ssize_t ** node_prereqs;
00116	
00117	    Py_ssize_t * update_storage; // input cells to update with the last outputs in output_vars
00118	    Py_ssize_t n_updates;
00119	
00120	    void ** thunk_cptr_fn;
00121	    void ** thunk_cptr_data;
00122	    PyObject * call_times;
00123	    PyObject * call_counts;
00124	    int do_timing;
00125	    int need_update_inputs;
00126	    int position_of_error; // -1 for no error, otw the index into `thunks` that failed.
00127	} CLazyLinker;
00128	
00129	
00130	static void
00131	CLazyLinker_dealloc(PyObject* _self)
00132	{
00133	  CLazyLinker* self = (CLazyLinker *) _self;
00134	  free(self->thunk_cptr_fn);
00135	  free(self->thunk_cptr_data);
00136	
00137	  free(self->is_lazy);
00138	
00139	  free(self->update_storage);
00140	
00141	  if (self->node_n_prereqs)
00142	    {
00143	      for (int i = 0; i < self->n_applies; ++i)
00144	        {
00145	          free(self->node_prereqs[i]);
00146	        }
00147	    }
00148	  free(self->node_n_prereqs);
00149	  free(self->node_prereqs);
00150	  free(self->node_inputs_outputs_base);
00151	  free(self->node_n_inputs);
00152	  free(self->node_n_outputs);
00153	  free(self->node_inputs);
00154	  free(self->node_outputs);
00155	
00156	  if (self->dependencies)
00157	    {
00158	      for (int i = 0; i < self->n_vars; ++i)
00159	        {
00160	          free(self->dependencies[i]);
00161	        }
00162	      free(self->dependencies);
00163	      free(self->n_dependencies);
00164	    }
00165	
00166	  free(self->var_owner);
00167	  free(self->var_has_owner);
00168	  free(self->var_computed);
00169	  if (self->var_computed_cells)
00170	    {
00171	      for (int i = 0; i < self->n_vars; ++i)
00172	        {
00173	          Py_DECREF(self->var_computed_cells[i]);
00174	          Py_DECREF(self->var_value_cells[i]);
00175	        }
00176	    }
00177	  free(self->var_computed_cells);
00178	  free(self->var_value_cells);
00179	  free(self->output_vars);
00180	
00181	  Py_XDECREF(self->nodes);
00182	  Py_XDECREF(self->thunks);
00183	  Py_XDECREF(self->call_times);
00184	  Py_XDECREF(self->call_counts);
00185	  Py_XDECREF(self->pre_call_clear);
00186	  Py_TYPE(self)->tp_free((PyObject*)self);
00187	}
00188	static PyObject *
00189	CLazyLinker_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
00190	{
00191	    CLazyLinker *self;
00192	
00193	    self = (CLazyLinker *)type->tp_alloc(type, 0);
00194	    if (self != NULL) {
00195	      self->nodes = NULL;
00196	      self->thunks = NULL;
00197	      self->pre_call_clear = NULL;
00198	
00199	      self->allow_gc = 1;
00200	      self->n_applies = 0;
00201	      self->n_vars = 0;
00202	      self->var_computed = NULL;
00203	      self->var_computed_cells = NULL;
00204	      self->var_value_cells = NULL;
00205	      self->dependencies = NULL;
00206	      self->n_dependencies = NULL;
00207	
00208	      self->n_output_vars = 0;
00209	      self->output_vars = NULL;
00210	
00211	      self->is_lazy = NULL;
00212	
00213	      self->var_owner = NULL;
00214	      self->var_has_owner = NULL;
00215	
00216	      self->node_n_inputs = NULL;
00217	      self->node_n_outputs = NULL;
00218	      self->node_inputs = NULL;
00219	      self->node_outputs = NULL;
00220	      self->node_inputs_outputs_base = NULL;
00221	      self->node_prereqs = NULL;
00222	      self->node_n_prereqs = NULL;
00223	
00224	      self->update_storage = NULL;
00225	      self->n_updates = 0;
00226	
00227	      self->thunk_cptr_data = NULL;
00228	      self->thunk_cptr_fn = NULL;
00229	      self->call_times = NULL;
00230	      self->call_counts = NULL;
00231	      self->do_timing = 0;
00232	
00233	      self->need_update_inputs = 0;
00234	      self->position_of_error = -1;
00235	    }
00236	    return (PyObject *)self;
00237	}
00238	
00239	static int
00240	CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
00241	{
00242	    static char *kwlist[] = {
00243	      (char*)"nodes",
00244	      (char*)"thunks",
00245	      (char*)"pre_call_clear",
00246	      (char*)"allow_gc",
00247	      (char*)"call_counts",
00248	      (char*)"call_times",
00249	      (char*)"compute_map_list",
00250	      (char*)"storage_map_list",
00251	      (char*)"base_input_output_list",
00252	      (char*)"node_n_inputs",
00253	      (char*)"node_n_outputs",
00254	      (char*)"node_input_offset",
00255	      (char*)"node_output_offset",
00256	      (char*)"var_owner",
00257	      (char*)"is_lazy_list",
00258	      (char*)"output_vars",
00259	      (char*)"node_prereqs",
00260	      (char*)"node_output_size",
00261	      (char*)"update_storage",
00262	      (char*)"dependencies",
00263	      NULL};
00264	
00265	    PyObject *compute_map_list=NULL,
00266	             *storage_map_list=NULL,
00267	             *base_input_output_list=NULL,
00268	             *node_n_inputs=NULL,
00269	             *node_n_outputs=NULL,
00270	             *node_input_offset=NULL,
00271	             *node_output_offset=NULL,
00272	             *var_owner=NULL,
00273	             *is_lazy=NULL,
00274	             *output_vars=NULL,
00275	             *node_prereqs=NULL,
00276	             *node_output_size=NULL,
00277	             *update_storage=NULL,
00278	             *dependencies=NULL;
00279	
00280	    assert(!self->nodes);
00281	    if (! PyArg_ParseTupleAndKeywords(args, kwds, "OOOiOOOOOOOOOOOOOOOO", kwlist,
00282	                                      &self->nodes,
00283	                                      &self->thunks,
00284	                                      &self->pre_call_clear,
00285	                                      &self->allow_gc,
00286	                                      &self->call_counts,
00287	                                      &self->call_times,
00288	                                      &compute_map_list,
00289	                                      &storage_map_list,
00290	                                      &base_input_output_list,
00291	                                      &node_n_inputs,
00292	                                      &node_n_outputs,
00293	                                      &node_input_offset,
00294	                                      &node_output_offset,
00295	                                      &var_owner,
00296	                                      &is_lazy,
00297	                                      &output_vars,
00298	                                      &node_prereqs,
00299	                                      &node_output_size,
00300	                                      &update_storage,
00301	                                      &dependencies
00302	                                      ))
00303	        return -1;
00304	    Py_INCREF(self->nodes);
00305	    Py_INCREF(self->thunks);
00306	    Py_INCREF(self->pre_call_clear);
00307	    Py_INCREF(self->call_counts);
00308	    Py_INCREF(self->call_times);
00309	
00310	    Py_ssize_t n_applies = PyList_Size(self->nodes);
00311	
00312	    self->n_applies = n_applies;
00313	    self->n_vars = PyList_Size(var_owner);
00314	
00315	    if (PyList_Size(self->thunks) != n_applies) return -1;
00316	    if (PyList_Size(self->call_counts) != n_applies) return -1;
00317	    if (PyList_Size(self->call_times) != n_applies) return -1;
00318	
00319	    // allocated and initialize thunk_cptr_data and thunk_cptr_fn
00320	    if (n_applies)
00321	      {
00322	        self->thunk_cptr_data = (void**)calloc(n_applies, sizeof(void*));
00323	        self->thunk_cptr_fn = (void**)calloc(n_applies, sizeof(void*));
00324	        self->is_lazy = (int*)calloc(n_applies, sizeof(int));
00325	        self->node_prereqs = (Py_ssize_t**)calloc(n_applies, sizeof(Py_ssize_t*));
00326	        self->node_n_prereqs = (Py_ssize_t*)calloc(n_applies, sizeof(Py_ssize_t));
00327	        assert(self->node_prereqs);
00328	        assert(self->node_n_prereqs);
00329	        assert(self->is_lazy);
00330	        assert(self->thunk_cptr_fn);
00331	        assert(self->thunk_cptr_data);
00332	
00333	        for (int i = 0; i < n_applies; ++i)
00334	          {
00335	            PyObject * thunk = PyList_GetItem(self->thunks, i);
00336	            //thunk is borrowed
00337	            if (PyObject_HasAttrString(thunk, "cthunk"))
00338	              {
00339	                PyObject * cthunk = PyObject_GetAttrString(thunk, "cthunk");
00340	                //new reference
00341	                assert (cthunk && PyCObject_Check(cthunk));
00342	                self->thunk_cptr_fn[i] = PyCObject_AsVoidPtr(cthunk);
00343	                self->thunk_cptr_data[i] = PyCObject_GetDesc(cthunk);
00344	                Py_DECREF(cthunk);
00345	                // cthunk is kept alive by membership in self->thunks
00346	              }
00347	
00348	            PyObject * el_i = PyList_GetItem(is_lazy, i);
00349	            self->is_lazy[i] = PyNumber_AsSsize_t(el_i, NULL);
00350	
00351	            /* now get the prereqs */
00352	            el_i = PyList_GetItem(node_prereqs, i);
00353	            assert (PyList_Check(el_i));
00354	            self->node_n_prereqs[i] = PyList_Size(el_i);
00355	            if (self->node_n_prereqs[i])
00356	              {
00357	                self->node_prereqs[i] = (Py_ssize_t*)malloc(
00358	                              PyList_Size(el_i)*sizeof(Py_ssize_t));
00359	                for (int j = 0; j < PyList_Size(el_i); ++j)
00360	                  {
00361	                    PyObject * el_ij = PyList_GetItem(el_i, j);
00362	                    Py_ssize_t N = PyNumber_AsSsize_t(el_ij, PyExc_IndexError);
00363	                    if (PyErr_Occurred())
00364	                      return -1;
00365	                    // N < n. variables
00366	                    assert(N < PyList_Size(var_owner));
00367	                    self->node_prereqs[i][j] = N;
00368	                  }
00369	              }
00370	          }
00371	      }
00372	    if (PyList_Check(base_input_output_list))
00373	      {
00374	        Py_ssize_t n_inputs_outputs_base = PyList_Size(base_input_output_list);
00375	        self->node_inputs_outputs_base = (Py_ssize_t*)calloc(n_inputs_outputs_base,sizeof(Py_ssize_t));
00376	        assert(self->node_inputs_outputs_base);
00377	        for (int i = 0; i < n_inputs_outputs_base; ++i)
00378	          {
00379	            PyObject *el_i = PyList_GetItem(base_input_output_list, i);
00380	            Py_ssize_t idx = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
00381	            if (PyErr_Occurred()) return -1;
00382	            self->node_inputs_outputs_base[i] = idx;
00383	          }
00384	        self->node_n_inputs = (Py_ssize_t*)calloc(n_applies,sizeof(Py_ssize_t));
00385	        assert(self->node_n_inputs);
00386	        self->node_n_outputs = (Py_ssize_t*)calloc(n_applies,sizeof(Py_ssize_t));
00387	        assert(self->node_n_outputs);
00388	        self->node_inputs = (Py_ssize_t**)calloc(n_applies,sizeof(Py_ssize_t*));
00389	        assert(self->node_inputs);
00390	        self->node_outputs = (Py_ssize_t**)calloc(n_applies,sizeof(Py_ssize_t*));
00391	        assert(self->node_outputs);
00392	        for (int i = 0; i < n_applies; ++i)
00393	          {
00394	            Py_ssize_t N;
00395	            N = PyNumber_AsSsize_t(PyList_GetItem(node_n_inputs, i),PyExc_IndexError);
00396	            if (PyErr_Occurred()) return -1;
00397	            assert (N <= n_inputs_outputs_base);
00398	            self->node_n_inputs[i] = N;
00399	            N = PyNumber_AsSsize_t(PyList_GetItem(node_n_outputs, i),PyExc_IndexError);
00400	            if (PyErr_Occurred()) return -1;
00401	            assert (N <= n_inputs_outputs_base);
00402	            self->node_n_outputs[i] = N;
00403	            N = PyNumber_AsSsize_t(PyList_GetItem(node_input_offset, i),PyExc_IndexError);
00404	            if (PyErr_Occurred()) return -1;
00405	            assert (N <= n_inputs_outputs_base);
00406	            self->node_inputs[i] = &self->node_inputs_outputs_base[N];
00407	            N = PyNumber_AsSsize_t(PyList_GetItem(node_output_offset, i),PyExc_IndexError);
00408	            if (PyErr_Occurred()) return -1;
00409	            assert (N <= n_inputs_outputs_base);
00410	            self->node_outputs[i] = &self->node_inputs_outputs_base[N];
00411	          }
00412	      }
00413	    else
00414	      {
00415	        PyErr_SetString(PyExc_TypeError, "base_input_output_list must be list");
00416	        return -1;
00417	      }
00418	
00419	    // allocation for var_owner
00420	    if (PyList_Check(var_owner))
00421	      {
00422	        self->var_owner = (Py_ssize_t*)calloc(self->n_vars,sizeof(Py_ssize_t));
00423	        self->var_has_owner = (int*)calloc(self->n_vars,sizeof(int));
00424	        self->var_computed = (int*)calloc(self->n_vars,sizeof(int));
00425	        self->var_computed_cells = (PyObject**)calloc(self->n_vars,sizeof(PyObject*));
00426	        self->var_value_cells = (PyObject**)calloc(self->n_vars,sizeof(PyObject*));
00427	        for (int i = 0; i < self->n_vars; ++i)
00428	          {
00429	            PyObject * el_i = PyList_GetItem(var_owner, i);
00430	            if (el_i == Py_None)
00431	              {
00432	                self->var_has_owner[i] = 0;
00433	              }
00434	            else
00435	              {
00436	                Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
00437	                if (PyErr_Occurred()) return -1;
00438	                assert (N <= n_applies);
00439	                self->var_owner[i] = N;
00440	                self->var_has_owner[i] = 1;
00441	              }
00442	            self->var_computed_cells[i] = PyList_GetItem(compute_map_list, i);
00443	            Py_INCREF(self->var_computed_cells[i]);
00444	            self->var_value_cells[i] = PyList_GetItem(storage_map_list, i);
00445	            Py_INCREF(self->var_value_cells[i]);
00446	          }
00447	      }
00448	    else
00449	      {
00450	        PyErr_SetString(PyExc_TypeError, "var_owner must be list");
00451	        return -1;
00452	      }
00453	
00454	    if (dependencies != Py_None)
00455	      {
00456	        self->dependencies = (Py_ssize_t**)calloc(self->n_vars, sizeof(Py_ssize_t *));
00457	        self->n_dependencies = (Py_ssize_t*)calloc(self->n_vars, sizeof(Py_ssize_t));
00458	        assert(self->dependencies);
00459	        assert(self->n_dependencies);
00460	
00461	        for (int i = 0; i < self->n_vars; ++i)
00462	          {
00463	            PyObject *tmp = PyList_GetItem(dependencies, i);
00464	            // refcounting - tmp is borrowed
00465	            if (unpack_list_of_ssize_t(tmp, &self->dependencies[i], &self->n_dependencies[i],
00466	                                       "dependencies"))
00467	              return -1;
00468	          }
00469	      }
00470	
00471	    if (unpack_list_of_ssize_t(output_vars, &self->output_vars, &self->n_output_vars,
00472	                               "output_vars"))
00473	      return -1;
00474	    for (int i = 0; i < self->n_output_vars; ++i)
00475	      {
00476	        assert(self->output_vars[i] < self->n_vars);
00477	      }
00478	    if (unpack_list_of_ssize_t(update_storage, &self->update_storage, &self->n_updates,
00479	                               "updates_storage"))
00480	      return -1;
00481	    return 0;
00482	}
00483	static void set_position_of_error(CLazyLinker * self, int owner_idx)
00484	{
00485	  if (self->position_of_error == -1)
00486	    {
00487	      self->position_of_error = owner_idx;
00488	    }
00489	}
00490	static PyObject * pycall(CLazyLinker * self, Py_ssize_t node_idx, int verbose)
00491	{
00492	  // call thunk to see which inputs it wants
00493	  PyObject * thunk = PyList_GetItem(self->thunks, node_idx);
00494	  // refcounting - thunk is borrowed
00495	  PyObject * rval = NULL;
00496	  if (self->do_timing)
00497	    {
00498	      double t0 = pytime(NULL);
00499	      if (verbose) fprintf(stderr, "calling via Python (node %i)\n", (int)node_idx);
00500	      rval = PyObject_CallObject(thunk, NULL);
00501	      if (rval)
00502	        {
00503	          double t1 = pytime(NULL);
00504	          double ti = PyFloat_AsDouble(
00505	                         PyList_GetItem(self->call_times, node_idx));
00506	          PyList_SetItem(self->call_times, node_idx,
00507	                         PyFloat_FromDouble(t1 - t0 + ti));
00508	          PyObject * count = PyList_GetItem(self->call_counts, node_idx);
00509	          long icount = PyInt_AsLong(count);
00510	          PyList_SetItem(self->call_counts, node_idx,
00511	                         PyInt_FromLong(icount + 1));
00512	      }
00513	    }
00514	  else
00515	    {
00516	      if (verbose)
00517	        {
00518	          fprintf(stderr, "calling via Python (node %i)\n", (int)node_idx);
00519	        }
00520	      rval = PyObject_CallObject(thunk, NULL);
00521	    }
00522	  return rval;
00523	}
00524	static int c_call(CLazyLinker * self, Py_ssize_t node_idx, int verbose)
00525	{
00526	  void * ptr_addr = self->thunk_cptr_fn[node_idx];
00527	  int (*fn)(void*) = (int (*)(void*))(ptr_addr);
00528	  if (verbose) fprintf(stderr, "calling non-lazy shortcut (node %i)\n", (int)node_idx);
00529	  int err = 0;
00530	  if (self->do_timing)
00531	    {
00532	      double t0 = pytime(NULL);
00533	      err = fn(self->thunk_cptr_data[node_idx]);
00534	      double t1 = pytime(NULL);
00535	      double ti = PyFloat_AsDouble(PyList_GetItem(self->call_times, node_idx));
00536	      PyList_SetItem(self->call_times, node_idx, PyFloat_FromDouble(t1 - t0 + ti));
00537	      PyObject * count = PyList_GetItem(self->call_counts, node_idx);
00538	      long icount = PyInt_AsLong(count);
00539	      PyList_SetItem(self->call_counts, node_idx, PyInt_FromLong(icount+1));
00540	    }
00541	  else
00542	    {
00543	      err = fn(self->thunk_cptr_data[node_idx]);
00544	    }
00545	
00546	  if (err)
00547	    {
00548	      // cast the argument to a PyList (as described near line 226 of cc.py)
00549	      PyObject * __ERROR = ((PyObject**)self->thunk_cptr_data[node_idx])[0];
00550	      assert (PyList_Check(__ERROR));
00551	      assert (PyList_Size(__ERROR) == 3);
00552	      PyObject * err_type = PyList_GetItem(__ERROR, 0); //stolen ref
00553	      PyObject * err_msg = PyList_GetItem(__ERROR, 1); //stolen ref
00554	      PyObject * err_trace = PyList_GetItem(__ERROR, 2); //stolen ref
00555	      PyList_SET_ITEM(__ERROR, 0, Py_None); Py_INCREF(Py_None); //clobbers old ref
00556	      PyList_SET_ITEM(__ERROR, 1, Py_None); Py_INCREF(Py_None); //clobbers old ref
00557	      PyList_SET_ITEM(__ERROR, 2, Py_None); Py_INCREF(Py_None); //clobbers old ref
00558	
00559	      assert(!PyErr_Occurred()); // because CLinker hid the exception in __ERROR aka data
00560	      PyErr_Restore(err_type, err_msg, err_trace); //steals refs to args
00561	    }
00562	  if (err) set_position_of_error(self, node_idx);
00563	  return err;
00564	}
00565	static
00566	int lazy_rec_eval(CLazyLinker * self, Py_ssize_t var_idx, PyObject*one, PyObject*zero)
00567	{
00568	  PyObject *rval = NULL;
00569	  int verbose = 0;
00570	  int err = 0;
00571	
00572	  if (verbose) fprintf(stderr, "lazy_rec computing %i\n", (int)var_idx);
00573	
00574	  if (self->var_computed[var_idx] || !self->var_has_owner[var_idx])
00575	    return 0;
00576	
00577	  Py_ssize_t owner_idx = self->var_owner[var_idx];
00578	
00579	  // STEP 1: compute the pre-requirements of the node
00580	  // Includes input nodes for non-lazy ops.
00581	  for (int i = 0; i < self->node_n_prereqs[owner_idx]; ++i)
00582	    {
00583	      Py_ssize_t prereq_idx = self->node_prereqs[owner_idx][i];
00584	      if (!self->var_computed[prereq_idx])
00585	        {
00586	          err = lazy_rec_eval(self, prereq_idx, one, zero);
00587	          if (err) return err;
00588	        }
00589	      assert (self->var_computed[prereq_idx]);
00590	    }
00591	
00592	  // STEP 2: compute the node itself
00593	  if (self->is_lazy[owner_idx])
00594	    {
00595	      // update the compute_map cells corresponding to the inputs of this thunk
00596	      for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
00597	        {
00598	          int in_idx = self->node_inputs[owner_idx][i];
00599	          if (self->var_computed[in_idx])
00600	            {
00601	              Py_INCREF(one);
00602	              err = PyList_SetItem(self->var_computed_cells[in_idx], 0, one);
00603	            }
00604	          else
00605	            {
00606	              Py_INCREF(zero);
00607	              err = PyList_SetItem(self->var_computed_cells[in_idx], 0, zero);
00608	            }
00609	          if (err) goto fail;
00610	        }
00611	
00612	      rval = pycall(self, owner_idx, verbose);
00613	      // refcounting - rval is new ref
00614	      //TODO: to prevent infinite loops
00615	      // - consider check that a thunk does not ask for an input that is already computed
00616	      if (rval == NULL)
00617	        {
00618	          assert (PyErr_Occurred());
00619	          err = 1;
00620	          goto fail;
00621	        }
00622	
00623	      //update the computed-ness of any output cells
00624	      for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
00625	        {
00626	          int out_idx = self->node_outputs[owner_idx][i];
00627	          PyObject * el_i = PyList_GetItem(self->var_computed_cells[out_idx], 0);
00628	          Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
00629	          if (PyErr_Occurred())
00630	            {
00631	              err = -1;
00632	              goto pyfail;
00633	            }
00634	          assert (N==0 || N==1);
00635	          self->var_computed[out_idx] = N;
00636	        }
00637	      if (!self->var_computed[var_idx])
00638	        {
00639	          /*
00640	           * If self is not computed after the call, this means that some
00641	           * inputs are needed.  Compute the ones on the returned list
00642	           * and try to compute the current node again (with recursive call).
00643	           * This allows a node to request more nodes more than once before
00644	           * finally yielding a result.
00645	           */
00646	          if (!PyList_Check(rval))
00647	            {
00648	              //TODO: More helpful error to help find *which node* made this
00649	              // bad thunk
00650	              PyErr_SetString(PyExc_TypeError,
00651	                              "lazy thunk should return a list");
00652	              err = 1;
00653	              goto pyfail;
00654	            }
00655	
00656	          if (!PyList_Size(rval))
00657	            {
00658	              PyErr_SetString(PyExc_ValueError,
00659	                              "lazy thunk returned empty list without computing output");
00660	              err = 1;
00661	              goto pyfail;
00662	            }
00663	
00664	          for (int i = 0; i < PyList_Size(rval); ++i)
00665	            {
00666	              PyObject * el_i = PyList_GetItem(rval, i);
00667	              Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
00668	              if (PyErr_Occurred())
00669	                {
00670	                  err = 1;
00671	                  goto pyfail;
00672	                }
00673	              assert (N <= self->node_n_inputs[owner_idx]);
00674	              Py_ssize_t input_idx = self->node_inputs[owner_idx][N];
00675	              err = lazy_rec_eval(self, input_idx, one, zero);
00676	              if (err) goto pyfail;
00677	            }
00678	
00679	          Py_DECREF(rval);
00680	          /*
00681	           * We intentionally skip all the end-of-function processing
00682	           * (mark outputs, GC) as it will be performed by the call
00683	           * that actually manages to compute the result.
00684	           */
00685	          return lazy_rec_eval(self, var_idx, one, zero);
00686	        }
00687	
00688	      Py_DECREF(rval);
00689	    }
00690	  else //owner is not a lazy op. Ensure all intputs are evaluated.
00691	    {
00692	      // loop over inputs to owner
00693	      // call lazy_rec_eval on each one that is not computed.
00694	      // if there's an error, pass it up the stack
00695	      for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
00696	        {
00697	          Py_ssize_t input_idx = self->node_inputs[owner_idx][i];
00698	          if (!self->var_computed[input_idx])
00699	            {
00700	              err = lazy_rec_eval(self, input_idx, one, zero);
00701	              if (err) return err;
00702	            }
00703	          assert (self->var_computed[input_idx]);
00704	        }
00705	
00706	      // call the thunk for this owner.
00707	      if (self->thunk_cptr_fn[owner_idx])
00708	        {
00709	          err = c_call(self, owner_idx, verbose);
00710	          if (err) goto fail;
00711	        }
00712	      else
00713	        {
00714	          rval = pycall(self, owner_idx, verbose);
00715	          //rval is new ref
00716	          if (rval) //pycall returned normally (no exception)
00717	            {
00718	              if (rval == Py_None)
00719	                {
00720	                  Py_DECREF(rval); //ignore a return of None
00721	                }
00722	              else if (PyList_Check(rval))
00723	                {
00724	                  PyErr_SetString(PyExc_TypeError,
00725	                                  "non-lazy thunk should return None, not list");
00726	                  err = 1;
00727	                  goto pyfail;
00728	                }
00729	              else // don't know what it returned, but it wasn't right.
00730	                {
00731	                  PyErr_SetObject(PyExc_TypeError, rval);
00732	                  err = 1;
00733	                  // We don't release rval since we put it in the error above
00734	                  goto fail;
00735	                }
00736	            }
00737	          else // pycall returned NULL (internal error)
00738	            {
00739	              err = 1;
00740	              goto fail;
00741	            }
00742	        }
00743	    }
00744	
00745	  // loop over all outputs and mark them as computed
00746	  for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
00747	    {
00748	      self->var_computed[self->node_outputs[owner_idx][i]] = 1;
00749	    }
00750	
00751	  // Free vars that are not needed anymore
00752	  if (self->allow_gc)
00753	    {
00754	      for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
00755	        {
00756	          int cleanup = 1;
00757	          Py_ssize_t i_idx = self->node_inputs[owner_idx][i];
00758	          if (!self->var_has_owner[i_idx])
00759	            continue;
00760	
00761	          for (int j = 0; j < self->n_output_vars; ++j)
00762	            {
00763	              if (i_idx == self->output_vars[j])
00764	                {
00765	                  cleanup = 0;
00766	                  break;
00767	                }
00768	            }
00769	          if (!cleanup) continue;
00770	
00771	          for (int j = 0; j < self->n_dependencies[i_idx]; ++j)
00772	            {
00773	              if (!self->var_computed[self->dependencies[i_idx][j]])
00774	                {
00775	                  cleanup = 0;
00776	                  break;
00777	                }
00778	            }
00779	          if (!cleanup) continue;
00780	
00781	          Py_INCREF(Py_None);
00782	          err = PyList_SetItem(self->var_value_cells[i_idx], 0, Py_None);
00783	//See the Stack gc implementation for why we change it to 2 and not 0.
00784	          self->var_computed[i_idx] = 2;
00785	          if (err) goto fail;
00786	        }
00787	    }
00788	
00789	  return 0;
00790	 pyfail:
00791	  Py_DECREF(rval);
00792	 fail:
00793	  set_position_of_error(self, owner_idx);
00794	  return err;
00795	}
00796	
00797	static PyObject *
00798	CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
00799	{
00800	  CLazyLinker * self = (CLazyLinker*)_self;
00801	  static char *kwlist[] = {
00802	    (char*)"time_thunks",
00803	    (char *)"n_calls",
00804	    NULL};
00805	  int n_calls=1;
00806	  if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ii", kwlist,
00807	                                    &self->do_timing,
00808	                                    &n_calls))
00809	    return NULL;
00810	  int err = 0;
00811	  self->position_of_error = -1;
00812	  // create constants used to fill the var_compute_cells
00813	  PyObject * one = PyInt_FromLong(1);
00814	  PyObject * zero = PyInt_FromLong(0);
00815	
00816	  // pre-allocate our return value
00817	  Py_INCREF(Py_None);
00818	  PyObject * rval = Py_None;
00819	  //clear storage of pre_call_clear elements
00820	  for (int call_i = 0; call_i < n_calls && (!err); ++call_i)
00821	    {
00822	      Py_ssize_t n_pre_call_clear = PyList_Size(self->pre_call_clear);
00823	      assert(PyList_Check(self->pre_call_clear));
00824	      for (int i = 0; i < n_pre_call_clear; ++i)
00825	        {
00826	          PyObject * el_i = PyList_GetItem(self->pre_call_clear, i);
00827	          Py_INCREF(Py_None);
00828	          PyList_SetItem(el_i, 0, Py_None);
00829	        }
00830	      //clear the computed flag out of all non-input vars
00831	      for (int i = 0; i < self->n_vars; ++i)
00832	        {
00833	          self->var_computed[i] = !self->var_has_owner[i];
00834	          if (self->var_computed[i])
00835	            {
00836	              Py_INCREF(one);
00837	              PyList_SetItem(self->var_computed_cells[i], 0, one);
00838	            }
00839	          else
00840	            {
00841	              Py_INCREF(zero);
00842	              PyList_SetItem(self->var_computed_cells[i], 0, zero);
00843	            }
00844	        }
00845	
00846	      for (int i = 0; i < self->n_output_vars && (!err); ++i)
00847	        {
00848	          err = lazy_rec_eval(self, self->output_vars[i], one, zero);
00849	        }
00850	
00851	      if (!err)
00852	        {
00853	          // save references to outputs prior to updating storage containers
00854	          assert (self->n_output_vars >= self->n_updates);
00855	          Py_DECREF(rval);
00856	          rval = PyList_New(self->n_output_vars);
00857	          for (int i = 0; i < (self->n_output_vars); ++i)
00858	            {
00859	              Py_ssize_t src = self->output_vars[i];
00860	              PyObject * item = PyList_GetItem(self->var_value_cells[src], 0);
00861	              if (self->var_computed[src] != 1)
00862	                {
00863	                  err = 1;
00864	                  PyErr_Format(PyExc_AssertionError,
00865	                               "The compute map of output %d should contain "
00866	                               "1 at the end of execution, not %d.",
00867	                               i, self->var_computed[src]);
00868	                  break;
00869	                }
00870	              Py_INCREF(item);
00871	              PyList_SetItem(rval, i, item);
00872	            }
00873	        }
00874	
00875	      if (!err)
00876	        {
00877	          // Update the inputs that have an update rule
00878	          for (int i = 0; i < self->n_updates; ++i)
00879	            {
00880	              PyObject* tmp = PyList_GetItem(rval, self->n_output_vars - self->n_updates + i);
00881	              Py_INCREF(tmp);
00882	              Py_ssize_t dst = self->update_storage[i];
00883	              PyList_SetItem(self->var_value_cells[dst], 0, tmp);
00884	            }
00885	        }
00886	    }
00887	
00888	  /*
00889	    Clear everything that is left and not an output.  This is needed
00890	    for lazy evaluation since the current GC algo is too conservative
00891	    with lazy graphs.
00892	  */
00893	  if (self->allow_gc && !err)
00894	    {
00895	      for (Py_ssize_t i = 0; i < self->n_vars; ++i)
00896	        {
00897	          int do_cleanup = 1;
00898	          if (!self->var_has_owner[i] || !self->var_computed[i])
00899	            continue;
00900	          for (int j = 0; j < self->n_output_vars; ++j)
00901	            {
00902	              if (i == self->output_vars[j])
00903	                {
00904	                  do_cleanup = 0;
00905	                  break;
00906	                }
00907	            }
00908	          if (!do_cleanup)
00909	            continue;
00910	          Py_INCREF(Py_None);
00911	          PyList_SetItem(self->var_value_cells[i], 0, Py_None);
00912	        }
00913	    }
00914	  Py_DECREF(one);
00915	  Py_DECREF(zero);
00916	  if (err)
00917	    {
00918	      Py_DECREF(rval);
00919	      return NULL;
00920	    }
00921	  return rval;
00922	}
00923	
00924	#if 0
00925	static PyMethodDef CLazyLinker_methods[] = {
00926	    {
00927	      //"name", (PyCFunction)CLazyLinker_accept, METH_VARARGS, "Return the name, combining the first and last name"
00928	    },
00929	    {NULL}  /* Sentinel */
00930	};
00931	#endif
00932	
00933	
00934	static PyObject *
00935	CLazyLinker_get_allow_gc(CLazyLinker *self, void *closure)
00936	{
00937	    return PyBool_FromLong(self->allow_gc);
00938	}
00939	
00940	static int
00941	CLazyLinker_set_allow_gc(CLazyLinker *self, PyObject *value, void *closure)
00942	{
00943	  if(!PyBool_Check(value))
00944	    return -1;
00945	
00946	  if (value == Py_True)
00947	    self->allow_gc = true;
00948	  else
00949	    self->allow_gc = false;
00950	  return 0;
00951	}
00952	
00953	static PyGetSetDef CLazyLinker_getset[] = {
00954	  {(char*)"allow_gc",
00955	   (getter)CLazyLinker_get_allow_gc,
00956	   (setter)CLazyLinker_set_allow_gc,
00957	   (char*)"do this function support allow_gc",
00958	   NULL},
00959	  {NULL, NULL, NULL, NULL}  /* Sentinel */
00960	};
00961	static PyMemberDef CLazyLinker_members[] = {
00962	    {(char*)"nodes", T_OBJECT_EX, offsetof(CLazyLinker, nodes), 0,
00963	     (char*)"list of nodes"},
00964	    {(char*)"thunks", T_OBJECT_EX, offsetof(CLazyLinker, thunks), 0,
00965	     (char*)"list of thunks in program"},
00966	    {(char*)"call_counts", T_OBJECT_EX, offsetof(CLazyLinker, call_counts), 0,
00967	     (char*)"number of calls of each thunk"},
00968	    {(char*)"call_times", T_OBJECT_EX, offsetof(CLazyLinker, call_times), 0,
00969	     (char*)"total runtime in each thunk"},
00970	    {(char*)"position_of_error", T_INT, offsetof(CLazyLinker, position_of_error), 0,
00971	     (char*)"position of failed thunk"},
00972	    {(char*)"time_thunks", T_INT, offsetof(CLazyLinker, do_timing), 0,
00973	     (char*)"bool: nonzero means call will time thunks"},
00974	    {(char*)"need_update_inputs", T_INT, offsetof(CLazyLinker, need_update_inputs), 0,
00975	     (char*)"bool: nonzero means Function.__call__ must implement update mechanism"},
00976	    {NULL}  /* Sentinel */
00977	};
00978	
00979	static PyTypeObject lazylinker_ext_CLazyLinkerType = {
00980	#if defined(NPY_PY3K)
00981	    PyVarObject_HEAD_INIT(NULL, 0)
00982	#else
00983	    PyObject_HEAD_INIT(NULL)
00984	    0,                         /*ob_size*/
00985	#endif
00986	    "lazylinker_ext.CLazyLinker",             /*tp_name*/
00987	    sizeof(CLazyLinker), /*tp_basicsize*/
00988	    0,                         /*tp_itemsize*/
00989	    CLazyLinker_dealloc,       /*tp_dealloc*/
00990	    0,                         /*tp_print*/
00991	    0,                         /*tp_getattr*/
00992	    0,                         /*tp_setattr*/
00993	    0,                         /*tp_compare*/
00994	    0,                         /*tp_repr*/
00995	    0,                         /*tp_as_number*/
00996	    0,                         /*tp_as_sequence*/
00997	    0,                         /*tp_as_mapping*/
00998	    0,                         /*tp_hash */
00999	    CLazyLinker_call,          /*tp_call*/
01000	    0,                         /*tp_str*/
01001	    0,                         /*tp_getattro*/
01002	    0,                         /*tp_setattro*/
01003	    0,                         /*tp_as_buffer*/
01004	    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,        /*tp_flags*/
01005	    "CLazyLinker object",      /* tp_doc */
01006	    0,                         /* tp_traverse */
01007	    0,                         /* tp_clear */
01008	    0,                         /* tp_richcompare */
01009	    0,                         /* tp_weaklistoffset */
01010	    0,                         /* tp_iter */
01011	    0,                         /* tp_iternext */
01012	    0,//CLazyLinker_methods,       /* tp_methods */
01013	    CLazyLinker_members,       /* tp_members */
01014	    CLazyLinker_getset,        /* tp_getset */
01015	    0,                         /* tp_base */
01016	    0,                         /* tp_dict */
01017	    0,                         /* tp_descr_get */
01018	    0,                         /* tp_descr_set */
01019	    0,                         /* tp_dictoffset */
01020	    (initproc)CLazyLinker_init,/* tp_init */
01021	    0,                         /* tp_alloc */
01022	    CLazyLinker_new,           /* tp_new */
01023	};
01024	
01025	static PyObject * get_version(PyObject *dummy, PyObject *args)
01026	{
01027	  PyObject *result = PyFloat_FromDouble(0.21);
01028	  return result;
01029	}
01030	
01031	static PyMethodDef lazylinker_ext_methods[] = {
01032	  {"get_version",  get_version, METH_VARARGS, "Get extension version."},
01033	  {NULL, NULL, 0, NULL}        /* Sentinel */
01034	};
01035	
01036	#ifndef PyMODINIT_FUNC  /* declarations for DLL import/export */
01037	#define PyMODINIT_FUNC void
01038	#endif
01039	
01040	#if defined(NPY_PY3K)
01041	static struct PyModuleDef moduledef = {
01042	        PyModuleDef_HEAD_INIT,
01043	        "lazylinker_ext",
01044	        NULL,
01045	        -1,
01046	        lazylinker_ext_methods,
01047	        NULL,
01048	        NULL,
01049	        NULL,
01050	        NULL
01051	};
01052	#endif
01053	#if defined(NPY_PY3K)
01054	#define RETVAL m
01055	PyMODINIT_FUNC
01056	PyInit_lazylinker_ext(void) {
01057	#else
01058	#define RETVAL
01059	PyMODINIT_FUNC
01060	initlazylinker_ext(void) 
01061	{
01062	#endif
01063	    PyObject* m;
01064	
01065	    lazylinker_ext_CLazyLinkerType.tp_new = PyType_GenericNew;
01066	    if (PyType_Ready(&lazylinker_ext_CLazyLinkerType) < 0)
01067	        return RETVAL;
01068	#if defined(NPY_PY3K)
01069	    m = PyModule_Create(&moduledef);
01070	#else
01071	    m = Py_InitModule3("lazylinker_ext", lazylinker_ext_methods,
01072	                       "Example module that creates an extension type.");
01073	#endif
01074	    Py_INCREF(&lazylinker_ext_CLazyLinkerType);
01075	    PyModule_AddObject(m, "CLazyLinker", (PyObject *)&lazylinker_ext_CLazyLinkerType);
01076	
01077	    return RETVAL;
01078	}
01079	
01080	
Problem occurred during compilation with the command line below:
g++ -shared -g -D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION -m32 -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -o C:\Users\trtrmitya\AppData\Local\Theano\compiledir_Windows-8-6.2.9200-Intel64_Family_6_Model_69_Stepping_1_GenuineIntel-2.7.9-32\lazylinker_ext\lazylinker_ext.pyd C:\Users\trtrmitya\AppData\Local\Theano\compiledir_Windows-8-6.2.9200-Intel64_Family_6_Model_69_Stepping_1_GenuineIntel-2.7.9-32\lazylinker_ext\mod.cpp -LC:\Anaconda\libs -LC:\Anaconda -lpython27
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-1-cfd2f29f29cb> in <module>()
      9 
     10 # Take a moment to install Theano.  We will use it for building neural networks.
---> 11 import theano
     12 from theano import tensor as T
     13 from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

C:\Anaconda\lib\site-packages\theano\__init__.pyc in <module>()
     53     object2, utils
     54 
---> 55 from theano.compile import \
     56     SymbolicInput, In, \
     57     SymbolicOutput, Out, \

C:\Anaconda\lib\site-packages\theano\compile\__init__.py in <module>()
      7         SpecifyShape, specify_shape, register_specify_shape_c_code)
      8 
----> 9 from theano.compile.function_module import *
     10 
     11 from theano.compile.mode import *

C:\Anaconda\lib\site-packages\theano\compile\function_module.py in <module>()
     16 from theano import gof
     17 from theano.compat.python2x import partial
---> 18 import theano.compile.mode
     19 from theano.compile.io import (
     20     In, SymbolicInput, SymbolicInputKit, SymbolicOutput)

C:\Anaconda\lib\site-packages\theano\compile\mode.py in <module>()
      9 import  theano
     10 from theano import gof
---> 11 import theano.gof.vm
     12 from theano.configparser import config, AddConfigVar, StrParam
     13 from theano.compile.ops import register_view_op_c_code, _output_guard

C:\Anaconda\lib\site-packages\theano\gof\vm.py in <module>()
    566 
    567 try:
--> 568     import lazylinker_c
    569 
    570     class CVM(lazylinker_c.CLazyLinker, VM):

C:\Anaconda\lib\site-packages\theano\gof\lazylinker_c.py in <module>()
    114             args = cmodule.GCC_compiler.compile_args()
    115             cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
--> 116                                              preargs=args)
    117             # Save version into the __init__.py file.
    118             init_py = os.path.join(loc, '__init__.py')

C:\Anaconda\lib\site-packages\theano\gof\cmodule.pyc in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module)
   2008             # difficult to read.
   2009             raise Exception('Compilation failed (return status=%s): %s' %
-> 2010                             (status, compile_stderr.replace('\n', '. ')))
   2011         elif config.cmodule.compilation_warning and compile_stderr:
   2012             # Print errors just below the command line.

Exception: Compilation failed (return status=1): 'g++' is not recognized as an internal or external command,
. operable program or batch file.
. 

In [3]:
# Repeating steps from Project 1 to prepare mnist dataset. 
mnist = fetch_mldata('MNIST original', data_home='~/datasets/mnist')
X, Y = mnist.data, mnist.target
X = X / 255.0
shuffle = np.random.permutation(np.arange(X.shape[0]))
X, Y = X[shuffle], Y[shuffle]
numExamples = 2000
test_data, test_labels = X[70000-numExamples:], Y[70000-numExamples:]
train_data, train_labels = X[:numExamples], Y[:numExamples]
numFeatures = train_data[1].size
numTrainExamples = train_data.shape[0]
numTestExamples = test_data.shape[0]
print 'Features = %d' %(numFeatures)
print 'Train set = %d' %(numTrainExamples)
print 'Test set = %d' %(numTestExamples)


Features = 784
Train set = 2000
Test set = 2000

In [4]:
# Convert labels into a set of binary variables, one for each class (sometimes called a 1-of-n encoding).  
# This makes working with NNs easier: there will be one output node for each class.
def binarizeY(data):
    binarized_data = np.zeros((data.size,10))
    for j in range(0,data.size):
        feature = data[j:j+1]
        i = feature.astype(np.int64) 
        binarized_data[j,i]=1
    return binarized_data
train_labels_b = binarizeY(train_labels)
test_labels_b = binarizeY(test_labels)
numClasses = train_labels_b[1].size
print 'Classes = %d' %(numClasses)


Classes = 10

In [5]:
# Lets start with a simple KNN model to establish a baseline accuracy.
# Question: You've seen a number of different machine learning algos.  What's your intuition about KNN scaling and 
# accuracy characteristics vs. other algos? 
neighbors = 1
 # we'll be waiting quite a while if we use 60K
knn = KNeighborsClassifier(neighbors)
mini_train_data, mini_train_labels = X[:numExamples], Y[:numExamples] 
start_time = time.time()
knn.fit(mini_train_data, mini_train_labels)
print 'Train time = %.2f' %(time.time() - start_time)
start_time = time.time()
accuracy = knn.score(test_data, test_labels)
print 'Accuracy = %.4f' %(accuracy)
print 'Prediction time = %.2f' %(time.time() - start_time)


Train time = 0.09
Accuracy = 0.9070
Prediction time = 6.84

In [6]:
# We'll start in Theano with implententing logistic regression.  
# Recall the four key components: (1) parms, (2) model, (3) cost, and (4) objective. 

## (1) Parms 
# Init weights to small, but non-zero, values.
w = theano.shared(np.asarray((np.random.randn(*(numFeatures, numClasses))*.01)))

## (2) Model
# Theano objects accessed with standard Python variables
X = T.matrix()
Y = T.matrix()
# Two things to note here.
# First, logistic regression can be thought of as a neural net with no hidden layers.  So the output values are 
# just the dot product of the inputs and the edge weights.
# Second, we have 10 classes.  So we can either train separate 1 vs all classification using sigmoid activation, 
# which would be a hassle, or we can use the softmax activation, which is essentially a multi-class version of sigmoid. 

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))
y_hat = model(X, w)

## (3) Cost
# Cross entropy only considers the error between the true class and the prediction, and not the errors for the false 
# classes.  This tends to cause the network to converge faster.
cost = T.mean(T.nnet.categorical_crossentropy(y_hat, Y))

## (4) Objective
# Minimization using gradient descent.
alpha = 0.01
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * alpha]] 
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True) # computes cost, then runs update
y_pred = T.argmax(y_hat, axis=1) # select largest probability as prediction
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

def gradientDescent(epochs):
    trainTime = 0.0
    predictTime = 0.0
    for i in range(epochs):
        start_time = time.time()
        cost = train(train_data[0:len(train_data)], train_labels_b[0:len(train_data)])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescent(50)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-cb0b848acb01> in <module>()
      4 ## (1) Parms
      5 # Init weights to small, but non-zero, values.
----> 6 w = theano.shared(np.asarray((np.random.randn(*(numFeatures, numClasses))*.01)))
      7 
      8 ## (2) Model

NameError: name 'theano' is not defined

In [20]:
## Let's switch to SGD and observe the impact. 

## (1) Parms
w = theano.shared(np.asarray((np.random.randn(*(numFeatures, numClasses))*.01)))

## (2) Model
X = T.matrix()
Y = T.matrix()
def model(X, w):
    return T.nnet.softmax(T.dot(X, w))
y_hat = model(X, w)

## (3) Cost
cost = T.mean(T.nnet.categorical_crossentropy(y_hat, Y))

## (4) Objective
alpha = 0.01
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * alpha]] 
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True) 
y_pred = T.argmax(y_hat, axis=1) 
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

## Play with this value and notice the impact.
miniBatchSize = 1 
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):       
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))     
    print 'train time = %.2f' %(trainTime)
    
gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.8445
2) accuracy = 0.8640
3) accuracy = 0.8710
4) accuracy = 0.8720
5) accuracy = 0.8705
6) accuracy = 0.8695
7) accuracy = 0.8695
8) accuracy = 0.8685
9) accuracy = 0.8705
10) accuracy = 0.8715
train time = 7.91
predict time = 0.00

In [21]:
## Now let's add a hidden layer (two layer neural net).

## (1) Parms
# Try playing with this value.
numHiddenNodes = 600 
w_1 = theano.shared(np.asarray((np.random.randn(*(numFeatures, numHiddenNodes))*.01)))
w_2 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numClasses))*.01)))
params = [w_1, w_2]


## (2) Model
X = T.matrix()
Y = T.matrix()
# Two notes:
# First, feed forward is the composition of layers (dot product + activation function)
# Second, activation on the hidden layer still uses sigmoid
def model(X, w_1, w_2):
    return T.nnet.softmax(T.dot(T.nnet.sigmoid(T.dot(X, w_1)), w_2))
y_hat = model(X, w_1, w_2)


## (3) Cost...same as logistic regression
cost = T.mean(T.nnet.categorical_crossentropy(y_hat, Y))


## (4) Minimization.  Update rule changes to backpropagation.
alpha = 0.01
def backprop(cost, w):
    grads = T.grad(cost=cost, wrt=w)
    updates = []
    for w1, grad in zip(w, grads):
        updates.append([w1, w1 - grad * alpha])
    return updates
update = backprop(cost, params)
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
y_pred = T.argmax(y_hat, axis=1)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

miniBatchSize = 1 
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.2505
2) accuracy = 0.6845
3) accuracy = 0.7795
4) accuracy = 0.8170
5) accuracy = 0.8370
6) accuracy = 0.8535
7) accuracy = 0.8625
8) accuracy = 0.8645
9) accuracy = 0.8655
10) accuracy = 0.8680
train time = 140.46
predict time = 0.07

PART 2: Ideas from 2010 onward

As interest in bigger and deeper networks has increased, a couple of tricks have emerged and become standard practice. Let's look at two of those--rectifier activation and dropout noise--that we'll use with deep networks.

For a more in-depth examination of the topic, check out this 1-day tutorial from KDD2014:

Part 1: http://videolectures.net/kdd2014_bengio_deep_learning/

Part 2: http://videolectures.net/tcmm2014_taylor_deep_learning/


In [22]:
## A curiousity: what happens if we simply add a third layer?

## (1) Parms
numHiddenNodes = 600 
w_1 = theano.shared(np.asarray((np.random.randn(*(numFeatures, numHiddenNodes))*.01)))
w_2 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numHiddenNodes))*.01)))
w_3 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numClasses))*.01)))
params = [w_1, w_2, w_3]

## (2) Model
X = T.matrix()
Y = T.matrix()
def model(X, w_1, w_2, w_3):
    return T.nnet.softmax(T.dot(T.nnet.sigmoid(T.dot(T.nnet.sigmoid(T.dot(X, w_1)), w_2)), w_3))
y_hat = model(X, w_1, w_2, w_3)


## (3) Cost...same as logistic regression
cost = T.mean(T.nnet.categorical_crossentropy(y_hat, Y))


## (4) Minimization.  Update rule changes to backpropagation.
alpha = 0.01
def backprop(cost, w):
    grads = T.grad(cost=cost, wrt=w)
    updates = []
    for w1, grad in zip(w, grads):
        updates.append([w1, w1 - grad * alpha])
    return updates
update = backprop(cost, params)
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
y_pred = T.argmax(y_hat, axis=1)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

miniBatchSize = 1 
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.1045
2) accuracy = 0.1045
3) accuracy = 0.1045
4) accuracy = 0.1045
5) accuracy = 0.1045
6) accuracy = 0.1045
7) accuracy = 0.1045
8) accuracy = 0.1045
9) accuracy = 0.1045
10) accuracy = 0.1630
train time = 300.86
predict time = 0.13

Activation Revisted

Before we revisit adding layers, let's look at a recent idea around activation closely associated with deep learning. In 2010, in a paper published at NIPS (https://www.utc.fr/~bordesan/dokuwiki/_media/en/glorot10nipsworkshop.pdf), Yoshua Bengio showed that rectifier activation works better empirically than sigmoid activation when used in the hidden layers.

The rectifier activation is simple: f(x)=max(0,x). Intuitively, the difference is that as a sigmoid activated node approaches 1 it stops learning even if error continues to be propagated to it, whereas the rectifier activated node continue to learn (at least in the positive direction). It is not completely understood (per Yoshua Bengio) why this helps, but there are some theories being explored including as related to the benefits of sparse representations in networks. (http://www.iro.umontreal.ca/~bengioy/talks/KDD2014-tutorial.pdf). Rectifiers also speed up training.

Although the paper was published in 2010, the technique didn't gain widespread adoption until 2012 when members of Hinton's group spread the word, including with this Kaggle entry: http://blog.kaggle.com/2012/11/01/deep-learning-how-i-did-it-merck-1st-place-interview/


In [23]:
## 2-layer NN with rectify activation on the hidden layer.

## (1) Parms
numHiddenNodes = 600 
w_1 = theano.shared(np.asarray((np.random.randn(*(numFeatures, numHiddenNodes))*.01)))
w_2 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numClasses))*.01)))
params = [w_1, w_2]


## (2) Model
X = T.matrix()
Y = T.matrix()

def model(X, w_1, w_2):
    return T.nnet.softmax(T.dot(T.maximum(T.dot(X, w_1), 0.), w_2))
y_hat = model(X, w_1, w_2)


## (3) Cost...same as logistic regression
cost = T.mean(T.nnet.categorical_crossentropy(y_hat, Y))


## (4) Minimization.  Update rule changes to backpropagation.
alpha = 0.01
def backprop(cost, w):
    grads = T.grad(cost=cost, wrt=w)
    updates = []
    for w1, grad in zip(w, grads):
        updates.append([w1, w1 - grad * alpha])
    return updates
update = backprop(cost, params)
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
y_pred = T.argmax(y_hat, axis=1)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

miniBatchSize = 1 
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.8220
2) accuracy = 0.8615
3) accuracy = 0.8720
4) accuracy = 0.8755
5) accuracy = 0.8810
6) accuracy = 0.8840
7) accuracy = 0.8930
8) accuracy = 0.8980
9) accuracy = 0.9015
10) accuracy = 0.9020
train time = 130.99
predict time = 0.07

Maxout

As an exercise, switch to Maxout (or Max Pooling) activiation. Maxout activation just selects the max input as the output. Maxout is a type of pooling, a technique which performs particularly well for vision problems. (http://jmlr.org/proceedings/papers/v28/goodfellow13.pdf, http://www.quora.com/What-is-impact-of-different-pooling-methods-in-convolutional-neural-networks).

Noise

A second trick closely associated with deep learning, and that is now commonplace, is called 'Dropouts'. The idea is that instead of (or in addition to) adding noise to our inputs, we add noise by having each node return 0 with a certain probability during training. This trick both improves generalization in large networks and speeds up training.

Hinton introduced the idea in 2012 and gave an explanation of why it's similar to bagging (http://arxiv.org/pdf/1207.0580v1.pdf)


In [24]:
# Dropouts
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

## (1) Parms
numHiddenNodes = 600 
w_1 = theano.shared(np.asarray((np.random.randn(*(numFeatures, numHiddenNodes))*.01)))
w_2 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numClasses))*.01)))
params = [w_1, w_2]


## (2) Model
X = T.matrix()
Y = T.matrix()
srng = RandomStreams()
def dropout(X, p=0.):
    if p > 0:
        X *= srng.binomial(X.shape, p=1 - p)
        X /= 1 - p
    return X

def model(X, w_1, w_2, p_1, p_2):
    return T.nnet.softmax(T.dot(dropout(T.maximum(T.dot(dropout(X, p_1), w_1),0.), p_2), w_2))

y_hat_train = model(X, w_1, w_2, 0.2, 0.5)
y_hat_predict = model(X, w_1, w_2, 0., 0.)

## (3) Cost...same as logistic regression
cost = T.mean(T.nnet.categorical_crossentropy(y_hat_train, Y))

## (4) Minimization.  Update rule changes to backpropagation.
alpha = 0.01
def backprop(cost, w):
    grads = T.grad(cost=cost, wrt=w)
    updates = []
    for w1, grad in zip(w, grads):
        updates.append([w1, w1 - grad * alpha])
    return updates
update = backprop(cost, params)
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
y_pred = T.argmax(y_hat_predict, axis=1)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)


miniBatchSize = 1
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.8160
2) accuracy = 0.8530
3) accuracy = 0.8775
4) accuracy = 0.8840
5) accuracy = 0.8725
6) accuracy = 0.8975
7) accuracy = 0.8985
8) accuracy = 0.9035
9) accuracy = 0.9090
10) accuracy = 0.9140
train time = 156.16
predict time = 0.06

In [25]:
# Let's add back in that third layer
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

## (1) Parms
numHiddenNodes = 600 
w_1 = theano.shared(np.asarray((np.random.randn(*(numFeatures, numHiddenNodes))*.01)))
w_2 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numHiddenNodes))*.01)))
w_3 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numClasses))*.01)))
params = [w_1, w_2, w_3]


## (2) Model
X = T.matrix()
Y = T.matrix()

srng = RandomStreams()
def dropout(X, p=0.):
    if p > 0:
        X *= srng.binomial(X.shape, p=1 - p)
        X /= 1 - p
    return X

def model(X, w_1, w_2, w_3, p_1, p_2, p_3):
    return T.nnet.softmax(T.dot(dropout(T.maximum(T.dot(dropout(T.maximum(T.dot(dropout(X, p_1), w_1),0.), p_2), w_2),0.), p_3), w_3))

y_hat_train = model(X, w_1, w_2, w_3, 0.2, 0.5,0.5)
y_hat_predict = model(X, w_1, w_2, w_3, 0., 0.,0.)

## (3) Cost...same as logistic regression
cost = T.mean(T.nnet.categorical_crossentropy(y_hat_train, Y))


## (4) Minimization.  Update rule changes to backpropagation.
alpha = 0.01
def backprop(cost, w):
    grads = T.grad(cost=cost, wrt=w)
    updates = []
    for w1, grad in zip(w, grads):
        updates.append([w1, w1 - grad * alpha])
    return updates
update = backprop(cost, params)
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
y_pred = T.argmax(y_hat_predict, axis=1)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

miniBatchSize = 1
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.6045
2) accuracy = 0.7655
3) accuracy = 0.8645
4) accuracy = 0.8720
5) accuracy = 0.8865
6) accuracy = 0.8935
7) accuracy = 0.8975
8) accuracy = 0.9065
9) accuracy = 0.9155
10) accuracy = 0.9070
train time = 322.41
predict time = 0.10

Part 3: Convolutional Neural Nets

Today, when the phrase 'deep learning' is used to describe a system, it most likely is a convolution net (or convonet). The convonet architechture was largely developed in the late 90's at Bell Labs, but only very recently popularized. It was developed for image recognition, and is described and implemented with 2d representations in mind.

Geoffrey Hinton has an excellent two-part lecture on the topic:

https://www.youtube.com/watch?v=6oD3t6u5EPs

https://www.youtube.com/watch?v=fueIAeAsGzA

Also, this code was partly taken from these tutorials, which are worth referring back to:

http://deeplearning.net/tutorial/lenet.html

http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/

https://www.youtube.com/watch?v=S75EdAcXHKk

http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/


In [ ]:
# Let's add back in that third layer
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d

## (1) Parms
numHiddenNodes = 600 
patchWidth = 3
patchHeight = 3
featureMapsLayer1 = 32
featureMapsLayer2 = 64
featureMapsLayer3 = 128

# For convonets, we will work in 2d rather than 1d.  The MNIST images are 28x28 in 2d.
imageWidth = 28
train_data = train_data.reshape(-1, 1, imageWidth, imageWidth)
test_data = test_data.reshape(-1, 1, imageWidth, imageWidth)

# Convolution layers.  
w_1 = theano.shared(np.asarray((np.random.randn(*(featureMapsLayer1, 1, patchWidth, patchHeight))*.01)))
w_2 = theano.shared(np.asarray((np.random.randn(*(featureMapsLayer2, featureMapsLayer1, patchWidth, patchHeight))*.01)))
w_3 = theano.shared(np.asarray((np.random.randn(*(featureMapsLayer3, featureMapsLayer2, patchWidth, patchHeight))*.01)))

# Fully connected NN. 
w_4 = theano.shared(np.asarray((np.random.randn(*(featureMapsLayer3 * 3 * 3, numHiddenNodes))*.01)))
w_5 = theano.shared(np.asarray((np.random.randn(*(numHiddenNodes, numClasses))*.01)))
params = [w_1, w_2, w_3, w_4, w_5]

## (2) Model
X = T.tensor4() # conv2d works with tensor4 type
Y = T.matrix()

srng = RandomStreams()
def dropout(X, p=0.):
    if p > 0:
        X *= srng.binomial(X.shape, p=1 - p)
        X /= 1 - p
    return X

# Theano provides built-in support for add convolutional layers
def model(X, w_1, w_2, w_3, w_4, w_5, p_1, p_2):
    l1 = dropout(max_pool_2d(T.maximum(conv2d(X, w_1, border_mode='full'),0.), (2, 2)), p_1)
    l2 = dropout(max_pool_2d(T.maximum(conv2d(l1, w_2), 0.), (2, 2)), p_1)
    l3 = dropout(T.flatten(max_pool_2d(T.maximum(conv2d(l2, w_3), 0.), (2, 2)), outdim=2), p_1) # flatten to switch back to 1d layers
    l4 = dropout(T.maximum(T.dot(l3, w_4), 0.), p_2)
    return T.nnet.softmax(T.dot(l4, w_5))

y_hat_train = model(X, w_1, w_2, w_3, w_4, w_5, 0.2, 0.5)
y_hat_predict = model(X, w_1, w_2, w_3, w_4, w_5, 0., 0.)
y_x = T.argmax(y_hat, axis=1)

## (3) Cost
cost = T.mean(T.nnet.categorical_crossentropy(y_hat_train, Y))

## (4) Minimization.  
def backprop(cost, w, alpha=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=w)
    updates = []
    for w1, grad in zip(w, grads):
        
        # adding gradient scaling
        acc = theano.shared(w1.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * grad ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        grad = grad / gradient_scaling
        updates.append((acc, acc_new))
        
        updates.append((w1, w1 - grad * alpha))
    return updates

update = backprop(cost, params)
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
y_pred = T.argmax(y_hat_predict, axis=1)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

miniBatchSize = 1
def gradientDescentStochastic(epochs):
    trainTime = 0.0
    predictTime = 0.0
    start_time = time.time()
    for i in range(epochs):
        for start, end in zip(range(0, len(train_data), miniBatchSize), range(miniBatchSize, len(train_data), miniBatchSize)):
            cost = train(train_data[start:end], train_labels_b[start:end])
        trainTime =  trainTime + (time.time() - start_time)
        print '%d) accuracy = %.4f' %(i+1, np.mean(np.argmax(test_labels_b, axis=1) == predict(test_data)))
    print 'train time = %.2f' %(trainTime)

gradientDescentStochastic(10)

start_time = time.time()
predict(test_data)   
print 'predict time = %.2f' %(time.time() - start_time)


1) accuracy = 0.7460
2) accuracy = 0.8870
3) accuracy = 0.9315
4) accuracy = 0.9535

Brain inspiration

The architechture of the convonet was inspired by the visual cortext in the human brain. If you are interested in learning more, check out: http://www-psych.stanford.edu/~ashas/Cognition%20Textbook/chapter2.pdf

Self-Driving Vehicles


In [ ]: