Part of Neural Network Notebook (3nb).
Copyright (C) 2014 by Eka A. Kurniawan
eka.a.kurniawan(ta)gmail(tod)com
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Done:
MacBook Pro Retina, Mid 2012 with OS X 10.9.1 (Mavericks)
Python version:
In [1]:
import sys
print("Python %d.%d.%d" % (sys.version_info.major, \
sys.version_info.minor, \
sys.version_info.micro))
NumPy version:
In [2]:
import numpy as np
print("NumPy %s" % np.__version__)
matplotlib version:
In [3]:
import matplotlib
import matplotlib.pyplot as plt
print("matplotlib %s" % matplotlib.__version__)
To run this IPython Notebook, open a console, go to the notebook directory, and execute the following command.
ipython3-3.3 notebook --pylab inline
Settings required:
In [4]:
# Display graphs in 'retina' format for Macs with Retina display. Otherwise, use PNG or SVG format.
%config InlineBackend.figure_format = 'retina'
#%config InlineBackend.figure_format = 'PNG'
#%config InlineBackend.figure_format = 'SVG'
Other imports:
In [5]:
import time
import os.path
Functions to save/load a list to/from a file.
In [6]:
def save_list(file_name, list_name):
    with open(file_name, 'w') as fh:
        fh.write("\n".join(list_name))

def load_list(file_name):
    with open(file_name, 'r') as fh:
        return [line.strip() for line in fh.readlines()]
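A minimal round trip with these helpers might look as follows (the file name here is a placeholder for this sketch only):
# Hypothetical usage of save_list/load_list; the file name is illustrative.
names = ["Device A", "Device B"]
save_list("demo_device_names.txt", names)
assert load_list("demo_device_names.txt") == names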
The following function plots spikes and postsynaptic responses (PSRs) in the same graph.
In [7]:
# dt:     time step (in second)
# spikes: array of simulation times when spikes happen
# PSRs:   array of postsynaptic responses (PSRs) in every time step (in ampere)
def plot_PSRs(dt, spikes, PSRs):
    # Convert PSR from ampere to nanoampere
    PSRs_prime = PSRs * 1e9
    fig = plt.figure()
    len_PSRs = len(PSRs)
    time = np.linspace(0, len_PSRs - 1, len_PSRs) * dt
    sp1 = fig.add_subplot(211)
    sp1.scatter(spikes, len(spikes) * [1], color = 'red')
    sp1.set_ylabel('Spikes')
    sp1.set_xlim([0, time[-1]])
    sp1.xaxis.grid()
    sp1.set_yticklabels("", visible = False)
    sp1.set_xticklabels("", visible = False)
    sp2 = fig.add_subplot(212)
    sp2.plot(time, PSRs_prime, color = 'green')
    sp2.set_ylabel('Postsynaptic\nResponse\n(Nanoamperes)')
    sp2.set_xlabel('Time (Seconds)')
    sp2.set_xlim([0, time[-1]])
    sp2.grid(True)
    sp2.yaxis.tick_right()
    plt.show()
The following function plots the execution time, comprising buffer allocation time (bat), run time (rt), and the total time (tt) of the two.
In [8]:
def plot_exec_time(bats, rts, tts, device_names, \
                   methods_len, SEQ_PN_ID, PAR_PN_PA_ID, \
                   SEQ_PS_ID, PAR_PS_PA_ID, \
                   ttl_synapses):
    # Set lower and upper limits for X and Y axis.
    xlim_lower = 1e1
    xlim_upper = 1e6
    ylim_lower = 1e-4
    ylim_upper = 1e4
    # Total synapse labels
    ttl_synapse_lbls = [np.prod(ttl_synapse) for ttl_synapse in ttl_synapses]
    portions = [bats, rts, tts]
    portions_len = len(portions)
    portion_labels = ["Buffer Allocation Time", \
                      "Run Time", \
                      "Total Time"]
    devices_len = len(device_names)
    fig, axs = plt.subplots(portions_len, devices_len, sharey = True)
    # axs will be accessed in two-dimensional form. If it is one-dimensional
    # (in case devices_len equals one) add one more fake dimension to it.
    if (devices_len <= 1):
        axs_ext = np.array(portions_len * [np.nan])
        axs = np.vstack((axs, axs_ext)).transpose()
    # Figure size
    fig.set_size_inches(27, 27)
    # Colors
    colors = ['red', 'green', 'blue', 'magenta', 'cyan', 'orange', 'gray']
    # Plot
    for portion_id, portion in enumerate(portions):
        for device_id, device_name in enumerate(device_names):
            # Title
            axs[portion_id, device_id].set_title(device_names[device_id])
            # Handle logarithmic along X or Y axis
            axs[portion_id, device_id].set_xscale('log')
            axs[portion_id, device_id].set_yscale('log')
            # Set lower and upper limits for X and Y axis.
            axs[portion_id, device_id].set_xlim(xlim_lower, xlim_upper)
            axs[portion_id, device_id].set_ylim(ylim_lower, ylim_upper)
            axs[portion_id, device_id].grid(True)
            if not np.isnan(portions[portion_id][device_id][SEQ_PN_ID][0]):
                time_values = [ylim_lower if time_value == 0 else time_value \
                               for time_value in portions[portion_id][device_id][SEQ_PN_ID]]
                axs[portion_id, device_id].plot(ttl_synapse_lbls, time_values, \
                                                label = "seq_pn", \
                                                color = colors[SEQ_PN_ID], \
                                                linewidth = 2.5)
            if not np.isnan(portions[portion_id][device_id][SEQ_PS_ID][0]):
                time_values = [ylim_lower if time_value == 0 else time_value \
                               for time_value in portions[portion_id][device_id][SEQ_PS_ID]]
                axs[portion_id, device_id].plot(ttl_synapse_lbls, time_values, \
                                                label = "seq_ps", \
                                                color = colors[SEQ_PS_ID], \
                                                linewidth = 2.5)
            if not np.isnan(portions[portion_id][device_id][PAR_PN_PA_ID][0]):
                time_values = [ylim_lower if time_value == 0 else time_value \
                               for time_value in portions[portion_id][device_id][PAR_PN_PA_ID]]
                axs[portion_id, device_id].plot(ttl_synapse_lbls, time_values, \
                                                label = "par_pn_pa", \
                                                color = colors[PAR_PN_PA_ID], \
                                                linewidth = 2.5)
            if not np.isnan(portions[portion_id][device_id][PAR_PS_PA_ID][0]):
                time_values = [ylim_lower if time_value == 0 else time_value \
                               for time_value in portions[portion_id][device_id][PAR_PS_PA_ID]]
                axs[portion_id, device_id].plot(ttl_synapse_lbls, time_values, \
                                                label = "par_ps_pa", \
                                                color = colors[PAR_PS_PA_ID], \
                                                linewidth = 2.5)
            # X Labels
            axs[portion_id, device_id].set_xlabel('# Synapses', fontsize = 14)
            # Legend
            axs[portion_id, device_id].legend(loc = 'upper left')
            # Tick font size
            for tick in axs[portion_id, device_id].xaxis.get_major_ticks():
                tick.label.set_fontsize(12)
            for tick in axs[portion_id, device_id].yaxis.get_major_ticks():
                tick.label.set_fontsize(12)
        # Y Labels
        axs[portion_id, 0].set_ylabel(portion_labels[portion_id] + '\n(second)', \
                                      fontsize = 14)
    plt.show()
The implementation is taken from the CSIM: A neural Circuit SIMulator website and modified accordingly [1].
In [9]:
from math import exp

# PSR:   postsynaptic response (in ampere)
# W:     synaptic weight (unitless)
# delay: cause and effect delay between presynaptic event and
#        postsynaptic response (in second)
class Synapse:
    def __init__(self,
                 PSR = 0.0,
                 W = 1.0,
                 delay = 0.0):
        self.PSR = PSR
        self.W = W
        self.delay = delay

# tau: synaptic time constant (in second)
class SpikingSynapse(Synapse):
    def __init__(self,
                 dt = 2e-4,
                 PSR = 0.0,
                 W = 1e-9,
                 delay = 0.0,
                 tau = 3e-3):
        Synapse.__init__(self, PSR, W, delay)
        self.tau = tau
        # postsynaptic decay (unitless)
        self.decay = 0.0
        self.updateInternal(dt)

    def updateInternal(self, dt = 2e-4):
        self.decay = exp(-dt / self.tau)

    def advance(self):
        self.PSR *= self.decay

# U:         synaptic utilization
# D:         depression time constant
# F:         facilitation time constant
# u0:        initial value of running facilitation
# r0:        initial value of running depression
# lastSpike: biological time of last spike
class DynamicSpikingSynapse(SpikingSynapse):
    def __init__(self,
                 dt = 2e-4,
                 PSR = 0.0,
                 W = 1e-9,
                 delay = 0.0,
                 tau = 3e-3,
                 U = 0.4,
                 D = 1.0,
                 F = 0.01,
                 u0 = 0.4,
                 r0 = 1.0,
                 lastSpike = None):
        SpikingSynapse.__init__(self, dt, PSR, W, delay, tau)
        self.U = U
        self.D = D
        self.F = F
        self.u0 = u0
        self.r0 = r0
        self.lastSpike = lastSpike
        self.u = u0
        self.r = r0

    def reset_u(self):
        self.u = self.u0

    def reset_r(self):
        self.r = self.r0

    def preSpikeHit(self, simulationTime):
        if (self.lastSpike):
            isi = simulationTime - self.lastSpike
            self.r = 1.0 + \
                     ( ((self.r * (1.0 - self.u)) - 1.0) * \
                       exp(-isi / self.D) )
            self.u = self.U + \
                     ( self.u * \
                       (1 - self.U) * \
                       exp(-isi / self.F) )
        self.PSR += ((self.W / self.decay) * self.u * self.r)
        self.lastSpike = simulationTime
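For reference, preSpikeHit implements the usual dynamic (depressing/facilitating) synapse update. Restating the code above in math, with the same symbols and the inter-spike interval $isi = simulationTime - lastSpike$:

$$r \leftarrow 1 + \left( r\,(1 - u) - 1 \right) e^{-isi/D}, \qquad u \leftarrow U + u\,(1 - U)\, e^{-isi/F}$$

$$PSR \leftarrow PSR + \frac{W}{decay}\, u\, r, \qquad decay = e^{-dt/\tau}$$

The division by decay appears to pre-compensate for the per-step decay that advance applies within the same time step.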
Plot PSRs with different spike trains.
In [10]:
dt = 2e-4
Tsim = 1.0
nSteps = int((Tsim / dt) + 0.5)
spikes = np.hstack((np.linspace(0.05, 0.15, 4), \
                    np.linspace(0.85, 0.95, 4)))
hasFireds = np.array(nSteps * [0], dtype = np.int32)
for spike in spikes:
    i = int((spike / dt) + 0.5)
    hasFireds[i] = 1
s1 = DynamicSpikingSynapse(tau = 3e-3,
                           U = 0.2,
                           D = 1.0,
                           F = 0.02,
                           u0 = 0.2,
                           r0 = 0.8)
t = 0.0
PSRs = np.array(nSteps * [0.0], dtype = np.float32)
for i in range(nSteps):
    t += dt
    s1.advance()
    if (hasFireds[i]):
        s1.preSpikeHit(t)
    PSRs[i] = s1.PSR
plot_PSRs(dt, spikes, PSRs)
In [11]:
dt = 2e-4
Tsim = 1.0
nSteps = int((Tsim / dt) + 0.5)
spikes = np.hstack((np.linspace(0.05, 0.15, 4), \
                    np.linspace(0.85, 0.95, 5)))
hasFireds = np.array(nSteps * [0], dtype = np.int32)
for spike in spikes:
    i = int((spike / dt) + 0.5)
    hasFireds[i] = 1
s1 = DynamicSpikingSynapse(tau = 10e-3,
                           U = 0.2,
                           D = 1.0,
                           F = 0.02,
                           u0 = 0.2,
                           r0 = 0.8)
t = 0.0
PSRs = np.array(nSteps * [0.0], dtype = np.float32)
for i in range(nSteps):
    t += dt
    s1.advance()
    if (hasFireds[i]):
        s1.preSpikeHit(t)
    PSRs[i] = s1.PSR
plot_PSRs(dt, spikes, PSRs)
In [12]:
dt = 2e-4
Tsim = 6.0
nSteps = int((Tsim / dt) + 0.5)
spikes = np.hstack((np.linspace(0.1, 1.4, 14), \
                    np.linspace(1.43, 1.76, 12), \
                    np.linspace(1.77, 1.83, 7), \
                    np.linspace(1.93, 3.33, 15), \
                    np.linspace(5.33, 5.73, 4)))
hasFireds = np.array(nSteps * [0], dtype = np.int32)
for spike in spikes:
    i = int((spike / dt) + 0.5)
    hasFireds[i] = 1
U = 0.2
s1 = DynamicSpikingSynapse(tau = 10e-3,
                           U = U,
                           D = 1.0,
                           F = 0.02,
                           u0 = U,
                           r0 = 1 - U)
t = 0.0
PSRs = np.array(nSteps * [0.0], dtype = np.float32)
for i in range(nSteps):
    t += dt
    s1.advance()
    if (hasFireds[i]):
        s1.preSpikeHit(t)
    PSRs[i] = s1.PSR
plot_PSRs(dt, spikes, PSRs)
Sequential implementation in which each core is in charge of one neuron with multiple synapses.
In [13]:
def run_seq_pn(Tsim, dt, spikes, \
               ttl_neuron, ttl_incoming_synapse_per_neuron, \
               plot = False):
    # Buffer allocation time is N/A for sequential implementation
    bat = np.nan
    # Biological time
    nSteps = int((Tsim / dt) + 0.5)
    # Convert spikes into hasFireds
    hasFireds = np.array(nSteps * [0], dtype = np.int32)
    for spike in spikes:
        i = int((spike / dt) + 0.5)
        hasFireds[i] = 1
    # Array of total incoming synapse
    ttl_incoming_synapses_np = np.array(ttl_neuron * [ttl_incoming_synapse_per_neuron], \
                                        dtype = np.int32)
    # Array of incoming synapse start index
    incoming_synapse_start_idxs_np = np.array(ttl_neuron * [0], dtype = np.int64)
    for i in range(1, ttl_neuron):
        incoming_synapse_start_idxs_np[i] = incoming_synapse_start_idxs_np[i - 1] + \
                                            ttl_incoming_synapses_np[i - 1]
    # Total synapse
    ttl_synapse = ttl_neuron * ttl_incoming_synapse_per_neuron
    # Construct dynamic spiking synapses with different values of U
    Us = np.linspace(0.2, 0.4, ttl_synapse)
    dynamicSpikingSynapses = []
    for i in range(ttl_synapse):
        U = Us[i]
        dynamicSpikingSynapses.append(DynamicSpikingSynapse(tau = 10e-3,
                                                            U = U,
                                                            D = 1.0,
                                                            F = 0.02,
                                                            u0 = U,
                                                            r0 = 1 - U))
    # Collection of total PSRs of each neuron
    ttl_PSRs_coll = np.zeros((ttl_neuron, nSteps))
    # Perform advance and preSpikeHit
    t = 0.0
    tic = time.time()
    for k in range(nSteps):
        t += dt
        for j in range(ttl_neuron):
            ttl_PSR = 0.0
            ttl_incoming_synapse = ttl_incoming_synapses_np[j]
            start_idx = incoming_synapse_start_idxs_np[j]
            for i in range(ttl_incoming_synapse):
                dynamicSpikingSynapses[start_idx + i].advance()
                if (hasFireds[k]):
                    dynamicSpikingSynapses[start_idx + i].preSpikeHit(t)
                ttl_PSR += dynamicSpikingSynapses[start_idx + i].PSR
            ttl_PSRs_coll[j][k] = ttl_PSR
    toc = time.time()
    # Run time
    rt = toc - tic
    # Total time
    tt = rt
    if plot:
        plot_PSRs(dt, spikes, ttl_PSRs_coll[0])
        plot_PSRs(dt, spikes, ttl_PSRs_coll[-1])
    return bat, rt, tt
The testing code plots the total PSRs of the first and last neurons.
In [14]:
dt = 2e-4
Tsim = 6.0
spikes = np.hstack((np.linspace(0.1, 1.4, 14), \
                    np.linspace(1.43, 1.76, 12), \
                    np.linspace(1.77, 1.83, 7), \
                    np.linspace(1.93, 3.33, 15), \
                    np.linspace(5.33, 5.73, 4)))
ttl_neuron = 10
ttl_incoming_synapse_per_neuron = 2
seq_pn_t = run_seq_pn(Tsim, dt, spikes, \
                      ttl_neuron, ttl_incoming_synapse_per_neuron, \
                      plot = True)
Sequential implementation in which each core is in charge of one synapse.
In [15]:
def run_seq_ps(Tsim, dt, spikes, \
               ttl_neuron, ttl_incoming_synapse_per_neuron, \
               plot = False):
    # Buffer allocation time is N/A for sequential implementation
    bat = np.nan
    # Biological time
    nSteps = int((Tsim / dt) + 0.5)
    # Convert spikes into hasFireds
    hasFireds = np.array(nSteps * [0], dtype = np.int32)
    for spike in spikes:
        i = int((spike / dt) + 0.5)
        hasFireds[i] = 1
    # Array of total incoming synapse
    ttl_incoming_synapses_np = np.array(ttl_neuron * [ttl_incoming_synapse_per_neuron], \
                                        dtype = np.int32)
    # Array of incoming synapse start index
    incoming_synapse_start_idxs_np = np.array(ttl_neuron * [0], dtype = np.int64)
    for i in range(1, ttl_neuron):
        incoming_synapse_start_idxs_np[i] = incoming_synapse_start_idxs_np[i - 1] + \
                                            ttl_incoming_synapses_np[i - 1]
    # Total synapse
    ttl_synapse = ttl_neuron * ttl_incoming_synapse_per_neuron
    # Construct dynamic spiking synapses with different values of U
    Us = np.linspace(0.2, 0.4, ttl_synapse)
    dynamicSpikingSynapses = []
    for i in range(ttl_synapse):
        U = Us[i]
        dynamicSpikingSynapses.append(DynamicSpikingSynapse(tau = 10e-3,
                                                            U = U,
                                                            D = 1.0,
                                                            F = 0.02,
                                                            u0 = U,
                                                            r0 = 1 - U))
    # Collection of total PSRs of each neuron
    ttl_PSRs_coll = np.zeros((ttl_neuron, nSteps))
    # Perform advance and preSpikeHit
    t = 0.0
    tic = time.time()
    for step_idx in range(nSteps):
        t += dt
        for s_idx in range(ttl_synapse):
            dynamicSpikingSynapses[s_idx].advance()
            if (hasFireds[step_idx]):
                dynamicSpikingSynapses[s_idx].preSpikeHit(t)
        # Calculate total PSR
        for n_idx in range(ttl_neuron):
            ttl_PSR = 0.0
            ttl_incoming_synapse = ttl_incoming_synapses_np[n_idx]
            start_idx = incoming_synapse_start_idxs_np[n_idx]
            for is_idx in range(ttl_incoming_synapse):
                ttl_PSR += dynamicSpikingSynapses[start_idx + is_idx].PSR
            ttl_PSRs_coll[n_idx][step_idx] = ttl_PSR
    toc = time.time()
    # Run time
    rt = toc - tic
    # Total time
    tt = rt
    if plot:
        plot_PSRs(dt, spikes, ttl_PSRs_coll[0])
        plot_PSRs(dt, spikes, ttl_PSRs_coll[-1])
    return bat, rt, tt
The testing code plots the total PSRs of the first and last neurons.
In [16]:
dt = 2e-4
Tsim = 6.0
spikes = np.hstack((np.linspace(0.1, 1.4, 14), \
                    np.linspace(1.43, 1.76, 12), \
                    np.linspace(1.77, 1.83, 7), \
                    np.linspace(1.93, 3.33, 15), \
                    np.linspace(5.33, 5.73, 4)))
ttl_neuron = 10
ttl_incoming_synapse_per_neuron = 2
seq_ps_t = run_seq_ps(Tsim, dt, spikes, \
                      ttl_neuron, ttl_incoming_synapse_per_neuron, \
                      plot = True)
The parallel Python-OpenCL implementation requires an OpenCL driver from the CPU or GPU vendor, as well as the PyOpenCL module for Python (typically installable with pip install pyopencl).
PyOpenCL version:
In [17]:
import pyopencl as cl
print("PyOpenCL %s" % cl.VERSION_TEXT)
Set the PYOPENCL_COMPILER_OUTPUT environment variable to 1 in order to see the OpenCL compilation output.
In [18]:
import os
os.environ['PYOPENCL_COMPILER_OUTPUT'] = '1'
The list of all OpenCL devices found on this machine.
In [19]:
devices = []
device_names = []
for platform_id, platform in enumerate(cl.get_platforms()):
    print("Platform[%d] Vendor : %s" % (platform_id, platform.get_info(cl.platform_info.VENDOR)))
    print("Platform[%d] Name : %s" % (platform_id, platform.get_info(cl.platform_info.NAME)))
    print("Platform[%d] Version : %s" % (platform_id, platform.get_info(cl.platform_info.VERSION)))
    print("Platform[%d] Device(s) :" % platform_id)
    for device_id, device in enumerate(platform.get_devices(device_type = cl.device_type.ALL)):
        devices.append(device)
        print(" - Device[%d] : %s - %s" % (device_id, \
                                           device.get_info(cl.device_info.VENDOR), \
                                           device.get_info(cl.device_info.NAME)))
        device_names.append(device.get_info(cl.device_info.NAME))
The OpenCL code (kernel), written in C, provides combined advance and preSpikeHit functions on a per-neuron basis (advance_spikeHit_pn_pa) and on a per-synapse basis (advance_spikeHit_ps_pa), both using parameter arrays, plus a kernel that sums up the PSRs per neuron (sum_up_psr).
In [20]:
%%writefile DynamicSpikingSynapse/DynamicSpikingSynapse.cl
// Copyright (C) 2014 by Eka A. Kurniawan
// eka.a.kurniawan(ta)gmail(tod)com
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; version 2 of the License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the
// Free Software Foundation, Inc.,
// 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// One work-item per neuron: advance and preSpikeHit for all of the
// neuron's incoming synapses, accumulating the total PSR.
__kernel void advance_spikeHit_pn_pa(const float t,
                                     const int step,
                                     const int nSteps,
                                     __global const int *hasFireds,
                                     __global const int *ttl_incoming_synapses,
                                     __global const long *incoming_synapse_start_idxs,
                                     __global float *dss_PSR,
                                     __global const float *dss_W,
                                     __global const float *dss_decay,
                                     __global const float *dss_U,
                                     __global const float *dss_D,
                                     __global const float *dss_F,
                                     __global float *dss_u,
                                     __global float *dss_r,
                                     __global float *dss_lastSpike,
                                     __global float *ttl_PSRs_coll)
{
    long gid = get_global_id(0);
    // Index to store total PSR
    long out_idx = gid * nSteps;

    float ttl_PSR = 0;
    int ttl_incoming_synapse = ttl_incoming_synapses[gid];
    long start_idx = incoming_synapse_start_idxs[gid];
    for (int i = 0; i < ttl_incoming_synapse; i++) {
        // Synapse ID
        long sid = start_idx + (long)i;
        // advance
        float PSR = dss_PSR[sid];
        float decay = dss_decay[sid];
        PSR *= decay;
        // preSpikeHit
        if (hasFireds[step]) {
            float r = dss_r[sid];
            float u = dss_u[sid];
            if (dss_lastSpike[sid] > 0) {
                float isi = t - dss_lastSpike[sid];
                r = 1 + (((r * (1 - u)) - 1) *
                         exp(-isi / dss_D[sid]));
                float U = dss_U[sid];
                u = U + (u * (1 - U) * exp(-isi / dss_F[sid]));
                dss_r[sid] = r;
                dss_u[sid] = u;
            }
            PSR += ((dss_W[sid] / decay) * u * r);
            dss_lastSpike[sid] = t;
        }
        dss_PSR[sid] = PSR;
        ttl_PSR += PSR;
    }
    ttl_PSRs_coll[out_idx + step] = ttl_PSR;
}

// One work-item per synapse: advance and preSpikeHit for a single synapse.
__kernel void advance_spikeHit_ps_pa(const float t,
                                     const int step,
                                     __global const int *hasFireds,
                                     __global float *dss_PSR,
                                     __global const float *dss_W,
                                     __global const float *dss_decay,
                                     __global const float *dss_U,
                                     __global const float *dss_D,
                                     __global const float *dss_F,
                                     __global float *dss_u,
                                     __global float *dss_r,
                                     __global float *dss_lastSpike)
{
    long gid = get_global_id(0);
    // advance
    float PSR = dss_PSR[gid];
    float decay = dss_decay[gid];
    PSR *= decay;
    // preSpikeHit
    if (hasFireds[step]) {
        float r = dss_r[gid];
        float u = dss_u[gid];
        if (dss_lastSpike[gid] > 0) {
            float isi = t - dss_lastSpike[gid];
            r = 1 + (((r * (1 - u)) - 1) *
                     exp(-isi / dss_D[gid]));
            float U = dss_U[gid];
            u = U + (u * (1 - U) * exp(-isi / dss_F[gid]));
            dss_r[gid] = r;
            dss_u[gid] = u;
        }
        PSR += ((dss_W[gid] / decay) * u * r);
        dss_lastSpike[gid] = t;
    }
    dss_PSR[gid] = PSR;
}

// One work-item per neuron: sum up the PSRs of the neuron's incoming synapses.
__kernel void sum_up_psr(const int step,
                         const int nSteps,
                         __global const int *ttl_incoming_synapses,
                         __global const long *incoming_synapse_start_idxs,
                         __global float *dss_PSR,
                         __global float *ttl_PSRs_coll)
{
    long gid = get_global_id(0);
    // Index to store total PSR
    long out_idx = gid * nSteps;

    float ttl_PSR = 0;
    int ttl_incoming_synapse = ttl_incoming_synapses[gid];
    long start_idx = incoming_synapse_start_idxs[gid];
    for (int i = 0; i < ttl_incoming_synapse; i++) {
        // Synapse ID
        long sid = start_idx + (long)i;
        // Sum up PSR
        float PSR = dss_PSR[sid];
        ttl_PSR += PSR;
    }
    ttl_PSRs_coll[out_idx + step] = ttl_PSR;
}
Functions to set up OpenCL devices and create an OpenCL program.
In [21]:
def cl_setup(platform_id, device_id):
    platform = cl.get_platforms()[platform_id]
    device = platform.get_devices()[device_id]
    context = cl.Context(devices = [device])
    queue = cl.CommandQueue(context, device = device)
    return platform, device, context, queue

def cl_set_context(device):
    return cl.Context(devices = [device])

def cl_set_queue(context, device):
    return cl.CommandQueue(context, device = device)

def cl_create_program(context, device, filename):
    with open(filename, 'r') as fh:
        code = fh.read()
    program = cl.Program(context, code)
    program.build(devices = [device])
    return program
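As a quick sketch of how these helpers chain together (the platform/device index 0 is an arbitrary placeholder; the kernel path matches the %%writefile cell above):
# Hypothetical usage of the OpenCL helpers; indices are placeholders.
platform, device, context, queue = cl_setup(0, 0)
program = cl_create_program(context, device,
                            "./DynamicSpikingSynapse/DynamicSpikingSynapse.cl")
# The kernels are now callable as program.advance_spikeHit_pn_pa, etc.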
Parallel implementation in which each core is in charge of one neuron with multiple synapses. This implementation uses the parameter-arrays data structure.
In [22]:
def run_par_pn_pa(device_id, \
                  Tsim, dt, spikes, \
                  ttl_neuron, ttl_incoming_synapse_per_neuron, \
                  workitem_size = None, plot = False):
    # Setup OpenCL
    cl_device = devices[device_id]
    cl_context = cl_set_context(cl_device)
    cl_queue = cl_set_queue(cl_context, cl_device)
    # Biological time
    nSteps = int((Tsim / dt) + 0.5)
    # Convert spikes into hasFireds
    hasFireds_np = np.array(nSteps * [0], dtype = np.int32)
    for spike in spikes:
        i = int((spike / dt) + 0.5)
        hasFireds_np[i] = 1
    # Array of total incoming synapse
    ttl_incoming_synapses_np = np.array(ttl_neuron * [ttl_incoming_synapse_per_neuron], \
                                        dtype = np.int32)
    # Array of incoming synapse start index
    incoming_synapse_start_idxs_np = np.array(ttl_neuron * [0], dtype = np.int64)
    for i in range(1, ttl_neuron):
        incoming_synapse_start_idxs_np[i] = incoming_synapse_start_idxs_np[i - 1] + \
                                            ttl_incoming_synapses_np[i - 1]
    # Total synapse
    ttl_synapse = ttl_neuron * ttl_incoming_synapse_per_neuron
    # Construct dynamic spiking synapses with different values of U
    Us = np.linspace(0.2, 0.4, ttl_synapse)
    dynamicSpikingSynapses = []
    for i in range(ttl_synapse):
        U = Us[i]
        dynamicSpikingSynapses.append(DynamicSpikingSynapse(tau = 10e-3,
                                                            U = U,
                                                            D = 1.0,
                                                            F = 0.02,
                                                            u0 = U,
                                                            r0 = 1 - U))
    # Create OpenCL buffer
    tic = time.time()
    mf = cl.mem_flags
    # hasFireds
    hasFireds_buf = cl.Buffer(cl_context, \
                              mf.READ_ONLY | mf.COPY_HOST_PTR, \
                              hostbuf = hasFireds_np)
    # Total incoming synapse
    ttl_incoming_synapses_buf = cl.Buffer(cl_context, \
                                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                                          hostbuf = ttl_incoming_synapses_np)
    # Incoming synapse start index
    incoming_synapse_start_idxs_buf = cl.Buffer(cl_context, \
                                                mf.READ_ONLY | mf.COPY_HOST_PTR, \
                                                hostbuf = incoming_synapse_start_idxs_np)
    # DynamicSpikingSynapses
    dss_PSR_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_W_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_decay_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_U_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_D_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_F_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_u_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_r_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_lastSpike_np = np.ones((ttl_synapse), dtype = np.float32) * -1.0
    for dynamicSpikingSynapse_idx, dynamicSpikingSynapse in enumerate(dynamicSpikingSynapses):
        dss_PSR_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.PSR
        dss_W_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.W
        dss_decay_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.decay
        dss_U_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.U
        dss_D_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.D
        dss_F_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.F
        dss_u_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.u
        dss_r_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.r
        # lastSpike is None until the first spike; keep the -1.0 sentinel in
        # that case so the kernel's (lastSpike > 0) guard stays false.
        if dynamicSpikingSynapse.lastSpike is not None:
            dss_lastSpike_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.lastSpike
    dss_PSR_buf = cl.Buffer(cl_context, \
                            mf.READ_WRITE | mf.COPY_HOST_PTR, \
                            hostbuf = dss_PSR_np)
    dss_W_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_W_np)
    dss_decay_buf = cl.Buffer(cl_context, \
                              mf.READ_ONLY | mf.COPY_HOST_PTR, \
                              hostbuf = dss_decay_np)
    dss_U_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_U_np)
    dss_D_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_D_np)
    dss_F_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_F_np)
    dss_u_buf = cl.Buffer(cl_context, \
                          mf.READ_WRITE | mf.COPY_HOST_PTR, \
                          hostbuf = dss_u_np)
    dss_r_buf = cl.Buffer(cl_context, \
                          mf.READ_WRITE | mf.COPY_HOST_PTR, \
                          hostbuf = dss_r_np)
    dss_lastSpike_buf = cl.Buffer(cl_context, \
                                  mf.READ_WRITE | mf.COPY_HOST_PTR, \
                                  hostbuf = dss_lastSpike_np)
    toc = time.time()
    # Buffer allocation time
    bat = toc - tic
    # Collection of total PSRs of each neuron
    ttl_PSRs_coll_np = np.zeros((ttl_neuron, nSteps), dtype = np.float32)
    ttl_PSRs_coll_buf = cl.Buffer(cl_context, \
                                  mf.WRITE_ONLY | mf.COPY_HOST_PTR, \
                                  hostbuf = ttl_PSRs_coll_np)
    # Create OpenCL program
    cl_program = cl_create_program(cl_context, cl_device, \
                                   "./DynamicSpikingSynapse/DynamicSpikingSynapse.cl")
    # Work-item size needs to be a tuple unless it is None
    if workitem_size is not None:
        workitem_size = (workitem_size,)
    # Perform advance and preSpikeHit
    t = 0.0
    tic = time.time()
    for step in range(nSteps):
        t += dt
        cl_program.advance_spikeHit_pn_pa(cl_queue, \
                                          (ttl_neuron,), workitem_size, \
                                          np.float32(t), \
                                          np.int32(step), \
                                          np.int32(nSteps), \
                                          hasFireds_buf, \
                                          ttl_incoming_synapses_buf, \
                                          incoming_synapse_start_idxs_buf, \
                                          dss_PSR_buf, \
                                          dss_W_buf, \
                                          dss_decay_buf, \
                                          dss_U_buf, \
                                          dss_D_buf, \
                                          dss_F_buf, \
                                          dss_u_buf, \
                                          dss_r_buf, \
                                          dss_lastSpike_buf, \
                                          ttl_PSRs_coll_buf).wait()
    toc = time.time()
    # Run time
    rt = toc - tic
    # Collect result
    cl.enqueue_read_buffer(cl_queue, ttl_PSRs_coll_buf, hostbuf = ttl_PSRs_coll_np).wait()
    # Total time
    tt = bat + rt
    if plot:
        plot_PSRs(dt, spikes, ttl_PSRs_coll_np[0])
        plot_PSRs(dt, spikes, ttl_PSRs_coll_np[-1])
    return bat, rt, tt
The testing code plots the total PSRs of the first and last neurons.
In [23]:
device_id = 1
dt = 2e-4
Tsim = 6.0
spikes = np.hstack((np.linspace(0.1, 1.4, 14), \
                    np.linspace(1.43, 1.76, 12), \
                    np.linspace(1.77, 1.83, 7), \
                    np.linspace(1.93, 3.33, 15), \
                    np.linspace(5.33, 5.73, 4)))
ttl_neuron = 10
ttl_incoming_synapse_per_neuron = 2
par_pn_pa_t = run_par_pn_pa(device_id, \
                            Tsim, dt, spikes, \
                            ttl_neuron, ttl_incoming_synapse_per_neuron, \
                            plot = True)
Parallel implementation in which each core is in charge of one synapse. This implementation uses the parameter-arrays data structure.
In [24]:
def run_par_ps_pa(device_id, \
                  Tsim, dt, spikes, \
                  ttl_neuron, ttl_incoming_synapse_per_neuron, \
                  workitem_size = None, plot = False):
    # Setup OpenCL
    cl_device = devices[device_id]
    cl_context = cl_set_context(cl_device)
    cl_queue = cl_set_queue(cl_context, cl_device)
    # Biological time
    nSteps = int((Tsim / dt) + 0.5)
    # Convert spikes into hasFireds
    hasFireds_np = np.array(nSteps * [0], dtype = np.int32)
    for spike in spikes:
        i = int((spike / dt) + 0.5)
        hasFireds_np[i] = 1
    # Array of total incoming synapse
    ttl_incoming_synapses_np = np.array(ttl_neuron * [ttl_incoming_synapse_per_neuron], \
                                        dtype = np.int32)
    # Array of incoming synapse start index
    incoming_synapse_start_idxs_np = np.array(ttl_neuron * [0], dtype = np.int64)
    for i in range(1, ttl_neuron):
        incoming_synapse_start_idxs_np[i] = incoming_synapse_start_idxs_np[i - 1] + \
                                            ttl_incoming_synapses_np[i - 1]
    # Total synapse
    ttl_synapse = ttl_neuron * ttl_incoming_synapse_per_neuron
    # Construct dynamic spiking synapses with different values of U
    Us = np.linspace(0.2, 0.4, ttl_synapse)
    dynamicSpikingSynapses = []
    for i in range(ttl_synapse):
        U = Us[i]
        dynamicSpikingSynapses.append(DynamicSpikingSynapse(tau = 10e-3,
                                                            U = U,
                                                            D = 1.0,
                                                            F = 0.02,
                                                            u0 = U,
                                                            r0 = 1 - U))
    # Create OpenCL buffer
    tic = time.time()
    mf = cl.mem_flags
    # hasFireds
    hasFireds_buf = cl.Buffer(cl_context, \
                              mf.READ_ONLY | mf.COPY_HOST_PTR, \
                              hostbuf = hasFireds_np)
    # Total incoming synapse
    ttl_incoming_synapses_buf = cl.Buffer(cl_context, \
                                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                                          hostbuf = ttl_incoming_synapses_np)
    # Incoming synapse start index
    incoming_synapse_start_idxs_buf = cl.Buffer(cl_context, \
                                                mf.READ_ONLY | mf.COPY_HOST_PTR, \
                                                hostbuf = incoming_synapse_start_idxs_np)
    # DynamicSpikingSynapses
    dss_PSR_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_W_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_decay_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_U_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_D_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_F_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_u_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_r_np = np.zeros((ttl_synapse), dtype = np.float32)
    dss_lastSpike_np = np.ones((ttl_synapse), dtype = np.float32) * -1.0
    for dynamicSpikingSynapse_idx, dynamicSpikingSynapse in enumerate(dynamicSpikingSynapses):
        dss_PSR_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.PSR
        dss_W_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.W
        dss_decay_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.decay
        dss_U_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.U
        dss_D_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.D
        dss_F_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.F
        dss_u_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.u
        dss_r_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.r
        # lastSpike is None until the first spike; keep the -1.0 sentinel in
        # that case so the kernel's (lastSpike > 0) guard stays false.
        if dynamicSpikingSynapse.lastSpike is not None:
            dss_lastSpike_np[dynamicSpikingSynapse_idx] = dynamicSpikingSynapse.lastSpike
    dss_PSR_buf = cl.Buffer(cl_context, \
                            mf.READ_WRITE | mf.COPY_HOST_PTR, \
                            hostbuf = dss_PSR_np)
    dss_W_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_W_np)
    dss_decay_buf = cl.Buffer(cl_context, \
                              mf.READ_ONLY | mf.COPY_HOST_PTR, \
                              hostbuf = dss_decay_np)
    dss_U_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_U_np)
    dss_D_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_D_np)
    dss_F_buf = cl.Buffer(cl_context, \
                          mf.READ_ONLY | mf.COPY_HOST_PTR, \
                          hostbuf = dss_F_np)
    dss_u_buf = cl.Buffer(cl_context, \
                          mf.READ_WRITE | mf.COPY_HOST_PTR, \
                          hostbuf = dss_u_np)
    dss_r_buf = cl.Buffer(cl_context, \
                          mf.READ_WRITE | mf.COPY_HOST_PTR, \
                          hostbuf = dss_r_np)
    dss_lastSpike_buf = cl.Buffer(cl_context, \
                                  mf.READ_WRITE | mf.COPY_HOST_PTR, \
                                  hostbuf = dss_lastSpike_np)
    toc = time.time()
    # Buffer allocation time
    bat = toc - tic
    # Collection of total PSRs of each neuron
    ttl_PSRs_coll_np = np.zeros((ttl_neuron, nSteps), dtype = np.float32)
    ttl_PSRs_coll_buf = cl.Buffer(cl_context, \
                                  mf.WRITE_ONLY | mf.COPY_HOST_PTR, \
                                  hostbuf = ttl_PSRs_coll_np)
    # Create OpenCL program
    cl_program = cl_create_program(cl_context, cl_device, \
                                   "./DynamicSpikingSynapse/DynamicSpikingSynapse.cl")
    # Work-item size needs to be a tuple unless it is None
    if workitem_size is not None:
        workitem_size = (workitem_size,)
    # Perform advance and preSpikeHit
    t = 0.0
    tic = time.time()
    for step in range(nSteps):
        t += dt
        cl_program.advance_spikeHit_ps_pa(cl_queue, \
                                          (ttl_synapse,), workitem_size, \
                                          np.float32(t), \
                                          np.int32(step), \
                                          hasFireds_buf, \
                                          dss_PSR_buf, \
                                          dss_W_buf, \
                                          dss_decay_buf, \
                                          dss_U_buf, \
                                          dss_D_buf, \
                                          dss_F_buf, \
                                          dss_u_buf, \
                                          dss_r_buf, \
                                          dss_lastSpike_buf).wait()
        cl_program.sum_up_psr(cl_queue, \
                              (ttl_neuron,), workitem_size, \
                              np.int32(step), \
                              np.int32(nSteps), \
                              ttl_incoming_synapses_buf, \
                              incoming_synapse_start_idxs_buf, \
                              dss_PSR_buf, \
                              ttl_PSRs_coll_buf).wait()
    toc = time.time()
    # Run time
    rt = toc - tic
    # Collect result
    cl.enqueue_read_buffer(cl_queue, ttl_PSRs_coll_buf, hostbuf = ttl_PSRs_coll_np).wait()
    # Total time
    tt = bat + rt
    if plot:
        plot_PSRs(dt, spikes, ttl_PSRs_coll_np[0])
        plot_PSRs(dt, spikes, ttl_PSRs_coll_np[-1])
    return bat, rt, tt
The testing code plots the total PSRs of the first and last neurons.
In [25]:
device_id = 1
dt = 2e-4
Tsim = 6.0
spikes = np.hstack((np.linspace(0.1, 1.4, 14), \
                    np.linspace(1.43, 1.76, 12), \
                    np.linspace(1.77, 1.83, 7), \
                    np.linspace(1.93, 3.33, 15), \
                    np.linspace(5.33, 5.73, 4)))
ttl_neuron = 10
ttl_incoming_synapse_per_neuron = 2
par_ps_pa_t = run_par_ps_pa(device_id, \
                            Tsim, dt, spikes, \
                            ttl_neuron, ttl_incoming_synapse_per_neuron, \
                            plot = True)
The spike train used to collect execution times is set within 0.3 seconds of biological time, as follows.
In [26]:
device_id = 1
dt = 2e-4
Tsim = 0.3
spikes = np.linspace(0.05, 0.25, 11)
ttl_neuron = 10
ttl_incoming_synapse_per_neuron = 2
par_pn_pa_t = run_par_pn_pa(device_id, \
                            Tsim, dt, spikes, \
                            ttl_neuron, ttl_incoming_synapse_per_neuron, \
                            plot = True)
The following are the total synapse counts used to collect execution times.
In [27]:
dt = 2e-4
Tsim = 0.3
spikes = np.linspace(0.05, 0.25, 11)
# Total synapse = total neuron * total incoming synapse per neuron
ttl_synapses = [[10, 1], \
                [10, 10], \
                [100, 10], \
                [1000, 10], \
                [1000, 100], \
                [10000, 100]]
ttl_synapses_len = len(ttl_synapses)
# Methods
methods_len = 4
SEQ_PN_ID = 0
SEQ_PS_ID = 1
PAR_PN_PA_ID = 2
PAR_PS_PA_ID = 3
# Devices
devices_len = len(devices)
# Portions
# Buffer allocation times
bats = np.zeros((devices_len, methods_len, ttl_synapses_len), dtype = np.float32)
# Run times
rts = np.zeros((devices_len, methods_len, ttl_synapses_len), dtype = np.float32)
# Total times
tts = np.zeros((devices_len, methods_len, ttl_synapses_len), dtype = np.float32)
BAT_ID = 0
RT_ID = 1
TT_ID = 2
# Run sequential once for every synapse size (ttl_synapses)
seq_pn_ts = []
seq_ps_ts = []
# Devices
for device_id, device in enumerate(devices):
    # Total synapses
    for ttl_synapse_id, ttl_synapse in enumerate(ttl_synapses):
        ttl_neuron, ttl_incoming_synapse_per_neuron = ttl_synapse
        # Methods
        # Run sequential per neuron once as the baseline
        if device_id == 0:
            try:
                seq_pn_t = run_seq_pn(Tsim, dt, spikes, \
                                      ttl_neuron, ttl_incoming_synapse_per_neuron)
            except Exception:
                seq_pn_t = (np.nan, np.nan, np.nan)
            seq_pn_ts.append(seq_pn_t)
        else:
            seq_pn_t = seq_pn_ts[ttl_synapse_id]
        # Run sequential per synapse once as the baseline
        if device_id == 0:
            try:
                seq_ps_t = run_seq_ps(Tsim, dt, spikes, \
                                      ttl_neuron, ttl_incoming_synapse_per_neuron)
            except Exception:
                seq_ps_t = (np.nan, np.nan, np.nan)
            seq_ps_ts.append(seq_ps_t)
        else:
            seq_ps_t = seq_ps_ts[ttl_synapse_id]
        # Parallel per neuron basis with parameter arrays
        try:
            par_pn_pa_t = run_par_pn_pa(device_id, \
                                        Tsim, dt, spikes, \
                                        ttl_neuron, ttl_incoming_synapse_per_neuron)
        except (cl.LogicError, cl.MemoryError, cl.RuntimeError):
            par_pn_pa_t = (np.nan, np.nan, np.nan)
        # Parallel per synapse basis with parameter arrays
        try:
            par_ps_pa_t = run_par_ps_pa(device_id, \
                                        Tsim, dt, spikes, \
                                        ttl_neuron, ttl_incoming_synapse_per_neuron)
        except (cl.LogicError, cl.MemoryError, cl.RuntimeError):
            par_ps_pa_t = (np.nan, np.nan, np.nan)
        # Portions -> Devices -> Methods -> Total Synapses
        bats[device_id][SEQ_PN_ID][ttl_synapse_id] = seq_pn_t[BAT_ID]
        bats[device_id][SEQ_PS_ID][ttl_synapse_id] = seq_ps_t[BAT_ID]
        bats[device_id][PAR_PN_PA_ID][ttl_synapse_id] = par_pn_pa_t[BAT_ID]
        bats[device_id][PAR_PS_PA_ID][ttl_synapse_id] = par_ps_pa_t[BAT_ID]
        rts[device_id][SEQ_PN_ID][ttl_synapse_id] = seq_pn_t[RT_ID]
        rts[device_id][SEQ_PS_ID][ttl_synapse_id] = seq_ps_t[RT_ID]
        rts[device_id][PAR_PN_PA_ID][ttl_synapse_id] = par_pn_pa_t[RT_ID]
        rts[device_id][PAR_PS_PA_ID][ttl_synapse_id] = par_ps_pa_t[RT_ID]
        tts[device_id][SEQ_PN_ID][ttl_synapse_id] = seq_pn_t[TT_ID]
        tts[device_id][SEQ_PS_ID][ttl_synapse_id] = seq_ps_t[TT_ID]
        tts[device_id][PAR_PN_PA_ID][ttl_synapse_id] = par_pn_pa_t[TT_ID]
        tts[device_id][PAR_PS_PA_ID][ttl_synapse_id] = par_ps_pa_t[TT_ID]
Save the execution time results into the DynamicSpikingSynapse/Benchmark directory.
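Note that np.save below assumes the Benchmark directory already exists; if it might not, a small guard (using os.makedirs with exist_ok, available since Python 3.2) is:
# Hedged sketch: create the benchmark directory if it is missing.
import os
os.makedirs("DynamicSpikingSynapse/Benchmark/", exist_ok = True)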
In [28]:
benchmark_dir = "DynamicSpikingSynapse/Benchmark/"
machine_name = "mac"
save_list(benchmark_dir + "exec_time_%s_device_names.txt" % machine_name, device_names)
np.save(benchmark_dir + "exec_time_%s_bats.npy" % machine_name, bats)
np.save(benchmark_dir + "exec_time_%s_rts.npy" % machine_name, rts)
np.save(benchmark_dir + "exec_time_%s_tts.npy" % machine_name, tts)
Load and plot the execution times of the devices benchmarked on the different machines (Mac and PC).
In [34]:
machine_names = ["mac", "pc"]
device_names_cmb = []
bats_cmb = []
rts_cmb = []
tts_cmb = []
for machine_name in machine_names:
if os.path.isfile(benchmark_dir + "exec_time_%s_device_names.txt" % machine_name):
device_names_cmb += load_list(benchmark_dir + "exec_time_%s_device_names.txt" % \
machine_name)
bats_cmb.append(np.load(benchmark_dir + "exec_time_%s_bats.npy" % machine_name))
rts_cmb.append(np.load(benchmark_dir + "exec_time_%s_rts.npy" % machine_name))
tts_cmb.append(np.load(benchmark_dir + "exec_time_%s_tts.npy" % machine_name))
bats_cmb = np.concatenate(bats_cmb)
rts_cmb = np.concatenate(rts_cmb)
tts_cmb = np.concatenate(tts_cmb)
In [36]:
plot_exec_time(bats_cmb, rts_cmb, tts_cmb, device_names_cmb, \
               methods_len, SEQ_PN_ID, PAR_PN_PA_ID, \
               SEQ_PS_ID, PAR_PS_PA_ID, \
               ttl_synapses)
The following are the hardware and software specifications of all devices.