In [3]:
import numpy as np
import pandas as pd
import holoviews as hv
import scipy.io as sio

In [235]:
mat = sio.loadmat('/Users/elijahc/dev/ml_v1/data/timeseries/02_timeseries.mat')
ts = mat['timeseries']
nsamples = ts.shape[0]
nfeat = ts.shape[1]
binning = 10
extra_samples = nsamples % 50
if extra_samples > 0:
    ts = ts[:-extra_samples]

ts = ts.reshape(binning,-1,nfeat).sum(axis=0).astype(np.int8)
index = np.expand_dims(np.arange(ts.shape[0]),axis=0)
ts = ts.swapaxes(0,1)
data = np.concatenate([ts,index],axis=0).astype(np.int32)
index.shape
binned_samples = ts.shape[0]

print(index)
print(data.sum(axis=0).shape)
non_zero_rows = data.sum(axis=0)-index
cond = np.where(non_zero_rows > 0)
sparse_ts = []
print(data.shape)
data = data[:,cond[1]]
data.shape


[[     0      1      2 ..., 606097 606098 606099]]
(606100,)
(38, 606100)
Out[235]:
(38, 530781)

In [236]:
print(data.shape)
time_arr = data[-1:] * binning
data_pts = data[:-1].sum()

i = 0
cond = np.nonzero(data[:-1])
bin_c = np.bincount(data[cond])
vals = data[cond]
print(bin_c)
print(bin_c.sum())
print(vals.sum())


(38, 530781)
[      0 1711640  122640    7561     373      13]
1842227
1981160

In [237]:
nid, tbin = cond
final_data = []

for i, (n,t,val) in enumerate(zip(nid,tbin,vals)):
    if i % 100000 ==0:
        print ('i: %g, n: %g, t: %g val: %g' % (i,n,t,val))
    final_data.append([np.squeeze(time_arr)[t],n,val])
#final_data = final_data.extend([ [time_arr[0,t],n] for _ in np.arange(val) ])

final_data_np = np.array(final_data)
print(final_data_np.shape)
#for t in zip(np.arange(data.shape[1]):
    
df = pd.DataFrame(final_data_np.astype(np.int32),columns=['time','nid','spikes'])
df


i: 0, n: 0, t: 47 val: 1
i: 100000, n: 2, t: 178726 val: 1
i: 200000, n: 4, t: 240413 val: 1
i: 300000, n: 5, t: 527838 val: 1
i: 400000, n: 8, t: 283563 val: 1
i: 500000, n: 9, t: 393596 val: 1
i: 600000, n: 11, t: 178986 val: 1
i: 700000, n: 12, t: 405658 val: 2
i: 800000, n: 14, t: 314449 val: 1
i: 900000, n: 17, t: 38608 val: 1
i: 1e+06, n: 19, t: 225897 val: 1
i: 1.1e+06, n: 21, t: 17616 val: 2
i: 1.2e+06, n: 22, t: 93686 val: 1
i: 1.3e+06, n: 24, t: 233465 val: 1
i: 1.4e+06, n: 26, t: 207808 val: 1
i: 1.5e+06, n: 28, t: 118499 val: 1
i: 1.6e+06, n: 30, t: 182113 val: 1
i: 1.7e+06, n: 32, t: 288557 val: 1
i: 1.8e+06, n: 35, t: 231627 val: 1
(1842227, 3)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/internals.py in create_block_manager_from_blocks(blocks, axes)
   4246                 blocks = [make_block(values=blocks[0],
-> 4247                                      placement=slice(0, len(axes[0])))]
   4248 

/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2684 
-> 2685     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2686 

/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
    108                              'implies %d' % (len(self.values),
--> 109                                              len(self.mgr_locs)))
    110 

ValueError: Wrong number of items passed 3, placement implies 2

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-237-15bf210f9502> in <module>()
     12 #for t in zip(np.arange(data.shape[1]):
     13 
---> 14 df = pd.DataFrame(final_data_np.astype(np.int32),columns=['time','nid'])
     15 df

/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    295             else:
    296                 mgr = self._init_ndarray(data, index, columns, dtype=dtype,
--> 297                                          copy=copy)
    298         elif isinstance(data, (list, types.GeneratorType)):
    299             if isinstance(data, types.GeneratorType):

/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/frame.py in _init_ndarray(self, values, index, columns, dtype, copy)
    472             values = _possibly_infer_to_datetimelike(values)
    473 
--> 474         return create_block_manager_from_blocks([values], [columns, index])
    475 
    476     @property

/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/internals.py in create_block_manager_from_blocks(blocks, axes)
   4254         blocks = [getattr(b, 'values', b) for b in blocks]
   4255         tot_items = sum(b.shape[0] for b in blocks)
-> 4256         construction_error(tot_items, blocks[0].shape[1:], axes, e)
   4257 
   4258 

/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e)
   4231         raise ValueError("Empty data passed with indices specified.")
   4232     raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 4233         passed, implied))
   4234 
   4235 

ValueError: Shape of passed values is (3, 1842227), indices imply (2, 1842227)

In [238]:
df = pd.DataFrame(final_data_np.astype(np.int32),columns=['time','nid','spikes'])
df


Out[238]:
time nid spikes
0 590 0 1
1 680 0 1
2 710 0 1
3 830 0 1
4 890 0 1
5 1020 0 1
6 1130 0 1
7 1140 0 1
8 1160 0 1
9 1190 0 1
10 1220 0 1
11 1310 0 2
12 1330 0 1
13 1370 0 2
14 1380 0 2
15 1560 0 1
16 1610 0 1
17 1630 0 1
18 1640 0 2
19 1660 0 1
20 1670 0 1
21 1730 0 1
22 1820 0 1
23 1840 0 1
24 1890 0 1
25 2000 0 1
26 2110 0 1
27 2520 0 1
28 2810 0 2
29 4000 0 1
... ... ... ...
1842197 6055740 36 1
1842198 6055770 36 1
1842199 6055830 36 1
1842200 6055940 36 1
1842201 6055990 36 2
1842202 6056140 36 1
1842203 6056180 36 1
1842204 6056470 36 1
1842205 6056530 36 1
1842206 6056630 36 1
1842207 6056690 36 1
1842208 6057060 36 1
1842209 6057750 36 1
1842210 6058140 36 1
1842211 6058230 36 1
1842212 6058260 36 1
1842213 6058650 36 1
1842214 6058800 36 1
1842215 6058890 36 1
1842216 6059080 36 1
1842217 6059120 36 1
1842218 6059140 36 1
1842219 6059150 36 1
1842220 6059200 36 1
1842221 6059480 36 1
1842222 6060270 36 1
1842223 6060360 36 1
1842224 6060660 36 1
1842225 6060850 36 1
1842226 6060970 36 1

1842227 rows × 3 columns


In [ ]:
ts = ts[0:binned_samples//2,:2]
index = np.arange(ts.shape[0]) * 0.001 * binning
print(index.shape)
print(ts.shape)

neurons=np.array(['neuron %d' % x for x in np.arange(ts.shape[1])])
cols = np.append(neurons,['time'])
#columns_final = [].extend(cols).extend(['time'])
print(cols)
df = pd.DataFrame(data, columns=cols)

In [17]:
data= (index,ts[:,0])
#print(data)
#hv.notebook_extension('bokeh')
print(index.shape)
print(ts[:,0].shape)
#(hv.TimeSeries(data))


(60610,)
(60610,)

In [223]:
df


Out[223]:
time nid
0 590 0
1 680 0
2 710 0
3 830 0
4 890 0
5 1020 0
6 1130 0
7 1140 0
8 1160 0
9 1190 0
10 1220 0
11 1310 0
12 1310 0
13 1330 0
14 1370 0
15 1370 0
16 1380 0
17 1380 0
18 1560 0
19 1610 0
20 1630 0
21 1640 0
22 1640 0
23 1660 0
24 1670 0
25 1730 0
26 1820 0
27 1840 0
28 1890 0
29 2000 0
... ... ...
1981130 6055770 36
1981131 6055830 36
1981132 6055940 36
1981133 6055990 36
1981134 6055990 36
1981135 6056140 36
1981136 6056180 36
1981137 6056470 36
1981138 6056530 36
1981139 6056630 36
1981140 6056690 36
1981141 6057060 36
1981142 6057750 36
1981143 6058140 36
1981144 6058230 36
1981145 6058260 36
1981146 6058650 36
1981147 6058800 36
1981148 6058890 36
1981149 6059080 36
1981150 6059120 36
1981151 6059140 36
1981152 6059150 36
1981153 6059200 36
1981154 6059480 36
1981155 6060270 36
1981156 6060360 36
1981157 6060660 36
1981158 6060850 36
1981159 6060970 36

1981160 rows × 2 columns


In [227]:
from bokeh.models import Jitter
from bokeh.plotting import figure, show, output_file
from bokeh.charts import Scatter
from bokeh.models.sources import ColumnDataSource

#p = figure(plot_width=500, plot_height=400,title="Demonstration of Jitter transform")

factors = list(df.nid.unique())
print(factors)


output_file('/Users/elijahc/dev/ml_v1/output/scatter2.html')
#n = 0
#df[df['nid'] == n]
p=0
jitter = figure(width=500,plot_height=250)

for i, (k,grp) in enumerate(df.groupby("nid")):
    print('i: %g' % i)
    jitter.circle(x='time', y={'value': i+1, 'transform': Jitter(width=0.3)},
                  source=ColumnDataSource(data=grp),
                  alpha=0.3, size=5)


show(jitter)


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]
i: 0
i: 1
i: 2
i: 3
i: 4
i: 5
i: 6
i: 7
i: 8
i: 9
i: 10
i: 11
i: 12
i: 13
i: 14
i: 15
i: 16
i: 17
i: 18
i: 19
i: 20
i: 21
i: 22
i: 23
i: 24
i: 25
i: 26
i: 27
i: 28
i: 29
i: 30
i: 31
i: 32
i: 33
i: 34
i: 35
i: 36
ERROR:/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: n0 [renderer: GlyphRenderer(id='8dc4ef5e-e6ab-4f63-859e-d19610782446', ...)]
ERROR:/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: neuron 0 [renderer: GlyphRenderer(id='985d8d37-73df-42b1-b4c9-496355b4470c', ...)]

In [229]:
from bokeh.models import Jitter
from bokeh.plotting import figure, show, output_file
from bokeh.charts import Scatter
from bokeh.models.sources import ColumnDataSource

output_file('/Users/elijahc/dev/ml_v1/output/scatter3.html')
#n = 0
#df[df['nid'] == n]

p = Scatter(df,x='time',y='nid')

show(p)


ERROR:/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: n0 [renderer: GlyphRenderer(id='8dc4ef5e-e6ab-4f63-859e-d19610782446', ...)]
ERROR:/usr/local/opt/pyenv/versions/3.5.3/lib/python3.5/site-packages/bokeh/core/validation/check.py:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: neuron 0 [renderer: GlyphRenderer(id='985d8d37-73df-42b1-b4c9-496355b4470c', ...)]

In [239]:
import pickle
df.to_pickle('/Users/elijahc/dev/ml_v1/data/timeseries/02_timeseries_long.pkl')

In [240]:
test_read = pd.read_pickle('/Users/elijahc/dev/ml_v1/data/timeseries/02_timeseries_long.pkl')
test_read


Out[240]:
time nid spikes
0 590 0 1
1 680 0 1
2 710 0 1
3 830 0 1
4 890 0 1
5 1020 0 1
6 1130 0 1
7 1140 0 1
8 1160 0 1
9 1190 0 1
10 1220 0 1
11 1310 0 2
12 1330 0 1
13 1370 0 2
14 1380 0 2
15 1560 0 1
16 1610 0 1
17 1630 0 1
18 1640 0 2
19 1660 0 1
20 1670 0 1
21 1730 0 1
22 1820 0 1
23 1840 0 1
24 1890 0 1
25 2000 0 1
26 2110 0 1
27 2520 0 1
28 2810 0 2
29 4000 0 1
... ... ... ...
1842197 6055740 36 1
1842198 6055770 36 1
1842199 6055830 36 1
1842200 6055940 36 1
1842201 6055990 36 2
1842202 6056140 36 1
1842203 6056180 36 1
1842204 6056470 36 1
1842205 6056530 36 1
1842206 6056630 36 1
1842207 6056690 36 1
1842208 6057060 36 1
1842209 6057750 36 1
1842210 6058140 36 1
1842211 6058230 36 1
1842212 6058260 36 1
1842213 6058650 36 1
1842214 6058800 36 1
1842215 6058890 36 1
1842216 6059080 36 1
1842217 6059120 36 1
1842218 6059140 36 1
1842219 6059150 36 1
1842220 6059200 36 1
1842221 6059480 36 1
1842222 6060270 36 1
1842223 6060360 36 1
1842224 6060660 36 1
1842225 6060850 36 1
1842226 6060970 36 1

1842227 rows × 3 columns