In [1]:
%pylab inline
In [2]:
import pandas as pd
In [3]:
data_path = '/home/brazhe/yandex-disk/data-tmp/'
In [4]:
ls $data_path/*.csv
In [5]:
from swan import utils as swu
In [ ]:
In [6]:
from sklearn import manifold as skm
from sklearn import decomposition as skd
In [ ]:
In [7]:
open_data = pd.read_csv(data_path+'lefthand-open.csv')
In [8]:
shut_data = pd.read_csv(data_path+'lefthand-shut.csv')
In [9]:
open_data.describe()
Out[9]:
In [10]:
shut_data.describe()
Out[10]:
In [11]:
figure(figsize=(20,4))
open_data.plot(x='time', y='gforce',ax=gca())
axvline(2, color='y',ls='--')
axvline(42, color='y',ls='--')
xlabel('time, s')
Out[11]:
In [12]:
figure(figsize=(20,4))
shut_data.plot(x='time', y='gforce',ax=gca())
axvline(2, color='y',ls='--')
axvline(65, color='y',ls='--')
xlabel('time, s')
Out[12]:
In [13]:
open_data = open_data[(open_data.time < 42) & (open_data.time > 2)]
In [14]:
shut_data = shut_data[(shut_data.time < 65) & (shut_data.time > 2)]
We need to separate our data to training and test data sets. So we test our assumptions on a different piece of data than the algorithm has been trained on
In [15]:
dt = mean(diff(open_data.time))
In [16]:
print('Sampling interval: %0.2f ms'%(dt*1000))
In [17]:
%matplotlib qt
In [18]:
figure(figsize=(20,4))
shut_data.plot(x='time', y='x',ax=gca(), ls='-',marker='.')
Out[18]:
In [19]:
from swan import utils as swu
In [20]:
swu.plot_spectrogram_with_ts(open_data.gforce[:-1], f_s = 1/dt,
figsize=(16,8),
freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
Out[20]:
In [21]:
swu.plot_spectrogram_with_ts(open_data.x[:-1], f_s = 1/dt,
figsize=(16,8),
freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
Out[21]:
In [22]:
swu.plot_spectrogram_with_ts(open_data.y[:-1], f_s = 1/dt,
figsize=(16,8),
freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
Out[22]:
In [23]:
swu.plot_spectrogram_with_ts(open_data.z[:-1], f_s = 1/dt,
figsize=(16,8),
freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
Out[23]:
In [24]:
swu.plot_spectrogram_with_ts(shut_data.gforce, f_s = 1/dt,
figsize=(16,8),
freqs=linspace(0.5,25,512),cmap='viridis',padding='reflect')
gcf()
Out[24]:
In [25]:
close('all')
Our test data will be the last 10 seconds of each recording
In [26]:
def standardize(x):
return (x-x.mean(0))/x.std(0)
def split_data(df, sep=10):
cond = df.time >= df.time.max()-sep
train,test = df[~cond], df[cond]
convert = lambda x: array(x)[:,1:] # convert to array and throw away time column
return standardize(convert(train)), standardize(convert(test))
In [27]:
open_train, open_test = split_data(open_data)
shut_train, shut_test = split_data(shut_data)
In [28]:
plot(open_test[:,0])
plot(open_test[:,1])
Out[28]:
In [29]:
%time u,s,vh = svd(open_train, full_matrices=False)
In [30]:
%time tSVD = skd.TruncatedSVD(n_components=2, algorithm='arpack') #exact algorithm
In [31]:
coords = tSVD.fit_transform(open_train)
coords_test = tSVD.transform(open_test)
In [32]:
plot(coords_test[:,1])
Out[32]:
In [33]:
figure();
plot(cumsum(s**2)/np.sum(s**2),'s-')
gcf()
Out[33]:
In [34]:
figure();
plot(u[:,0]*s[0])
plot(coords[:,0])
Out[34]:
In [35]:
figure()
plot(coords[:,0],coords[:,1],'.')
Out[35]:
In [36]:
plot(coords_test[:,0],coords_test[:,1],'.')
Out[36]:
In [37]:
def extract_random_slice(arr,L=2000):
N = len(arr)
start = randint(0, N-L)
cut = arr[start:start+L]
return cut - cut.mean(0)
In [38]:
train_slices_open = [ravel(extract_random_slice(open_train)) for n in range(4000)]
train_slices_shut = [ravel(extract_random_slice(shut_train)) for n in range(4000)]
In [39]:
test_slices_open = [ravel(extract_random_slice(open_test)) for n in range(4000)]
test_slices_shut = [ravel(extract_random_slice(shut_test)) for n in range(4000)]
In [40]:
train_data = vstack(train_slices_open + train_slices_shut)
test_data = vstack(test_slices_open + test_slices_shut)
In [41]:
train_data.shape
Out[41]:
In [42]:
Ltrain, Ltest = len(train_data), len(test_data)
In [ ]:
todo: compare approximate and exact SVD algorithms
In [43]:
tSVD = skd.TruncatedSVD(25)
In [44]:
%time tSVD.fit(train_data)
Out[44]:
In [45]:
%time coords_test = tSVD.transform(test_data)
In [46]:
%time coords = tSVD.transform(train_data)
In [65]:
figure(figsize=(10,10))
k1,k2 =0,1
plot(coords[:Ltrain//2,k1], coords[:Ltrain//2,k2], '+',alpha=0.5)
plot(coords[Ltrain//2:,k1], coords[Ltrain//2:,k2], '+',alpha=0.5)
legend(['Open (train)', 'Shut (train)'])
#gcf()
Out[65]:
In [64]:
figure(figsize=(10,10))
k1,k2 = 0,1
plot(coords_test[:Ltest//2,k1], coords_test[:Ltest//2,k2], '+',alpha=0.5)
plot(coords_test[Ltest//2:,k1], coords_test[Ltest//2:,k2], '+',alpha=0.5)
legend(['Open (test)', 'Shut (test)'])
#gcf()
Out[64]:
In [49]:
tsne = skm.TSNE()
mds = skm.MDS()
In [67]:
tr = tsne
In [68]:
%time mn_coords_train = tr.fit_transform(coords)
In [69]:
%time mn_coords_test = tr.fit_transform(coords_test)
In [70]:
figure(figsize=(10,10))
plot(mn_coords_train[:Ltrain//2,0],mn_coords_train[:Ltrain//2,1],'+', alpha=0.5)
plot(mn_coords_train[Ltrain//2:,0],mn_coords_train[Ltrain//2:,1],'+', alpha=0.5)
#gcf()
Out[70]:
In [ ]:
In [71]:
figure(figsize=(10,10))
plot(mn_coords_test[:Ltest//2,0],mn_coords_test[:Ltest//2,1],'+',alpha=0.5)
plot(mn_coords_test[Ltest//2:,0],mn_coords_test[Ltest//2:,1],'+',alpha=0.5)
#gcf()
Out[71]:
In [72]:
close('all')
In [ ]:
In [ ]:
In [ ]:
In [ ]: