In [1]:
    
%pylab inline
    
    
In [2]:
    
import pandas as pd
    
In [3]:
    
data_path = '/home/brazhe/yandex-disk/data-tmp/'
    
In [4]:
    
ls $data_path/*.csv
    
    
In [5]:
    
from swan import utils as swu
    
In [ ]:
    
    
In [6]:
    
from sklearn import manifold as skm
from sklearn import decomposition as skd
    
In [ ]:
    
    
In [7]:
    
open_data = pd.read_csv(data_path+'lefthand-open.csv')
    
In [8]:
    
shut_data = pd.read_csv(data_path+'lefthand-shut.csv')
    
In [9]:
    
open_data.describe()
    
    Out[9]:
In [10]:
    
shut_data.describe()
    
    Out[10]:
In [11]:
    
figure(figsize=(20,4))
open_data.plot(x='time', y='gforce',ax=gca())
axvline(2, color='y',ls='--')
axvline(42, color='y',ls='--')
xlabel('time, s')
    
    Out[11]:
    
In [12]:
    
figure(figsize=(20,4))
shut_data.plot(x='time', y='gforce',ax=gca())
axvline(2, color='y',ls='--')
axvline(65, color='y',ls='--')
xlabel('time, s')
    
    Out[12]:
    
In [13]:
    
open_data = open_data[(open_data.time < 42) & (open_data.time > 2)]
    
In [14]:
    
shut_data = shut_data[(shut_data.time < 65) & (shut_data.time > 2)]
    
We need to separate our data to training and test data sets. So we test our assumptions on a different piece of data than the algorithm has been trained on
In [15]:
    
dt = mean(diff(open_data.time))
    
In [16]:
    
print('Sampling interval: %0.2f ms'%(dt*1000))
    
    
In [17]:
    
%matplotlib qt
    
In [18]:
    
figure(figsize=(20,4))
shut_data.plot(x='time', y='x',ax=gca(), ls='-',marker='.')
    
    Out[18]:
    
In [19]:
    
from swan import utils as swu
    
In [20]:
    
swu.plot_spectrogram_with_ts(open_data.gforce[:-1], f_s = 1/dt, 
                             figsize=(16,8),
                             freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
    
    Out[20]:
    
In [21]:
    
swu.plot_spectrogram_with_ts(open_data.x[:-1], f_s = 1/dt, 
                             figsize=(16,8),
                             freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
    
    Out[21]:
    
In [22]:
    
swu.plot_spectrogram_with_ts(open_data.y[:-1], f_s = 1/dt, 
                             figsize=(16,8),
                             freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
    
    Out[22]:
    
In [23]:
    
swu.plot_spectrogram_with_ts(open_data.z[:-1], f_s = 1/dt, 
                             figsize=(16,8),
                             freqs=linspace(0.25,25,512),cmap='viridis',padding='reflect')
gcf()
    
    Out[23]:
    
In [24]:
    
swu.plot_spectrogram_with_ts(shut_data.gforce, f_s = 1/dt, 
                             figsize=(16,8),
                             freqs=linspace(0.5,25,512),cmap='viridis',padding='reflect')
gcf()
    
    Out[24]:
    
In [25]:
    
close('all')
    
Our test data will be the last 10 seconds of each recording
In [26]:
    
def standardize(x):
    return (x-x.mean(0))/x.std(0)
def split_data(df, sep=10):
    cond =  df.time >= df.time.max()-sep
    train,test = df[~cond], df[cond]
    convert = lambda x: array(x)[:,1:] # convert to array and throw away time column
    return standardize(convert(train)), standardize(convert(test))
    
In [27]:
    
open_train, open_test = split_data(open_data)
shut_train, shut_test = split_data(shut_data)
    
In [28]:
    
plot(open_test[:,0])
plot(open_test[:,1])
    
    Out[28]:
    
In [29]:
    
%time u,s,vh = svd(open_train, full_matrices=False)
    
    
In [30]:
    
%time tSVD = skd.TruncatedSVD(n_components=2, algorithm='arpack') #exact algorithm
    
    
In [31]:
    
coords = tSVD.fit_transform(open_train)
coords_test = tSVD.transform(open_test)
    
In [32]:
    
plot(coords_test[:,1])
    
    Out[32]:
    
In [33]:
    
figure();
plot(cumsum(s**2)/np.sum(s**2),'s-')
gcf()
    
    Out[33]:
    
In [34]:
    
figure();
plot(u[:,0]*s[0])
plot(coords[:,0])
    
    Out[34]:
    
In [35]:
    
figure()
plot(coords[:,0],coords[:,1],'.')
    
    Out[35]:
    
In [36]:
    
plot(coords_test[:,0],coords_test[:,1],'.')
    
    Out[36]:
    
In [37]:
    
def extract_random_slice(arr,L=2000):
    N = len(arr)
    start = randint(0, N-L)
    cut =  arr[start:start+L]
    return cut - cut.mean(0)
    
In [38]:
    
train_slices_open = [ravel(extract_random_slice(open_train)) for n in range(4000)]
train_slices_shut = [ravel(extract_random_slice(shut_train)) for n in range(4000)]
    
In [39]:
    
test_slices_open = [ravel(extract_random_slice(open_test)) for n in range(4000)]
test_slices_shut = [ravel(extract_random_slice(shut_test)) for n in range(4000)]
    
In [40]:
    
train_data = vstack(train_slices_open + train_slices_shut)
test_data = vstack(test_slices_open + test_slices_shut)
    
In [41]:
    
train_data.shape
    
    Out[41]:
In [42]:
    
Ltrain, Ltest = len(train_data), len(test_data)
    
In [ ]:
    
    
todo: compare approximate and exact SVD algorithms
In [43]:
    
tSVD = skd.TruncatedSVD(25)
    
In [44]:
    
%time tSVD.fit(train_data)
    
    
    Out[44]:
In [45]:
    
%time coords_test = tSVD.transform(test_data)
    
    
In [46]:
    
%time coords = tSVD.transform(train_data)
    
    
In [65]:
    
figure(figsize=(10,10))
k1,k2 =0,1
plot(coords[:Ltrain//2,k1], coords[:Ltrain//2,k2], '+',alpha=0.5)
plot(coords[Ltrain//2:,k1], coords[Ltrain//2:,k2], '+',alpha=0.5)
legend(['Open (train)', 'Shut (train)'])
#gcf()
    
    Out[65]:
    
In [64]:
    
figure(figsize=(10,10))
k1,k2 = 0,1
plot(coords_test[:Ltest//2,k1], coords_test[:Ltest//2,k2], '+',alpha=0.5)
plot(coords_test[Ltest//2:,k1], coords_test[Ltest//2:,k2], '+',alpha=0.5)
legend(['Open (test)', 'Shut (test)'])
#gcf()
    
    Out[64]:
    
In [49]:
    
tsne = skm.TSNE()
mds = skm.MDS()
    
In [67]:
    
tr = tsne
    
In [68]:
    
%time mn_coords_train = tr.fit_transform(coords)
    
    
In [69]:
    
%time mn_coords_test = tr.fit_transform(coords_test)
    
    
In [70]:
    
figure(figsize=(10,10))
plot(mn_coords_train[:Ltrain//2,0],mn_coords_train[:Ltrain//2,1],'+', alpha=0.5)
plot(mn_coords_train[Ltrain//2:,0],mn_coords_train[Ltrain//2:,1],'+', alpha=0.5)
#gcf()
    
    Out[70]:
    
In [ ]:
    
    
In [71]:
    
figure(figsize=(10,10))
plot(mn_coords_test[:Ltest//2,0],mn_coords_test[:Ltest//2,1],'+',alpha=0.5)
plot(mn_coords_test[Ltest//2:,0],mn_coords_test[Ltest//2:,1],'+',alpha=0.5)
#gcf()
    
    Out[71]:
    
In [72]:
    
close('all')
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: