In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Öffnen von HDF mittels pandas


In [3]:
# Open the HDF store read-only: this notebook only reads datasets from it,
# and the pandas default mode 'a' would open the file writable (and lock it).
hdf = pd.HDFStore('../../data/raw/TestMessungen_NEU.hdf', mode='r')
df = hdf.get("/x1/t1/trx_3_1")
df.head()


Out[3]:
index Timestamp trx_1_3_ifft_0 trx_1_3_ifft_1 trx_1_3_ifft_2 trx_1_3_ifft_3 trx_1_3_ifft_4 trx_1_3_ifft_5 trx_1_3_ifft_6 trx_1_3_ifft_7 ... trx_4_3_ifft_1992 trx_4_3_ifft_1993 trx_4_3_ifft_1994 trx_4_3_ifft_1995 trx_4_3_ifft_1996 trx_4_3_ifft_1997 trx_4_3_ifft_1998 trx_4_3_ifft_1999 trx_4_3_ts target
0 0 1.492291e+09 0.019133 0.018392 0.017630 0.016844 0.016038 0.015211 0.014365 0.013500 ... 0.023663 0.022952 0.022242 0.021535 0.020833 0.020139 0.019456 0.018786 1.488963e+12 Empty_0.0,0.0_0.0,0.0
1 1 1.492291e+09 0.019535 0.018800 0.018042 0.017263 0.016464 0.015645 0.014809 0.013956 ... 0.020158 0.019421 0.018689 0.017965 0.017253 0.016556 0.015879 0.015225 1.488963e+12 Empty_0.0,0.0_0.0,0.0
2 2 1.492291e+09 0.021150 0.020419 0.019665 0.018889 0.018091 0.017274 0.016438 0.015584 ... 0.022223 0.021502 0.020793 0.020102 0.019432 0.018786 0.018170 0.017589 1.488963e+12 Empty_0.0,0.0_0.0,0.0
3 3 1.492291e+09 0.015211 0.014543 0.013854 0.013144 0.012416 0.011669 0.010905 0.010126 ... 0.025602 0.024654 0.023704 0.022753 0.021805 0.020863 0.019929 0.019008 1.488963e+12 Empty_0.0,0.0_0.0,0.0
4 4 1.492291e+09 0.020133 0.019436 0.018715 0.017971 0.017206 0.016419 0.015612 0.014786 ... 0.020872 0.020214 0.019572 0.018951 0.018354 0.017785 0.017249 0.016749 1.488963e+12 Empty_0.0,0.0_0.0,0.0

5 rows × 12009 columns

Beispiel Erkenner

Datensätze vorbereiten


In [41]:
from evaluation import *
from filters import *
from utility import *
from features import *
from new_features import *
# NOTE(review): wildcard imports hide where each helper (rf_grouped, cf_diff,
# reduce_dim_PCA, ...) is defined; prefer explicit imports once the required
# names are pinned down.

# Build one feature dataset per measurement run t1..t3 from the open HDF store.
tst = ['1', '2', '3']
tst_ds = []

for t in tst:
    # Raw ifft measurement frame of transceiver pair 3->1 for run t.
    df_tst = hdf.get('/x1/t' + t + '/trx_3_1')

    # Filter distorted frames before any feature extraction.
    df_tst, _ = distortion_filter(df_tst)

    groups = get_trx_groups(df_tst)

    # Window features computed directly on the raw ifft bins, each compressed
    # to 10 principal components.
    df_cf_mean = reduce_dim_PCA(
        cf_mean_window(df_tst, window=3, column_key="ifft", label=None).fillna(0),
        n_comps=10)
    df_cf_ptp = reduce_dim_PCA(
        cf_ptp(df_tst, window=3, column_key="ifft", label=None).fillna(0),
        n_comps=10)

    # Row-wise features per transceiver group; only df_mean carries the label
    # column forward.
    df_mean = rf_grouped(df_tst, groups=groups, fn=rf_mean_single, label='target')
    df_p2p = rf_grouped(df_tst, groups=groups, fn=rf_ptp_single)  # peak-to-peak feature
    df_kurt = rf_grouped(df_tst, groups=groups, fn=rf_kurtosis_single)
    df_skew = rf_grouped(df_tst, groups=groups, fn=rf_skew_single)

    df_all = pd.concat([df_mean, df_p2p, df_kurt, df_skew], axis=1)

    # Smooth and differentiate the row features, then compress to 10 components.
    df_all = cf_std_window(df_all, window=4, label='target')
    df_all = cf_diff(df_all, label='target')
    df_all = reduce_dim_PCA(df_all.fillna(0), n_comps=10, label='target')

    # NOTE(review): all three PCA frames emit columns named pca_0..pca_9, so
    # the concatenated frame has duplicate column labels (visible in the
    # printed index below); renaming per source frame would make the
    # features distinguishable.
    df_all = pd.concat([df_all, df_cf_mean, df_cf_ptp], axis=1)

    # Collapse the raw target string into a presence/absence class label.
    df_tst_sum = generate_class_label_presence(df_all, state_variable='target')

    # Drop any leftover index columns.
    df_tst_sum = df_tst_sum[
        df_tst_sum.columns.values[~df_tst_sum.columns.str.contains('index')].tolist()]

    print('Columns in Dataset:', t)
    print(df_tst_sum.columns)

    tst_ds.append(df_tst_sum.copy())


Columns in Dataset: 1
Index(['pca_0', 'pca_0', 'pca_0', 'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2',
       'pca_2', 'pca_3', 'pca_3', 'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5',
       'pca_5', 'pca_5', 'pca_6', 'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7',
       'pca_8', 'pca_8', 'pca_8', 'pca_9', 'pca_9', 'pca_9', 'target', 'pca_0',
       'pca_0', 'pca_0', 'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2', 'pca_2',
       'pca_3', 'pca_3', 'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5', 'pca_5',
       'pca_5', 'pca_6', 'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7', 'pca_8',
       'pca_8', 'pca_8', 'pca_9', 'pca_9', 'pca_9', 'pca_0', 'pca_0', 'pca_0',
       'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2', 'pca_2', 'pca_3', 'pca_3',
       'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5', 'pca_5', 'pca_5', 'pca_6',
       'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7', 'pca_8', 'pca_8', 'pca_8',
       'pca_9', 'pca_9', 'pca_9'],
      dtype='object')
Columns in Dataset: 2
Index(['pca_0', 'pca_0', 'pca_0', 'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2',
       'pca_2', 'pca_3', 'pca_3', 'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5',
       'pca_5', 'pca_5', 'pca_6', 'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7',
       'pca_8', 'pca_8', 'pca_8', 'pca_9', 'pca_9', 'pca_9', 'target', 'pca_0',
       'pca_0', 'pca_0', 'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2', 'pca_2',
       'pca_3', 'pca_3', 'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5', 'pca_5',
       'pca_5', 'pca_6', 'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7', 'pca_8',
       'pca_8', 'pca_8', 'pca_9', 'pca_9', 'pca_9', 'pca_0', 'pca_0', 'pca_0',
       'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2', 'pca_2', 'pca_3', 'pca_3',
       'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5', 'pca_5', 'pca_5', 'pca_6',
       'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7', 'pca_8', 'pca_8', 'pca_8',
       'pca_9', 'pca_9', 'pca_9'],
      dtype='object')
Columns in Dataset: 3
Index(['pca_0', 'pca_0', 'pca_0', 'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2',
       'pca_2', 'pca_3', 'pca_3', 'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5',
       'pca_5', 'pca_5', 'pca_6', 'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7',
       'pca_8', 'pca_8', 'pca_8', 'pca_9', 'pca_9', 'pca_9', 'target', 'pca_0',
       'pca_0', 'pca_0', 'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2', 'pca_2',
       'pca_3', 'pca_3', 'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5', 'pca_5',
       'pca_5', 'pca_6', 'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7', 'pca_8',
       'pca_8', 'pca_8', 'pca_9', 'pca_9', 'pca_9', 'pca_0', 'pca_0', 'pca_0',
       'pca_1', 'pca_1', 'pca_1', 'pca_2', 'pca_2', 'pca_2', 'pca_3', 'pca_3',
       'pca_3', 'pca_4', 'pca_4', 'pca_4', 'pca_5', 'pca_5', 'pca_5', 'pca_6',
       'pca_6', 'pca_6', 'pca_7', 'pca_7', 'pca_7', 'pca_8', 'pca_8', 'pca_8',
       'pca_9', 'pca_9', 'pca_9'],
      dtype='object')

Validierung mittels Hold-out


In [42]:
# holdout validation
print(hold_out_val(tst_ds, target='target', include_self=False, cl='rf', verbose=False, random_state=1))
print(hold_out_val(tst_ds, target='target', include_self=False, cl='ab', verbose=False, random_state=1))
print(hold_out_val(tst_ds, target='target', include_self=False, cl='nb', verbose=False, random_state=1))
print(hold_out_val(tst_ds, target='target', include_self=False, cl='knn', verbose=False, random_state=1))
print(hold_out_val(tst_ds, target='target', include_self=False, cl='ld', verbose=False, random_state=1))
print(hold_out_val(tst_ds, target='target', include_self=False, cl='qd', verbose=False, random_state=1))
print(hold_out_val(tst_ds, target='target', include_self=False, cl='svc', verbose=False, random_state=1))


(0.70167962430145014, 0.21734257933619272)
(0.7698325453417959, 0.11832426064783956)
(0.88138734100310501, 0.049978003065224964)
(0.70460302131741093, 0.11468773000515596)
(0.70545455986401306, 0.10610315814909566)
/gpfs/software/x86_64/anaconda/envs/anaconda431-py35/lib/python3.5/site-packages/sklearn/discriminant_analysis.py:387: UserWarning: Variables are collinear.
  warnings.warn("Variables are collinear.")
/gpfs/software/x86_64/anaconda/envs/anaconda431-py35/lib/python3.5/site-packages/sklearn/discriminant_analysis.py:695: UserWarning: Variables are collinear
  warnings.warn("Variables are collinear")
/gpfs/software/x86_64/anaconda/envs/anaconda431-py35/lib/python3.5/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
(0.41921296081030351, 0.036657412320235012)
(0.60622940862780628, 0.15814172039618063)

extra feature "peak to peak"

Schließen von HDF Store


In [5]:
hdf.close()