In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from include.feature_lists import numeric_features
from include.dataset_fnames import generate_station_data_fname, generate_data_fname

In [3]:
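# Disabled: prints the file name returned by generate_station_data_fname for every key in numeric_features.
# for station_key in sorted(numeric_features):
#     fname = generate_station_data_fname(station_id=station_key, sample_type='train', data_type='numeric', allow_nan_values=False)
#     print(fname)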

In [4]:
# Ask the project helper for the path of the numeric training CSV and echo it
# so the reader can see which file the rest of the notebook loads.
fname = generate_data_fname(sample_type='train', data_type='numeric')
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(fname)


d:/Kaggle_ws/Bosch/input/train_numeric.csv

In [14]:
# Read at most the first 500000 data rows of the CSV at `fname`, using the
# 'Id' column as the index. The %time magic reports how long the read took.
%time df_flow = pd.read_csv(fname, index_col='Id', nrows=500000)


Wall time: 37.7 s

In [15]:
# Turn the measurements into a presence mask: 1.0 where a value was recorded,
# 0.0 where it was missing. One vectorised notnull() pass replaces the two
# full-frame boolean-mask assignments.
# Every column is converted, so 'Response' also becomes a presence flag here
# and no longer holds the value read from the CSV.
df_flow = df_flow.notnull().astype(float)

In [16]:
# Preview the first five rows of the presence mask (five is head()'s default).
df_flow.head(n=5)


Out[16]:
L0_S0_F0 L0_S0_F2 L0_S0_F4 L0_S0_F6 L0_S0_F8 L0_S0_F10 L0_S0_F12 L0_S0_F14 L0_S0_F16 L0_S0_F18 ... L3_S50_F4245 L3_S50_F4247 L3_S50_F4249 L3_S50_F4251 L3_S50_F4253 L3_S51_F4256 L3_S51_F4258 L3_S51_F4260 L3_S51_F4262 Response
Id
4 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1
6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1
7 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1
9 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1
11 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1

5 rows × 969 columns


In [18]:
# Order the rows by the presence flags of three features, ascending;
# the first key listed is the most significant one.
leading_flags = ['L0_S0_F0', 'L0_S0_F2', 'L0_S0_F4']
df_flow = df_flow.sort_values(by=leading_flags)

In [8]:
# Sanity check: (number of rows, number of columns) of the frame.
df_flow.shape


Out[8]:
(500000, 969)

In [21]:
# Build the matrix that will be drawn as an image.
# - sample(): keep a random 0.4% of the rows so the picture stays small; the
#   fixed random_state makes the same rows come back on every run, so the
#   figure is reproducible after a kernel restart.
# - reset_index(drop=True): discard the 'Id' index instead of turning it into
#   a column and deleting it afterwards.
# - drop('Response'): remove the 'Response' column so it is not drawn.
img = (
    df_flow
    .sample(frac=0.004, random_state=0)
    .reset_index(drop=True)
    .drop('Response', axis=1)
)

In [36]:
# Column names used as sort keys for the sampled rows, written in ascending
# order and grouped by the S<n> token of each name (presumably the station;
# confirm against the feature lists).
col = [
    # S0
    'L0_S0_F0', 'L0_S0_F2', 'L0_S0_F4', 'L0_S0_F6', 'L0_S0_F8', 'L0_S0_F10',
    'L0_S0_F12', 'L0_S0_F14', 'L0_S0_F16', 'L0_S0_F18', 'L0_S0_F20', 'L0_S0_F22',
    # S1
    'L0_S1_F24', 'L0_S1_F28',
    # S2
    'L0_S2_F32', 'L0_S2_F36', 'L0_S2_F40', 'L0_S2_F44', 'L0_S2_F48',
    'L0_S2_F52', 'L0_S2_F56', 'L0_S2_F60', 'L0_S2_F64',
    # S3
    'L0_S3_F68', 'L0_S3_F72', 'L0_S3_F76', 'L0_S3_F80', 'L0_S3_F84',
    'L0_S3_F88', 'L0_S3_F92', 'L0_S3_F96', 'L0_S3_F100',
    # S4
    'L0_S4_F104', 'L0_S4_F109',
    # S5
    'L0_S5_F114', 'L0_S5_F116',
    # S6
    'L0_S6_F118', 'L0_S6_F122', 'L0_S6_F132',
    # S7
    'L0_S7_F136', 'L0_S7_F138', 'L0_S7_F142',
    # S8
    'L0_S8_F144', 'L0_S8_F146', 'L0_S8_F149',
    # S9
    'L0_S9_F155', 'L0_S9_F160', 'L0_S9_F165', 'L0_S9_F170', 'L0_S9_F175',
]
# Flip the list in place so the last name above becomes the first (most
# significant) sort key and 'L0_S0_F0' the last.
col.reverse()

In [37]:
# Sort the sampled rows by the keys in `col` (first entry most significant).
# Row-wise sorting is sort_values' default axis, so it is not spelled out.
img = img.sort_values(by=col)

In [38]:
# Draw the sorted matrix as an image: one pixel row per row of `img`, one
# pixel column per column of `img`.
canvas_size = (20, 30)  # figure width and height, in inches
plt.figure(figsize=canvas_size)
plt.imshow(img.values)


Out[38]:
<matplotlib.image.AxesImage at 0x8f6f4358>

In [ ]: