In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
from include.feature_lists import numeric_features
from include.dataset_fnames import generate_station_data_fname, generate_data_fname
In [3]:
# Disabled: per-station file name listing (kept for reference, not executed).
# for station_key in sorted(numeric_features):
#     fname = generate_station_data_fname(station_id=station_key, sample_type='train', data_type='numeric', allow_nan_values=False)
#     print fname
In [4]:
# Ask the project helper for the path of the numeric training-set file.
fname = generate_data_fname(sample_type='train', data_type='numeric')
# Call form of print: same output under Python 2 for a single argument,
# and valid syntax under Python 3 (the bare `print fname` statement is not).
print(fname)
In [14]:
# Read at most the first 500,000 data rows of the file at `fname`, using the
# 'Id' column as the index. The %time magic reports how long the read takes.
%time df_flow = pd.read_csv(fname, index_col='Id', nrows=500000)
In [15]:
# Turn the frame, in place, into a presence map: every cell that holds a value
# becomes 1 and every NaN cell becomes 0 (this applies to all columns alike).
# NOTE: not idempotent -- once this has run there are no NaNs left, so running
# the cell a second time marks every cell as 1.
missing = df_flow.isnull()
df_flow[~missing] = 1
df_flow[missing] = 0
# The mask is as large as the frame itself; release it right away.
del missing
In [16]:
# Preview the first five rows of the frame.
df_flow.head(n=5)
Out[16]:
In [18]:
# Reorder the rows by three L0_S0 feature columns: F0 first, ties broken by F2,
# then by F4.
df_flow = df_flow.sort_values(by=['L0_S0_F0', 'L0_S0_F2', 'L0_S0_F4'])
In [8]:
# Show the frame's dimensions as (number of rows, number of columns).
df_flow.shape
Out[8]:
In [21]:
# Draw a 0.4% random sample of the rows to keep the image small enough to plot.
# A fixed random_state makes the sample -- and therefore the figure built from
# it -- identical on every re-run instead of changing each time.
# reset_index(drop=True) discards the 'Id' index directly rather than turning
# it into a column and deleting it afterwards; 'Response' is removed as well so
# that only the remaining columns end up in the image.
img = (
    df_flow
    .sample(frac=0.004, random_state=0)
    .reset_index(drop=True)
    .drop('Response', axis=1)
)
In [36]:
# Sort keys for the sampled rows. The feature names are written out in
# ascending station/feature order and then flipped, so the last name listed
# ('L0_S9_F175') ends up as the first (primary) key.
col = [
    'L0_S0_F0', 'L0_S0_F2', 'L0_S0_F4', 'L0_S0_F6', 'L0_S0_F8',
    'L0_S0_F10', 'L0_S0_F12', 'L0_S0_F14', 'L0_S0_F16', 'L0_S0_F18',
    'L0_S0_F20', 'L0_S0_F22', 'L0_S1_F24', 'L0_S1_F28', 'L0_S2_F32',
    'L0_S2_F36', 'L0_S2_F40', 'L0_S2_F44', 'L0_S2_F48', 'L0_S2_F52',
    'L0_S2_F56', 'L0_S2_F60', 'L0_S2_F64', 'L0_S3_F68', 'L0_S3_F72',
    'L0_S3_F76', 'L0_S3_F80', 'L0_S3_F84', 'L0_S3_F88', 'L0_S3_F92',
    'L0_S3_F96', 'L0_S3_F100', 'L0_S4_F104', 'L0_S4_F109', 'L0_S5_F114',
    'L0_S5_F116', 'L0_S6_F118', 'L0_S6_F122', 'L0_S6_F132',
    'L0_S7_F136', 'L0_S7_F138', 'L0_S7_F142', 'L0_S8_F144',
    'L0_S8_F146', 'L0_S8_F149', 'L0_S9_F155', 'L0_S9_F160',
    'L0_S9_F165', 'L0_S9_F170', 'L0_S9_F175',
]
col.reverse()
In [37]:
# Reorder the sampled rows using the keys in `col`; the first name in the list
# is the most significant key. Rows are sorted (the default axis).
img = img.sort_values(by=col)
In [38]:
# Open a 20x30 inch figure and draw the sampled frame as an image, one pixel
# per (row, column) cell of `img`.
# NOTE(review): if the cells above ran in the order listed, the values are
# 1 (present) / 0 (missing) -- confirm, since the execution counts are not sequential.
plt.figure(figsize=(20,30))
plt.imshow(img.values)
Out[38]:
In [ ]: