In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
import os
import sys

# Directory added to the import path so that the `include.*` modules used by
# this notebook can be imported (presumably the project's `src` folder --
# confirm against the repository layout).
# Defaults to the original hard-coded location; set the BOSCH_SRC_DIR
# environment variable to point somewhere else on another machine.
SRC_DIR = os.environ.get('BOSCH_SRC_DIR', 'd:/Kaggle_ws/Bosch/src')

# Guard the append so that re-running this cell does not pile duplicate
# entries onto sys.path.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [4]:
from include.feature_lists import numeric_missing_features
from include.dataset_fnames import generate_station_data_fname

In [5]:
# Identifier of the station examined in the rest of this notebook.
station_id = 'L3S36'
# Show the feature names registered for this station in
# `numeric_missing_features` (presumably the numeric columns known to contain
# missing values -- confirm in include.feature_lists).
numeric_missing_features[station_id]


Out[5]:
['L3_S36_F3918', 'L3_S36_F3922', 'L3_S36_F3920', 'L3_S36_F3924']

In [6]:
# Columns to read: the `Id` column plus every feature listed for the station.
features = ['Id'] + numeric_missing_features[station_id]

# File holding the station's numeric training data, as named by the project
# helper; it is handed to pd.read_csv when the frame is loaded.
fname = generate_station_data_fname(
    data_type='numeric',
    sample_type='train',
    station_id=station_id,
)

In [7]:
# Load only the selected columns of the file and use `Id` as the row index.
station_df = pd.read_csv(
    fname,
    index_col='Id',
    usecols=features,
)

In [8]:
# Print a summary of the loaded frame: index range, per-column non-null
# counts, dtypes and memory usage. Comparing the non-null counts with the
# number of entries reveals how many values are missing in each column.
station_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 569032 entries, 9 to 2367495
Data columns (total 4 columns):
L3_S36_F3918    569031 non-null float64
L3_S36_F3920    569031 non-null float64
L3_S36_F3922    569031 non-null float64
L3_S36_F3924    569031 non-null float64
dtypes: float64(4)
memory usage: 21.7 MB

In [9]:
# Repeat the load for the test split. Note that this rebinds `fname`,
# `features` and `station_df`, so the training frame loaded earlier is no
# longer reachable under `station_df` once this cell has run.
features = ['Id'] + numeric_missing_features[station_id]
fname = generate_station_data_fname(
    data_type='numeric',
    sample_type='test',
    station_id=station_id,
)
station_df = pd.read_csv(
    fname,
    index_col='Id',
    usecols=features,
)

# Summary of the test frame (entries, non-null counts, dtypes, memory).
station_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 568882 entries, 2 to 2367482
Data columns (total 4 columns):
L3_S36_F3918    568882 non-null float64
L3_S36_F3920    568882 non-null float64
L3_S36_F3922    568882 non-null float64
L3_S36_F3924    568882 non-null float64
dtypes: float64(4)
memory usage: 21.7 MB

In [11]:
# Boolean mask of the rows whose L3_S36_F3922 value is missing.
# `station_df` is whichever split was loaded last (the test split when the
# cells are run top to bottom).
f3922_is_null = station_df['L3_S36_F3922'].isnull()

# Index labels (the `Id` values) of those rows, followed by how many there are.
test_indices = station_df.loc[f3922_is_null].index
len(test_indices)


Out[11]:
0

In [15]:
# Scratch check of the inequality operator. The Python 2-only spelling `<>`
# is a SyntaxError on Python 3; `!=` is valid on both and evaluates to the
# same result.
1 != 0


Out[15]:
True

In [ ]: