In [11]:
import numpy as np
import pandas as pd

In [12]:
from include.dataset_fnames import generate_station_data_fname
from include.feature_lists import numeric_features

In [ ]:


In [ ]:


In [13]:
# Read only the 'idx' and 'product' columns, using 'idx' as the frame's index.
product_ids_df = pd.read_csv(
    'product_man_10.csv',
    index_col=['idx'],
    usecols=['idx', 'product'],
)

In [14]:
# Print a summary of the loaded frame: index range, per-column non-null counts, dtypes and memory usage.
product_ids_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1183165 entries, 4 to 2367495
Data columns (total 1 columns):
product    1183165 non-null int64
dtypes: int64(1)
memory usage: 18.1 MB

In [15]:
%%time 

# For each station (in sorted key order of `numeric_features`): read the station's
# numeric training file, add a 'product' column looked up by Id in `product_ids_df`,
# and write the result to the file name generated with use_product=True.
for station_id in sorted(numeric_features):
    fname = generate_station_data_fname(station_id, sample_type='train', data_type='numeric', allow_nan_values=False)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(fname)

    # Load only the Id column plus the features listed for this station.
    station_features = numeric_features[station_id]
    features = ['Id'] + station_features
    station_df = pd.read_csv(fname, usecols=features, index_col=['Id'])

    # reindex() instead of .loc[labels]: an Id that is absent from product_ids_df
    # yields NaN rather than raising KeyError on current pandas versions.
    products = product_ids_df['product'].reindex(station_df.index)
    station_df['product'] = products

    fname2 = generate_station_data_fname(station_id, sample_type='train', data_type='numeric', use_product=True, allow_nan_values=False)

    station_df.to_csv(fname2)


d:/Kaggle_ws/Bosch/data/stations/train\L0S00_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S01_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S02_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S03_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S04_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S05_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S06_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S07_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S08_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S09_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S10_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S11_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S12_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S13_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S14_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S15_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S16_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S17_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S18_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S19_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S20_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S21_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S22_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L0S23_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L1S24_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L1S25_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L2S26_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L2S27_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L2S28_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S29_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S30_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S31_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S32_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S33_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S34_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S35_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S36_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S37_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S38_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S39_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S40_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S41_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S42_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S43_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S44_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S45_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S46_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S47_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S48_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S49_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S50_notnull_numeric.csv
d:/Kaggle_ws/Bosch/data/stations/train\L3S51_notnull_numeric.csv
Wall time: 2min 37s

In [ ]: