In [1]:
%pylab inline
import pandas as pd
The train set has dummy-encoded suppliers and bracket_pricing, plus two new features: year and quantity_rep.
In [2]:
# Read the adjusted training set from the notebook's working directory.
train_set = pd.read_csv(filepath_or_buffer='./train_set_adjusted.csv')
In [6]:
# Sanity check: (rows, columns) of the training set just loaded.
# Parenthesised so the line parses under both Python 2 and Python 3.
print(train_set.shape)
# train_set.dtypes
In [8]:
# train_set.sum()
In [9]:
# Read the tube feature table.
# NOTE(review): the file name suggests material_id was imputed and
# dummy-encoded upstream — confirm against the notebook that produced it.
tube = pd.read_csv(filepath_or_buffer='./tube_material_id_imputed_dummies_drop_ns.csv')
In [19]:
# Sanity check: size and column labels of the tube table.
# Parenthesised so the lines parse under both Python 2 and Python 3.
print(tube.shape)
print(tube.columns)
In [13]:
# Read the spec table (dummy-encoded, going by the file name).
spec = pd.read_csv(filepath_or_buffer='./spec_dummies.csv')
In [14]:
# Sanity check: size and column labels of the spec table.
# Parenthesised so the lines parse under both Python 2 and Python 3.
print(spec.shape)
print(spec.columns)
In [15]:
# Read the component-type table.
# NOTE(review): the variable is named "..._weight" but the file is
# comp_type_dummies.csv — confirm which columns it actually holds.
comp_type_weight = pd.read_csv(filepath_or_buffer='./comp_type_dummies.csv')
In [16]:
# Sanity check: size and column labels of the component-type table.
# Parenthesised so the lines parse under both Python 2 and Python 3.
print(comp_type_weight.shape)
print(comp_type_weight.columns)
In [17]:
# Read the tube volume table.
tube_vol = pd.read_csv(filepath_or_buffer='./tube_volume.csv')
In [18]:
# Sanity check: size and column labels of the tube volume table.
# Parenthesised so the lines parse under both Python 2 and Python 3.
print(tube_vol.shape)
print(tube_vol.columns)
In [23]:
# Assemble the modelling table: join the tube, spec, component-type and
# volume tables onto the training rows, then drop two columns that are not
# kept in the output.
# No `on=` / `how=` is passed, so every step is an inner join on whatever
# column names the two frames have in common.
# NOTE(review): an inner join silently discards rows without a match —
# compare train.shape with train_set.shape to confirm nothing was lost.
train = (
    train_set
    .merge(tube)
    .merge(spec)
    .merge(comp_type_weight)
    .merge(tube_vol)
    .drop(['quote_date', 'quantity_rep'], axis=1)
)
In [24]:
# Sanity check: size and column names of the merged table.
# Parenthesised so the lines parse under both Python 2 and Python 3.
print(train.shape)
print(train.columns.values)
In [26]:
# Persist the merged table to the working directory; index=False keeps the
# row index out of the CSV.
train.to_csv(path_or_buf='preprocessed_train.csv', index=False)
In [ ]: