In [1]:
%pylab inline
# We pull in the training, validation and test sets created according to the scheme described
# in the data exploration lesson.
import pandas as pd
samtrain = pd.read_csv('../datasets/samsung/samtrain.csv')
samval = pd.read_csv('../datasets/samsung/samval.csv')
samtest = pd.read_csv('../datasets/samsung/samtest.csv')
# We use the Python RandomForest package from the scikits.learn collection of algorithms.
# The package is called sklearn.ensemble.RandomForestClassifier
# For this we need to convert the target column ('activity') to integer values
# because the Python RandomForest package requires that.
# In R it would have been a "factor" type and R would have used that for classification.
# We map activity to an integer according to
# laying = 1, sitting = 2, standing = 3, walk = 4, walkup = 5, walkdown = 6
# Code is in supporting library randomforest.py
import randomforests as rf
samtrain = rf.remap_col(samtrain,'activity')
samval = rf.remap_col(samval,'activity')
samtest = rf.remap_col(samtest,'activity')
In [ ]: