In [1]:
%pylab inline
# We pull in the training, validation and test sets created according to the scheme described
# in the data exploration lesson.

import pandas as pd

samtrain = pd.read_csv('../datasets/samsung/samtrain.csv')
samval = pd.read_csv('../datasets/samsung/samval.csv')
samtest = pd.read_csv('../datasets/samsung/samtest.csv')

# We use the Python RandomForest package from the scikits.learn collection of algorithms. 
# The package is called sklearn.ensemble.RandomForestClassifier

# For this we need to convert the target column ('activity') to integer values 
# because the Python RandomForest package requires that.  
# In R it would have been a "factor" type and R would have used that for classification.

# We map activity to an integer according to
# laying = 1, sitting = 2, standing = 3, walk = 4, walkup = 5, walkdown = 6
# Code is in supporting library randomforest.py

import randomforests as rf
samtrain = rf.remap_col(samtrain,'activity')
samval = rf.remap_col(samval,'activity')
samtest = rf.remap_col(samtest,'activity')


Populating the interactive namespace from numpy and matplotlib
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-a625cd575675> in <module>()
     20 # Code is in supporting library randomforest.py
     21 
---> 22 import randomforests as rf
     23 samtrain = rf.remap_col(samtrain,'activity')
     24 samval = rf.remap_col(samval,'activity')

ImportError: No module named 'randomforests'

In [ ]: