In [3]:
# http://nullege.com/codes/show/src%40r%40a%40ramp-0.1.4%40examples%40iris.py/4/sklearn/python

import pandas
from ramp import *
import urllib2
import sklearn
# Explicit submodule imports: `import sklearn` alone does not guarantee that
# sklearn.ensemble / sklearn.linear_model attributes are available.
import sklearn.ensemble
import sklearn.linear_model
from sklearn import decomposition


# Column names for the UCI iris file (the file ships without a header row).
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

# Fetch the iris data from UCI.  header=None keeps the first record as data
# (without it pandas consumes the first iris row as a header and loses it),
# and dropna() discards the file's trailing blank/partial line instead of
# dropping a hard-coded row index.
data = pandas.read_csv(
    urllib2.urlopen(
        "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"),
    header=None, names=columns)
data = data.dropna()


# All predictor columns (everything but 'class'), with missing values
# filled with 0.
features = [FillMissing(f, 0) for f in columns[:-1]]

# features, log-transformed features, and interaction terms
expanded_features = (
    features +
    [Log(F(f) + 1) for f in features] +
    [
        F('sepal_width') ** 2,
        combo.Interactions(features),
    ]
)


# Define several models and feature sets to explore,
# run 5 fold cross-validation on each and print the results.
# We define 2 models and 4 feature sets, so this will be
# 4 * 2 = 8 models tested.
shortcuts.cv_factory(
    data=data,

    # Encode the string class labels as a categorical target.
    target=[AsFactor('class')],
    # Matthews correlation coefficient generalized to multiclass problems.
    metrics=[[metrics.GeneralizedMCC()]],

    # Try out two algorithms
    model=[
        sklearn.ensemble.RandomForestClassifier(n_estimators=20),
        sklearn.linear_model.LogisticRegression(),
        ],

    # and 4 feature sets
    features=[
        expanded_features,

        # Feature selection
        [trained.FeatureSelector(
            expanded_features,
            # use random forest's importance to trim
            selectors.RandomForestSelector(classifier=True),
            target=AsFactor('class'), # target to use
            n_keep=5, # keep top 5 features
            )],

        # Reduce feature dimension (pointless on this dataset)
        [combo.DimensionReduction(expanded_features,
                            decomposer=decomposition.PCA(n_components=4))],

        # Normalized features
        [Normalize(f) for f in expanded_features],
    ]
)


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-3-fd395698eb18> in <module>()
     61         # Reduce feature dimension (pointless on this dataset)
     62         [combo.DimensionReduction(expanded_features,
---> 63                             decomposer=decomposition.PCA(n_components=4))],
     64 
     65         # Normalized features

/usr/local/lib/python2.7/dist-packages/ramp/shortcuts.pyc in cv_factory(store, data, **kwargs)
     43     fact = ConfigFactory(Configuration(), **kwargs)
     44     for conf in fact:
---> 45         models.cv(conf, DataContext(store, data), **fargs)

/usr/local/lib/python2.7/dist-packages/ramp/models.pyc in cv(config, context, folds, repeat, print_results)
    110     for train, test in folds:
    111         ctx.train_index = train
--> 112         preds, x, y = predict(config, ctx, test)
    113         actuals = y.reindex(test)
    114         config.update_reporters_with_predictions(ctx, x, actuals, preds)

/usr/local/lib/python2.7/dist-packages/ramp/models.pyc in predict(config, context, predict_index, fit_model)
     64 
     65     if fit_model:
---> 66         x, y = fit(config, context)
     67 
     68     # TODO: possible to have x loaded without new prediction rows

/usr/local/lib/python2.7/dist-packages/ramp/models.pyc in fit(config, context)
     39         print "loading stored model..."
     40     except KeyError:
---> 41         x, y = get_xy(config, context)
     42 
     43         train_x = x.reindex(context.train_index)

/usr/local/lib/python2.7/dist-packages/ramp/models.pyc in get_xy(config, context)
     25 
     26 def get_xy(config, context):
---> 27     return get_x(config, context), get_y(config, context)
     28 
     29 

/usr/local/lib/python2.7/dist-packages/ramp/models.pyc in get_x(config, context)
     16 
     17 def get_x(config, context):
---> 18     x = build_featureset(config.features, context)
     19     if config.column_subset:
     20         x = x[config.column_subset]

/usr/local/lib/python2.7/dist-packages/ramp/builders.pyc in build_featureset(features, context)
     40     x = []
     41     for feature in features:
---> 42         x.append(build_feature_safe(feature, context))
     43     for d in x[1:]:
     44         assert (d.index == x[0].index).all(), "Mismatched indices after feature creation"

/usr/local/lib/python2.7/dist-packages/ramp/builders.pyc in build_feature_safe(feature, context)
     12 
     13 def build_feature_safe(feature, context):
---> 14     d = feature.create(context)
     15 
     16     # sanity check index is valid

/usr/local/lib/python2.7/dist-packages/ramp/features/base.pyc in create(self, context, force)
    226             print "creating '%s' ..." % (self.unique_name)
    227 
--> 228         data = self.create_data(force)
    229 
    230         # cache it

/usr/local/lib/python2.7/dist-packages/ramp/features/base.pyc in create_data(self, force)
    177             datas.append(data)
    178         # actually apply the feature
--> 179         data = self._create(datas)
    180         return data
    181 

/usr/local/lib/python2.7/dist-packages/ramp/features/base.pyc in _create(self, datas)
    242         Actual feature creation.
    243         """
--> 244         data = self.combine(datas)
    245         hsh = self._hash() # cache this so we dont recompute for every column
    246         data.columns = data.columns.map(lambda x: self.column_rename(x, hsh))

/usr/local/lib/python2.7/dist-packages/ramp/features/combo.pyc in combine(self, datas)
     48                 colnames.append('%s, %s' % (d1.name, d2.name))
     49                 cols.append(d)
---> 50         return concat(cols, keys=colnames, axis=1)
     51 
     52 class OutlierCount(ComboFeature):

/usr/lib/pymodules/python2.7/pandas/tools/merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity)
    688                        ignore_index=ignore_index, join=join,
    689                        keys=keys, levels=levels, names=names,
--> 690                        verify_integrity=verify_integrity)
    691     return op.get_result()
    692 

/usr/lib/pymodules/python2.7/pandas/tools/merge.pyc in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity)
    747         self.verify_integrity = verify_integrity
    748 
--> 749         self.new_axes = self._get_new_axes()
    750 
    751     def get_result(self):

/usr/lib/pymodules/python2.7/pandas/tools/merge.pyc in _get_new_axes(self)
    874             concat_axis = self._get_concat_axis()
    875 
--> 876         new_axes[self.axis] = concat_axis
    877 
    878         if self.join_axes is None:

IndexError: list assignment index out of range
creating 'FillMissing(sepal_length) [d02c92d9]' ...
creating 'FillMissing(sepal_width) [e1442d27]' ...
creating 'FillMissing(petal_length) [f90f0467]' ...
creating 'FillMissing(petal_width) [090e2f8a]' ...
creating 'log(Add(FillMissing(sepal_length), 1)) [dd0f90fb]' ...
creating 'Add(FillMissing(sepal_length), 1) [fdbd2e3d]' ...
creating 'FillMissing(sepal_length) [6b6d377d]' ...
loading 'FillMissing(sepal_length) [d02c92d9]'
creating 'log(Add(FillMissing(sepal_width), 1)) [6d23658a]' ...
creating 'Add(FillMissing(sepal_width), 1) [55ebcff2]' ...
creating 'FillMissing(sepal_width) [0275e7a1]' ...
loading 'FillMissing(sepal_width) [e1442d27]'
creating 'log(Add(FillMissing(petal_length), 1)) [f8f0cd65]' ...
creating 'Add(FillMissing(petal_length), 1) [38657b4f]' ...
creating 'FillMissing(petal_length) [266ee432]' ...
loading 'FillMissing(petal_length) [f90f0467]'
creating 'log(Add(FillMissing(petal_width), 1)) [eba7e39c]' ...
creating 'Add(FillMissing(petal_width), 1) [ccaa408c]' ...
creating 'FillMissing(petal_width) [c3522ef2]' ...
loading 'FillMissing(petal_width) [090e2f8a]'
creating 'Power(sepal_width) [8391663f]' ...
creating 'sepal_width [45e711dc]' ...
creating 'Interactions(FillMissing(sepal_length), FillMissing(sepal_width), FillMissing(petal_length), FillMissing(petal_width)) [2a89d6b4]' ...
loading 'FillMissing(sepal_length) [d02c92d9]'
loading 'FillMissing(sepal_width) [e1442d27]'
loading 'FillMissing(petal_length) [f90f0467]'
loading 'FillMissing(petal_width) [090e2f8a]'

In [ ]: