In [ ]:
#GLOBAL VARIABLES

In [11]:
#LOAD DATA
from sklearn.feature_extraction
from sklearn.


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-e124bbc296d6> in <module>()
      1 #LOAD DATA
      2 from sklearn.datasets import tests
----> 3 tests()

TypeError: 'module' object is not callable

In [ ]:


In [ ]:


In [2]:
#CLEAN

In [12]:
#Preprossesing`
from sklearn import preprocessing
#Binarize data (set feature values to 0 or 1) according to a threshold
preprocessing.Binarizer(self, threshold=0.0, copy=True) 

#Constructs a transformer from an arbitrary callable.
preprocessing.FunctionTransformer(func=None, validate=True, accept_sparse=False, pass_y=False)

#Imputation transformer for completing missing values.
preprocessing.Imputer(missing_values='NaN', strategy='mean', axis=0, verbose=0, copy=True)


preprocessing.KernelCenterer??

#Binarize labels in a one-vs-all fashion
preprocessing.LabelBinarizer(self, neg_label=0, pos_label=1, sparse_output=False)

#Encode labels with value between 0 and n_classes-1.
preprocessing.LabelEncoder()       

#Scale each feature by its maximum absolute value.
preprocessing.MaxAbsScaler(copy=True)     

#Transforms features by scaling each feature to a given range.
preprocessing.MinMaxScaler(self, feature_range=(0, 1), copy=True)

#Transform between iterable of iterables and a multilabel format
preprocessing.MultiLabelBinarizer(classes=None, sparse_output=False) 

#Normalize samples individually to unit norm.)
preprocessing.Normalizer(norm='l2', copy=True)

#Encode categorical integer features using a one-hot aka one-of-K scheme.)
preprocessing.OneHotEncoder(n_values='auto', categorical_features='all', dtype=<type 'float'>, sparse=True, handle_unknown='error')

#Generate polynomial and interaction features.)
preprocessing.PolynomialFeatures(degree=2, interaction_only=False, include_bias=True)

#Scale features using statistics that are robust to outliers.)
preprocessing.RobustScaler(with_centering=True, with_scaling=True, copy=True)

#Standardize features by removing the mean and scaling to unit variance)
preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True)  #MAY NEED FURTH WHITENING


Out[12]:
<module 'sklearn.preprocessing' from '/home/gilad/VMachine/lib/python2.7/site-packages/sklearn/preprocessing/__init__.pyc'>

In [ ]:
#Train/Valid/Test Splitting

In [19]:
#Feature_Selection
from sklearn import decomposition
#Finds a dictionary (a set of atoms) that can best be used to represent datausing a sparse code.
decomposition.DictionaryLearning(n_components=None, alpha=1, max_iter=1000, tol=1e-08, fit_algorithm='lars', 
                                 transform_algorithm='omp', transform_n_nonzero_coefs=None, transform_alpha=None, 
                                 n_jobs=1, code_init=None, dict_init=None, verbose=False, split_sign=False, random_state=None)
# Factor Analysis (FA)

# A simple linear generative model with Gaussian latent variables.

# The observations are assumed to be caused by a linear transformation of
# lower dimensional latent factors and added Gaussian noise.
# Without loss of generality the factors are distributed according to a
# Gaussian with zero mean and unit covariance. The noise is also zero mean
# and has an arbitrary diagonal covariance matrix.

# If we would restrict the model further, by assuming that the Gaussian
# noise is even isotropic (all diagonal entries are the same) we would obtain
# :class:`PPCA`.

# FactorAnalysis performs a maximum likelihood estimate of the so-called
# `loading` matrix, the transformation of the latent variables to the
# observed ones, using expectation-maximization (EM).
decomposition.FactorAnalysis (n_components=None, tol=0.01, copy=True, max_iter=1000, 
                             noise_variance_init=None, svd_method='randomized', iterated_power=3, random_state=0)
    
#FastICA: a fast algorithm for Independent Component Analysis.
decomposition.FastICA(n_components=None, algorithm='parallel', whiten=True, fun='logcosh', 
                      fun_args=None, max_iter=200, tol=0.0001, w_init=None, random_state=None)

#Incremental principal components analysis (IPCA).
decomposition.IncrementalPCA(n_components=None, whiten=False, copy=True, batch_size=None)

#Kernel Principal component analysis (KPCA) Non-linear dimensionality reduction through the use of kernels (see:ref:`metrics`).
decomposition.KernelPCA(n_components=None, kernel='linear', gamma=None, degree=3, coef0=1, kernel_params=None, alpha=1.0, 
                        fit_inverse_transform=False, eigen_solver='auto', tol=0, max_iter=None, remove_zero_eig=False)

# Latent Dirichlet Allocation with online variational Bayes algorithm
decomposition.LatentDirichletAllocation(n_topics=10, doc_topic_prior=None, topic_word_prior=None, learning_method='online', learning_decay=0.7, 
                                        learning_offset=10.0, max_iter=10, batch_size=128, evaluate_every=-1, total_samples=1000000.0, perp_tol=0.1, 
                                        mean_change_tol=0.001, max_doc_update_iter=100, n_jobs=1, verbose=0, random_state=None)

#Mini-batch dictionary learning
decomposition.MiniBatchDictionaryLearning(n_components=None, alpha=1, n_iter=1000, fit_algorithm='lars', n_jobs=1, batch_size=3, shuffle=True,
                                          dict_init=None, transform_algorithm='omp', transform_n_nonzero_coefs=None, transform_alpha=None, 
                                          verbose=False, split_sign=False, random_state=None)

#Mini-batch Sparse Principal Components Analysis
# Finds the set of sparse components that can optimally reconstruct
# the data.  The amount of sparseness is controllable by the coefficient`
# of the L1 penalty, given by the parameter alpha.
decomposition.MiniBatchSparsePCA(n_components=None, alpha=1, ridge_alpha=0.01, n_iter=100, callback=None, batch_size=3, verbose=False, 
                                 shuffle=True, n_jobs=1, method='lars', random_state=None)


# Non-Negative Matrix Factorization (NMF)
# Find two non-negative matrices (W, H) whose product approximates the non-
# negative matrix X. This factorization can be used for example for
# dimensionality reduction, source separation or topic extraction.
decomposition.NMF(n_components=None, init=None, solver='cd', tol=0.0001, max_iter=200, random_state=None, alpha=0.0, l1_ratio=0.0, 
                  verbose=0, shuffle=False, nls_max_iter=2000, sparseness=None, beta=1, eta=0.1)

# Principal component analysis (PCA):
# Linear dimensionality reduction using Singular Value Decomposition of the
# data and keeping only the most significant singular vectors to project the
# data to a lower dimensional space.
# It only works for dense arrays and is not scalable to large dimensional data.
decomposition.PCA(n_components=None, copy=True, whiten=False)


# Non-Negative Matrix Factorization (NMF):
# Find two non-negative matrices (W, H) whose product approximates the non-
# negative matrix X. This factorization can be used for example for
# dimensionality reduction, source separation or topic extraction.
decomposition.ProjectedGradientNMF()

#Principal component analysis (PCA) using randomized SVD
decomposition.RandomizedPCA(n_components=None, copy=True, iterated_power=3, whiten=False, random_state=None)

# Finds a sparse representation of data against a fixed, precomputeddictionary.
decomposition.SparseCoder(dictionary, transform_algorithm='omp', transform_n_nonzero_coefs=None, transform_alpha=None, split_sign=False, n_jobs=1)

#Finds the set of sparse components that can optimally reconstruct
# the data.  The amount of sparseness is controllable by the coefficient
# of the L1 penalty, given by the parameter alpha.
decomposition.SparsePCA(self, n_components=None, alpha=1, ridge_alpha=0.01, max_iter=1000, tol=1e-08, 
                        method='lars', n_jobs=1, U_init=None, V_init=None, verbose=False, random_state=None)

# Dimensionality reduction using truncated SVD (aka LSA).
decomposition.TruncatedSVD(n_components=2, algorithm='randomized', n_iter=5, random_state=None, tol=0.0)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-19-9c32bad3d674> in <module>()
     26 decomposition.ProjectedGradientNMF
     27 decomposition.RandomizedPCA
---> 28 decomposition.SparseCoder()
     29 decomposition.SparsePCA(self, n_components=None, alpha=1, ridge_alpha=0.01, max_iter=1000, tol=1e-08, 
     30                         method='lars', n_jobs=1, U_init=None, V_init=None, verbose=False, random_state=None)

TypeError: __init__() takes at least 2 arguments (1 given)

In [21]:



/home/gilad/VMachine/lib/python2.7/site-packages/sklearn/utils/__init__.py:75: DeprecationWarning: Class ProjectedGradientNMF is deprecated; It will be removed in release 0.19. Use NMF instead.'pg' solver is still available until release 0.19.
  warnings.warn(msg, category=DeprecationWarning)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-a395c4d99cb5> in <module>()
     25 
     26 # Finds a sparse representation of data against a fixed, precomputeddictionary.
---> 27 decomposition.SparseCoder(dictionary, transform_algorithm='omp', transform_n_nonzero_coefs=None, transform_alpha=None, split_sign=False, n_jobs=1)
     28 
     29 #Finds the set of sparse components that can optimally reconstruct

NameError: name 'dictionary' is not defined

In [5]:
#Whitening
decomposition.PCA()

In [ ]:
#Gridsearch

In [6]:
#RUN Many Models and score them all.

In [7]:
#Ensemble Methods

In [ ]: