In [1]:
import sys
from time import time
from pprint import pprint

import numpy as np
import scipy
import scipy.sparse as sp
import joblib

import io
import os.path

import sklearn
import sklearn.svm
import sklearn.datasets
import sklearn.metrics
import sklearn.cross_validation


from sklearn.externals.six import u, b

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectKBest, chi2

from svmlight_loader import (load_svmlight_file, load_svmlight_files,dump_svmlight_file)
from sklearn.datasets import load_svmlight_file as sk_load_svmlight_file

from sklearn import decomposition
from scikits.learn.decomposition import NMF

import warnings
warnings.filterwarnings('ignore')

%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
X_train, y_train, X_test, y_test = load_svmlight_files(["svm.train.in", "svm.test.in"],dtype=np.float32)

print X_train.shape
print y_train.shape
print X_test.shape
print X_test.shape

print X_train.dtype
#print X_train[:1]


(20, 47205)
(20,)
(38842, 47205)
(38842, 47205)
float32

In [3]:
X = np.zeros(X_train.shape)
for x, y in np.ndindex(X_train.shape):
    X[x,y]=X_train[x,y]

In [4]:
print X.shape
print X[:1]
print np.max(X)
print np.min(X)


(20, 47205)
[[ 0.  0.  0. ...,  0.  0.  0.]]
0.607490479946
0.0

In [ ]:
model = NMF(n_components=10,init='nndsvd')
model.fit(X)
print model.components_
print model.reconstruction_err_

In [18]:
print model.components_.shape


(10, 47205)

In [ ]:
print model.