In [13]:
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import sparse_random_matrix

In [14]:
X = sparse_random_matrix(100, 100, density=0.01, random_state=42)

In [43]:
feature_names = ["f_{}".format(str(i)) for i in range(X.shape[0])]
str(feature_names)


Out[43]:
"['f_0', 'f_1', 'f_2', 'f_3', 'f_4', 'f_5', 'f_6', 'f_7', 'f_8', 'f_9', 'f_10', 'f_11', 'f_12', 'f_13', 'f_14', 'f_15', 'f_16', 'f_17', 'f_18', 'f_19', 'f_20', 'f_21', 'f_22', 'f_23', 'f_24', 'f_25', 'f_26', 'f_27', 'f_28', 'f_29', 'f_30', 'f_31', 'f_32', 'f_33', 'f_34', 'f_35', 'f_36', 'f_37', 'f_38', 'f_39', 'f_40', 'f_41', 'f_42', 'f_43', 'f_44', 'f_45', 'f_46', 'f_47', 'f_48', 'f_49', 'f_50', 'f_51', 'f_52', 'f_53', 'f_54', 'f_55', 'f_56', 'f_57', 'f_58', 'f_59', 'f_60', 'f_61', 'f_62', 'f_63', 'f_64', 'f_65', 'f_66', 'f_67', 'f_68', 'f_69', 'f_70', 'f_71', 'f_72', 'f_73', 'f_74', 'f_75', 'f_76', 'f_77', 'f_78', 'f_79', 'f_80', 'f_81', 'f_82', 'f_83', 'f_84', 'f_85', 'f_86', 'f_87', 'f_88', 'f_89', 'f_90', 'f_91', 'f_92', 'f_93', 'f_94', 'f_95', 'f_96', 'f_97', 'f_98', 'f_99']"

In [15]:
import pandas as pd
pd.DataFrame(X.toarray(), columns=).describe()


Out[15]:
0 1 2 3 4 5 6 7 8 9 ... 90 91 92 93 94 95 96 97 98 99
count 100.000000 100.000000 100.0 100.000000 100.0 100.000000 100.00 100.00 100.0 100.000000 ... 100.0 100.0 100.00 100.00 100.00 100.0 100.00 100.00 100.000000 100.00
mean 0.000000 0.000000 0.0 -0.020000 0.0 0.030000 -0.01 -0.01 0.0 0.020000 ... 0.0 0.0 -0.01 0.01 0.01 0.0 -0.01 -0.02 -0.020000 -0.01
std 0.142134 0.142134 0.0 0.140705 0.0 0.171447 0.10 0.10 0.0 0.140705 ... 0.0 0.0 0.10 0.10 0.10 0.0 0.10 0.20 0.140705 0.10
min -1.000000 -1.000000 0.0 -1.000000 0.0 0.000000 -1.00 -1.00 0.0 0.000000 ... 0.0 0.0 -1.00 0.00 0.00 0.0 -1.00 -1.00 -1.000000 -1.00
25% 0.000000 0.000000 0.0 0.000000 0.0 0.000000 0.00 0.00 0.0 0.000000 ... 0.0 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.000000 0.00
50% 0.000000 0.000000 0.0 0.000000 0.0 0.000000 0.00 0.00 0.0 0.000000 ... 0.0 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.000000 0.00
75% 0.000000 0.000000 0.0 0.000000 0.0 0.000000 0.00 0.00 0.0 0.000000 ... 0.0 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.000000 0.00
max 1.000000 1.000000 0.0 0.000000 0.0 1.000000 0.00 0.00 0.0 1.000000 ... 0.0 0.0 0.00 1.00 1.00 0.0 0.00 1.00 0.000000 0.00

8 rows × 100 columns


In [16]:
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)

In [151]:
svd.fit(X)


Out[151]:
TruncatedSVD(algorithm='randomized', n_components=5, n_iter=7,
       random_state=42, tol=0.0)

In [27]:
l.shape


Out[27]:
(100, 5)

In [34]:
# Have to check if the re-constructed values match the original values
svd.inverse_transform(l)


Out[34]:
array([[  4.53614251e-07,   5.94986126e-09,   1.52694511e-22, ...,
         -2.72725083e-06,   5.13570405e-09,  -2.15097572e-11],
       [ -5.69375538e-18,  -1.11990027e-19,   3.67847978e-33, ...,
         -2.17113157e-17,  -2.69544696e-19,   9.35568034e-22],
       [ -2.93753794e-02,  -3.74847011e-04,   1.41173781e-17, ...,
         -3.65049683e-02,  -1.09142571e-03,   3.85158003e-06],
       ..., 
       [ -1.55332590e-06,  -2.01895537e-08,   3.86892213e-22, ...,
          2.73152004e-06,  -4.78520806e-08,   1.67404008e-10],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [ -6.63184677e-03,  -9.22742695e-05,   4.54729947e-18, ...,
         -2.32029752e-02,  -2.79132499e-04,   1.01189068e-06]])

In [148]:
transformed_features = " ".join([
    "+ {0:2f}*{1}".format(coef, feat) for coef, feat in zip(svd.components_[0], feature_names)])

In [149]:
transformed_features


Out[149]:
'+ 0.005787*f_0 + 0.000025*f_1 + 0.000000*f_2 + 0.027323*f_3 + 0.000000*f_4 + 0.146093*f_5 + -0.006118*f_6 + -0.000000*f_7 + 0.000000*f_8 + 0.087252*f_9 + 0.447225*f_10 + -0.000001*f_11 + -0.000000*f_12 + 0.000012*f_13 + 0.000000*f_14 + -0.074129*f_15 + 0.000028*f_16 + -0.000000*f_17 + -0.004809*f_18 + 0.000000*f_19 + 0.000000*f_20 + 0.000000*f_21 + 0.000012*f_22 + 0.000000*f_23 + 0.000126*f_24 + -0.000000*f_25 + -0.291393*f_26 + -0.000014*f_27 + 0.483655*f_28 + -0.000000*f_29 + -0.000000*f_30 + 0.000002*f_31 + -0.000000*f_32 + -0.000000*f_33 + -0.000007*f_34 + -0.000000*f_35 + 0.000007*f_36 + -0.000000*f_37 + 0.000017*f_38 + -0.000029*f_39 + -0.051315*f_40 + -0.000000*f_41 + -0.213801*f_42 + -0.000000*f_43 + -0.000014*f_44 + 0.000007*f_45 + 0.015369*f_46 + -0.000013*f_47 + -0.000000*f_48 + -0.000000*f_49 + -0.000025*f_50 + -0.000000*f_51 + -0.000000*f_52 + 0.025728*f_53 + -0.000001*f_54 + -0.120611*f_55 + -0.000013*f_56 + -0.000001*f_57 + -0.000000*f_58 + -0.000000*f_59 + 0.078762*f_60 + 0.094397*f_61 + -0.078762*f_62 + -0.084546*f_63 + -0.000000*f_64 + 0.000001*f_65 + -0.000000*f_66 + -0.000205*f_67 + -0.000000*f_68 + -0.000000*f_69 + -0.000000*f_70 + -0.000000*f_71 + 0.000000*f_72 + -0.000014*f_73 + 0.000060*f_74 + -0.000000*f_75 + -0.000000*f_76 + -0.028632*f_77 + -0.000000*f_78 + 0.000000*f_79 + 0.129254*f_80 + -0.000000*f_81 + -0.027044*f_82 + -0.222734*f_83 + -0.000000*f_84 + -0.006118*f_85 + 0.145857*f_86 + -0.000000*f_87 + -0.000000*f_88 + 0.333795*f_89 + -0.000000*f_90 + -0.000000*f_91 + 0.000000*f_92 + 0.000014*f_93 + -0.013049*f_94 + -0.000000*f_95 + -0.000000*f_96 + -0.390950*f_97 + -0.000027*f_98 + 0.000000*f_99'

In [ ]: