In [12]:
import numpy as np

In [6]:
import scipy as sp

In [7]:
import networkx as nx

In [52]:
fn = '/var/datasets/livejournal/small-soc-LiveJournal1.txt'

In [53]:
!head {fn}












In [99]:
import csv
from scipy import sparse

In [106]:
nodes = 100000 
matrix = sparse.lil_matrix((nodes,nodes), dtype=float)

csvreader = csv.reader(open(fn))

In [107]:
with open(fn,'r') as f:
    next(f);next(f);next(f);next(f)
    reader = csv.reader(f,delimiter='\t')
    for column,row in reader:
        column = int(column)
        row = int(row)
        if column >= nodes or row >= nodes:
            continue
        #matrix.data[row].append(column)
        matrix[row,column] = 1

In [108]:
csc = matrix.tocsc()

In [178]:
k=200

In [163]:
res = sp.sparse.linalg.svds(csc, k=k)

In [164]:
semilogy(abs(res[0][:,-1]))


Out[164]:
[<matplotlib.lines.Line2D at 0x7fcc44a1cf90>]

In [176]:
cumsum( res[1]  ) / csc.nnz


Out[176]:
array([  1.32021215e-05,   2.64198894e-05,   3.96674285e-05,
         5.29315853e-05,   6.62352901e-05,   7.95638244e-05,
         9.29176225e-05,   1.06281255e-04,   1.19668130e-04,
         1.33086042e-04,   1.46528011e-04,   1.59992543e-04,
         1.73505792e-04,   1.87051295e-04,   2.00612779e-04,
         2.14229147e-04,   2.27859812e-04,   2.41515925e-04,
         2.55195745e-04,   2.68893745e-04,   2.82598022e-04,
         2.96325799e-04,   3.10058789e-04,   3.23833895e-04,
         3.37649073e-04,   3.51516047e-04,   3.65394698e-04,
         3.79286769e-04,   3.93217891e-04,   4.07170511e-04,
         4.21145606e-04,   4.35132169e-04,   4.49126066e-04,
         4.63151166e-04,   4.77201968e-04,   4.91259947e-04,
         5.05346320e-04,   5.19473891e-04,   5.33650866e-04,
         5.47839788e-04,   5.62046556e-04,   5.76275129e-04,
         5.90533587e-04,   6.04804695e-04,   6.19112456e-04,
         6.33446797e-04,   6.47811283e-04,   6.62183331e-04,
         6.76602818e-04,   6.91047488e-04,   7.05496520e-04,
         7.19966825e-04,   7.34449596e-04,   7.48951942e-04,
         7.63515596e-04,   7.78134486e-04,   7.92785323e-04,
         8.07447950e-04,   8.22132813e-04,   8.36870311e-04,
         8.51645819e-04,   8.66449049e-04,   8.81256924e-04,
         8.96122982e-04,   9.10989209e-04,   9.25905913e-04,
         9.40840739e-04,   9.55826209e-04,   9.70872265e-04,
         9.85987644e-04,   1.00113472e-03,   1.01629674e-03,
         1.03150570e-03,   1.04676989e-03,   1.06206341e-03,
         1.07738959e-03,   1.09276416e-03,   1.10818147e-03,
         1.12360716e-03,   1.13907177e-03,   1.15459364e-03,
         1.17014195e-03,   1.18573509e-03,   1.20135719e-03,
         1.21699932e-03,   1.23265752e-03,   1.24832758e-03,
         1.26406837e-03,   1.27983459e-03,   1.29564509e-03,
         1.31153534e-03,   1.32745481e-03,   1.34340629e-03,
         1.35939646e-03,   1.37540420e-03,   1.39144759e-03,
         1.40752182e-03,   1.42362315e-03,   1.43975188e-03,
         1.45590041e-03,   1.47209787e-03,   1.48834355e-03,
         1.50464154e-03,   1.52094911e-03,   1.53728895e-03,
         1.55365259e-03,   1.57008089e-03,   1.58653966e-03,
         1.60306004e-03,   1.61963507e-03,   1.63624159e-03,
         1.65305556e-03,   1.66992971e-03,   1.68685318e-03,
         1.70384657e-03,   1.72093285e-03,   1.73803439e-03,
         1.75514283e-03,   1.77247800e-03,   1.78988968e-03,
         1.80732803e-03,   1.82482409e-03,   1.84233952e-03,
         1.86004437e-03,   1.87778614e-03,   1.89565599e-03,
         1.91374273e-03,   1.93183568e-03,   1.95008927e-03,
         1.96836166e-03,   1.98668550e-03,   2.00516091e-03,
         2.02381739e-03,   2.04248946e-03,   2.06125600e-03,
         2.08009141e-03,   2.09895149e-03,   2.11783799e-03,
         2.13680126e-03,   2.15581125e-03,   2.17486886e-03,
         2.19398520e-03,   2.21323830e-03,   2.23279433e-03,
         2.25243009e-03,   2.27216797e-03,   2.29203938e-03,
         2.31191865e-03,   2.33188362e-03,   2.35193298e-03,
         2.37203780e-03,   2.39217227e-03,   2.41253692e-03,
         2.43299077e-03,   2.45371743e-03,   2.47458433e-03,
         2.49549263e-03,   2.51649064e-03,   2.53750115e-03,
         2.55868899e-03,   2.58018453e-03,   2.60176308e-03,
         2.62339021e-03,   2.64533607e-03,   2.66795257e-03,
         2.69077257e-03,   2.71361532e-03,   2.73678931e-03,
         2.76035644e-03,   2.78430541e-03,   2.80847543e-03,
         2.83267628e-03,   2.85697424e-03,   2.88184086e-03,
         2.90682041e-03,   2.93228132e-03,   2.95806994e-03,
         2.98495438e-03,   3.01192243e-03,   3.03910011e-03,
         3.06630067e-03,   3.09484702e-03,   3.12453132e-03,
         3.15451225e-03,   3.18495738e-03,   3.21612974e-03,
         3.24759308e-03,   3.27953504e-03,   3.31167208e-03,
         3.34423215e-03,   3.37939833e-03,   3.41509759e-03,
         3.45108528e-03,   3.48889435e-03,   3.52807525e-03,
         3.57377802e-03,   3.62062839e-03,   3.67589066e-03,
         3.74419659e-03,   3.82875473e-03])

In [177]:
plot(range(k-1,-1,-1), res[1]/sum(res[1]),range(k-1,-1,-1), cumsum( res[1]/sum(res[1])  ) )
#axis([0, k-1, 0, max(res[1]/sum(res[1]) ) ])
axis([0, k-1, 0, 1 ])


Out[177]:
[0, 199, 0, 1]

In [150]:
res[1].toarray()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-150-b6ca25bc771d> in <module>()
----> 1 res[1].toarray()

AttributeError: 'numpy.ndarray' object has no attribute 'toarray'

In [147]:
reversed(res[1].array())


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-147-a8030afeac81> in <module>()
----> 1 reversed(res[1].array())

AttributeError: 'numpy.ndarray' object has no attribute 'array'

In [140]:
range(5,-1,1)


Out[140]:
[]

In [ ]: