In [1]:

    
import time
import numpy as np
import itertools
from scipy import sparse

Earlier loading snippet (kept as a note, not executed in this notebook): `f = open('dayrows.pkl', 'r')`; `dayrows = cPickle.load(f)`; `f.close()`

Columns in my notebook to be clustered:



In [2]:

    
# Column numbers used to build the 'hashingcol<N>day2.npz' file names that are
# loaded and appended to the base matrix later in the notebook.
cols = [
    5, 6, 8, 9,
    11, 12, 14, 15,
    18, 19, 22, 25,
]

Start from here

day 1 - 2



In [3]:

    
# Rebuild the base CSR matrix for day 2 from its saved components
# (data / indices / indptr / shape) stored in 'day2.npz'.
# np.load on an .npz returns a lazily-read archive that holds the file open,
# so use it as a context manager to release the handle once the arrays
# have been read.
with np.load('day2.npz') as loader:
    val = sparse.csr_matrix(
        (loader['data'], loader['indices'], loader['indptr']),
        shape=loader['shape'])



In [4]:

    
# Show the matrix summary (shape, dtype, stored-element count, storage format).
val









    Out[4]:





<4765304x260 sparse matrix of type '<type 'numpy.float32'>'
	with 61948952 stored elements in Compressed Sparse Row format>



In [5]:

    
# Load the saved CSR matrix for every column number in `cols` and append all
# of them to the right of `val`.
# The blocks are collected first and stacked with a single hstack call;
# stacking inside the loop would rebuild the ever-growing matrix on each
# iteration.
# NOTE: this rebinds `val`, so re-running this cell appends the same columns
# a second time; re-run the cell that loads 'day2.npz' first.
col_blocks = []
for i in cols:
    # Context manager closes the .npz archive once its arrays are read.
    with np.load('hashingcol' + str(i) + 'day2.npz') as loader:
        vals = sparse.csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            shape=loader['shape'])
    col_blocks.append(vals)
val = sparse.hstack([val] + col_blocks)



In [6]:

    
# Show the stacked matrix summary (shape, dtype, stored-element count, format).
val









    Out[6]:





<4765304x3204 sparse matrix of type '<type 'numpy.float32'>'
	with 372226123 stored elements in COOrdinate format>



In [7]:

    
# Convert the stacked matrix to CSR; the save cell further down writes out its
# data / indices / indptr arrays, which are the CSR components.
valcsr = val.tocsr()



In [8]:

    
# Show the converted matrix summary to confirm the storage format.
valcsr









    Out[8]:





<4765304x3204 sparse matrix of type '<type 'numpy.float32'>'
	with 372226123 stored elements in Compressed Sparse Row format>



In [9]:

    
# Persist the combined matrix as its raw CSR components, using the same
# data / indices / indptr / shape keys that the loading cells read back.
np.savez(
    'hashingday2.npz',
    data=valcsr.data,
    indices=valcsr.indices,
    indptr=valcsr.indptr,
    shape=valcsr.shape,
)



In [ ]: