In [1]:
import time
import numpy as np
import itertools
from scipy import sparse

Kept for reference only (not run in this notebook):

    f = open('dayrows.pkl', 'r')
    dayrows = cPickle.load(f)
    f.close()

Columns in my notebook to be clustered:


In [2]:
# Column numbers to be clustered; used below to build the names of the
# per-column 'hashingcol<N>day2.npz' files.
cols = [
    5, 6, 8, 9,
    11, 12, 14, 15,
    18, 19, 22, 25,
]

Start from here

day 1 - 2


In [3]:
# Rebuild the base day-2 matrix from the CSR components (data / indices /
# indptr / shape) stored in 'day2.npz'.
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open,
# so use it as a context manager; the arrays are read when indexed inside the
# block and remain valid after the archive is closed.
with np.load('day2.npz') as loader:
    val = sparse.csr_matrix(
        (loader['data'], loader['indices'], loader['indptr']),
        shape=loader['shape'])

In [4]:
# Display the matrix repr (shape, dtype, stored-element count, format).
val


Out[4]:
<4765304x260 sparse matrix of type '<type 'numpy.float32'>'
	with 61948952 stored elements in Compressed Sparse Row format>

In [5]:
# Append the hashed-feature block of every column in `cols` to the right of
# the base matrix. Each block is rebuilt from the CSR components stored in its
# own 'hashingcol<N>day2.npz' archive.
hashed_blocks = []
for i in cols:
    # NpzFile keeps the archive open until closed, so release it as soon as
    # the arrays have been read.
    with np.load('hashingcol' + str(i) + 'day2.npz') as loader:
        vals = sparse.csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            shape=loader['shape'])
    hashed_blocks.append(vals)

# Stack once at the end: calling hstack inside the loop re-copies the whole
# accumulated matrix on every iteration. Column order is unchanged (base
# matrix first, then the blocks in `cols` order). With no blocks, `val` is
# left untouched.
if hashed_blocks:
    val = sparse.hstack([val] + hashed_blocks)

In [6]:
# Display the matrix repr again now that the per-column blocks are stacked on.
val


Out[6]:
<4765304x3204 sparse matrix of type '<type 'numpy.float32'>'
	with 372226123 stored elements in COOrdinate format>

In [7]:
# Convert the stacked matrix to CSR; the save step further down stores its
# data / indices / indptr arrays.
valcsr = val.tocsr()

In [8]:
# Display the converted matrix repr (shape, dtype, stored-element count, format).
valcsr


Out[8]:
<4765304x3204 sparse matrix of type '<type 'numpy.float32'>'
	with 372226123 stored elements in Compressed Sparse Row format>

In [9]:
# Persist the combined matrix as its raw CSR components, using the same
# data / indices / indptr / shape keys the loading cells read back.
np.savez(
    'hashingday2.npz',
    data=valcsr.data,
    indices=valcsr.indices,
    indptr=valcsr.indptr,
    shape=valcsr.shape
)

In [ ]: