In [1]:
import time
import numpy as np
import itertools
from scipy import sparse
f = open('dayrows.pkl', 'r'); dayrows = cPickle.load(f); f.close()
Columns in my notebook to be clustered:
In [2]:
# Column indices to process; each one selects a 'hashingcol<i>day2.npz' file
# when the hashed blocks are loaded further down.
cols = [
    5, 6,
    8, 9,
    11, 12,
    14, 15,
    18, 19,
    22, 25,
]
In [3]:
# Rebuild the base matrix from the CSR components stored in day2.npz
# (data, indices, indptr, shape).
# np.load returns an NpzFile that keeps the archive open until it is closed,
# so read the arrays inside a `with` block to release the file handle
# as soon as the matrix has been constructed.
with np.load('day2.npz') as loader:
    val = sparse.csr_matrix(
        (loader['data'], loader['indices'], loader['indptr']),
        shape=loader['shape']
    )
In [4]:
# Show the base matrix built from day2.npz (the cell output is its repr).
val
Out[4]:
In [5]:
# Append one hashed feature block per entry in `cols` to the right of `val`.
#
# Each block is stored as CSR components in 'hashingcol<i>day2.npz'. The
# archives are opened with `with` so their file handles are closed promptly,
# and the blocks are collected first and stacked in a single hstack call
# instead of re-stacking (and re-copying) the growing matrix on every pass.
#
# NOTE: this cell rebinds `val`; re-running it without re-running the cell
# that loads day2.npz would append the hashed blocks a second time.
hashed_blocks = []
for i in cols:
    with np.load('hashingcol' + str(i) + 'day2.npz') as loader:
        vals = sparse.csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            shape=loader['shape']
        )
    hashed_blocks.append(vals)
val = sparse.hstack([val] + hashed_blocks)
In [6]:
# Show val again now that the hashed column blocks have been hstack-ed onto it.
val
Out[6]:
In [7]:
# Convert the stacked matrix to CSR: the save step below writes out
# .data / .indices / .indptr, which are the CSR component arrays.
valcsr = val.tocsr()
In [8]:
# Show the CSR-converted matrix before it is written to disk.
valcsr
Out[8]:
In [9]:
# Persist the combined matrix as its raw CSR components, using the same
# keys (data, indices, indptr, shape) that the loading cells read back.
np.savez(
    'hashingday2.npz',
    data=valcsr.data,
    indices=valcsr.indices,
    indptr=valcsr.indptr,
    shape=valcsr.shape
)
In [ ]: