In [1]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [4]:

    
# Simulation settings.
num_feats = 1000  # number of features (columns of X)
num_obsvs = 150   # number of observations (rows of X)

mod_size = 10     # number of features in each fully connected module of the network

num_causl = 10    # number of features given a non-zero weight in the phenotype

# Seed NumPy's global generator so that Restart & Run All regenerates the same data.
rand_seed = 0
np.random.seed(rand_seed)

Generate data



In [ ]:

    
# adjacency matrix
# The features are grouped into consecutive modules of mod_size features.
# Features within a module are all connected to each other, and the last
# feature of each module is connected to the first feature of the next one.
# Floor division keeps the module count an int on both Python 2 and Python 3.
num_mods = num_feats // mod_size
W = np.zeros((num_feats, num_feats))
for i in range(num_mods):
    start, stop = i * mod_size, (i + 1) * mod_size
    # fully connect the features of module i
    W[start:stop, start:stop] = np.ones((mod_size, mod_size))
    # chain module i to module i+1 (symmetric edge), except for the last module
    if i != num_mods - 1:
        W[stop - 1, stop] = 1
        W[stop, stop - 1] = 1

# remove the diagonal (no self-loops); zeroing it explicitly also avoids
# writing -1 for trailing features that do not belong to a complete module
np.fill_diagonal(W, 0)



In [ ]:

    
# SNPs
# Binary design matrix with one row per observation and one column per feature;
# each entry is an independent Bernoulli draw that equals 1 with probability 0.1.
X = np.random.binomial(n=1, p=0.1, size=(num_obsvs, num_feats))



In [ ]:

    
# Phenotype
# Weights of the causal features, drawn from a normal distribution
# with mean 0.2 and standard deviation 0.05.
w_causl = np.random.normal(loc=0.2, scale=0.05, size=(num_causl))
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(w_causl)

# Full weight vector: the first num_causl features carry the causal weights,
# every other feature has weight 0.
w = np.zeros((num_feats, ))
w[:num_causl] = w_causl

# Linear phenotype plus zero-mean Gaussian noise (standard deviation 0.1).
y = np.dot(X, w) + np.random.normal(loc=0., scale=0.1, size=(num_obsvs, ))

### Shuffle

# Random permutation of the feature indices: after the shuffle, new position j
# holds the feature that was originally at index map_indices_l[j].
# list(...) is required because np.random.shuffle works in place and a
# Python 3 range object cannot be modified.
map_indices_l = list(range(num_feats))
np.random.shuffle(map_indices_l)
# new position -> original index
map_indices = dict(zip(range(num_feats), map_indices_l))
# original index -> new position
map_indices_r = dict(zip(map_indices_l, range(num_feats)))

# Apply the same permutation to the columns of X and to both axes of W.
X = X[:, map_indices_l]
W_new = W[map_indices_l, :]
W_new = W_new[:, map_indices_l]
W = W_new
# New positions of the features that were originally at indices 0..num_causl-1.
causl = [map_indices_r[ix] for ix in range(num_causl)]

Save generated data

The data used in the `StructuredSparsity.ipynb` notebook is saved under `data/struct_spars`. Here, a new data set will be generated under `data/my_struct_spars`.



In [5]:

    
# Directory that receives the generated data set.
data_rep = 'data/my_struct_spars'

# One output file per saved array, all located directly under data_rep.
X_fname, y_fname, W_fname, causl_fname, wghts_fname = (
    template % data_rep
    for template in ('%s/X.data', '%s/y.data', '%s/W.data',
                     '%s/causl.data', '%s/w_causl.data')
)



In [6]:

    
# Write the generated arrays to disk as plain text files.
import os

# np.savetxt does not create missing directories, so make sure the target
# directory exists before writing (isdir/makedirs works on Python 2 and 3).
if not os.path.isdir(data_rep):
    os.makedirs(data_rep)

np.savetxt(X_fname, X, fmt='%d')          # design matrix, written as integers
np.savetxt(y_fname, y)                    # phenotype, default float format
np.savetxt(W_fname, W, fmt='%.1f')        # adjacency matrix, one decimal place
np.savetxt(causl_fname, causl, fmt='%d')  # indices in causl, written as integers
np.savetxt(wghts_fname, w_causl)          # causal weights, default float format