When reading training data from a file, the necessary steps are as follows:

  1. Collect information about the file data, including file_path, input_names, value_dim, is_sparse, etc., as discussed in https://github.com/Microsoft/CNTK/wiki/CNTKTextFormat-Reader
  2. Pass the actual data (according to this information) to the network model.

The first step is done with MinibatchSource(), as sketched below.
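For orientation, here is a compact sketch of the whole pipeline these two steps describe; the cells below build the same thing piece by piece and inspect each object. This is a sketch only: trn_data_file is the MNIST file path set up in the next cell.

from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs

# Sketch only: trn_data_file (defined in the next cell) points at an MNIST
# file in CNTK text format with 'features' (784-dim) and 'labels' (10-dim).
ctf = CTFDeserializer(trn_data_file, StreamDefs(
    input = StreamDef(field='features', shape=784, is_sparse=False),
    label = StreamDef(field='labels', shape=10, is_sparse=False)))
source = MinibatchSource(ctf)
mb = source.next_minibatch(10)   # dict mapping stream infos to MinibatchData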


In [2]:
import sys, os
import getpass
import numpy as np

mnist_dir = '/home/' + getpass.getuser() + '/repos/cntk/Examples/Image/DataSets/MNIST/'
trn_data_file = mnist_dir + 'Train-28x28_cntk_text.txt'

print (os.path.exists(trn_data_file))


True

Type I (get a numpy.ndarray from the CNTK reader)


In [3]:
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs

In [4]:
input_stream = StreamDef(field='features', shape=784, is_sparse=False)
label_stream = StreamDef(field='labels', shape=10, is_sparse=False)
print (input_stream)
print (label_stream)


{'dim': 784, 'stream_alias': 'features', 'is_sparse': False}
{'dim': 10, 'stream_alias': 'labels', 'is_sparse': False}

In [5]:
streams = StreamDefs(
    input = input_stream,
    label = label_stream
)
print (streams)


{'input': {'dim': 784, 'stream_alias': 'features', 'is_sparse': False}, 'label': {'dim': 10, 'stream_alias': 'labels', 'is_sparse': False}}

In [6]:
ctf = CTFDeserializer(trn_data_file, streams)
print (ctf)


{'input': {'input': {'alias': 'features', 'format': 'dense', 'dim': 784}, 'label': {'alias': 'labels', 'format': 'dense', 'dim': 10}}, 'type': 'CNTKTextFormatDeserializer', 'file': '/home/xtalpi/repos/cntk/Examples/Image/DataSets/MNIST/Train-28x28_cntk_text.txt'}

In [7]:
source = MinibatchSource(ctf)
features_si = source['input']
label_si = source['label']

In [9]:
mb = source.next_minibatch(10)
print (mb[features_si].value.shape)
print (mb[features_si].m_data.data().to_numpy().shape)
print (np.asarray(mb[features_si].m_data).shape)
print (mb[features_si].num_samples)


(10, 1, 784)
(10, 1, 784)
(10, 1, 784)
10
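This covers step 1 from the intro. For step 2, passing the minibatch to a network model, the usual pattern is an input map from the model's input variables to the reader streams. Below is a minimal, hypothetical sketch: input_var, label_var, and trainer are placeholders that are not defined in this notebook.

# Hypothetical sketch only: input_var, label_var and trainer are assumed to be
# defined elsewhere (e.g. via input_variable(784), input_variable(10) and Trainer).
input_map = {
    input_var: mb[features_si],   # dense features, shape (10, 1, 784)
    label_var: mb[label_si]       # one-hot labels, shape (10, 1, 10)
}
trainer.train_minibatch(input_map)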

Type II (the same data read via text_format_minibatch_source)


In [57]:
from cntk.io import StreamConfiguration, text_format_minibatch_source
mb_source = text_format_minibatch_source(trn_data_file, [
        StreamConfiguration('features', 784),
        StreamConfiguration('labels', 10)
    ])
features_si = mb_source['features']
labels_si = mb_source['labels']

In [58]:
mb = mb_source.next_minibatch(10)
print (mb[features_si].value.shape)
print (mb[features_si].m_data.data().to_numpy().shape)
print (np.asarray(mb[features_si].m_data).shape)


(10, 1, 784)
(10, 1, 784)
(10, 1, 784)

Save a numpy.ndarray to a text file in CNTK format


In [19]:
import sys, os
import getpass
import numpy as np
# Teemo is a local helper repo that provides an MNIST loader.
Teemo_path = '/home/' + getpass.getuser() + '/git_test'
if not os.path.exists(Teemo_path):
    Teemo_path = '/home/' + getpass.getuser() + '/huizhu/git_test'
if not os.path.exists(Teemo_path):
    sys.exit("Cannot find Teemo_path")
sys.path.append(Teemo_path)
from Teemo.examples.mnist import load_data
trn_x, trn_y, valid_x, valid_y = load_data.mnist()
trn_x = np.asarray(trn_x, dtype=np.float32)
trn_y = np.asarray(trn_y, dtype=np.float32)
valid_x = np.asarray(valid_x, dtype=np.float32)
valid_y = np.asarray(valid_y, dtype=np.float32)
print ('Loaded data successfully')


Loaded data successfully

In [54]:
def save_txt(file_name, features_ndarray, labels_ndarray):
    """Write features/labels to a text file in CNTK text (CTF) format."""
    dir_name = os.path.dirname(file_name)
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name)
    if os.path.exists(file_name):
        print ("File already exists: {0}".format(file_name))
    else:
        with open(file_name, 'w') as f:
            for feat, label in zip(features_ndarray, labels_ndarray):
                feat_str = ' '.join(feat.astype(str))
                label_str = ' '.join(label.astype(str))
                # CTF format: each stream starts with '|<stream name>'
                f.write('|labels {0} |features {1}\n'.format(label_str, feat_str))

In [55]:
file_name = '/home/xtalpi/cntk_data/Train-28x28_cntk_text.txt'

save_txt(file_name, trn_x, trn_y)
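To sanity-check the written file, it can be read back with the same reader used in the first part. This is a quick sketch; it assumes save_txt wrote the |labels / |features streams and that the cntk.io imports above are still in scope.

# Quick check: read the freshly written file back with CTFDeserializer
# and confirm the minibatch shapes match the dense 784/10 streams.
check_ctf = CTFDeserializer(file_name, StreamDefs(
    input = StreamDef(field='features', shape=784, is_sparse=False),
    label = StreamDef(field='labels', shape=10, is_sparse=False)))
check_source = MinibatchSource(check_ctf)
check_mb = check_source.next_minibatch(5)
print (check_mb[check_source['input']].value.shape)   # expected (5, 1, 784)
print (check_mb[check_source['label']].value.shape)   # expected (5, 1, 10)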
