notebook.community

Edit and run



In [1]:

    
import requests

def download(url, filename):
    """ utility to download necessary data """
    response = requests.get(url, stream=True)
    with open(filename, "wb") as handle:
        for data in response.iter_content():
            handle.write(data)
            
url1 = "https://github.com/Microsoft/CNTK/blob/master/Examples/Tutorials/SLUHandsOn/atis.%s.ctf?raw=true"
url2 = "https://github.com/Microsoft/CNTK/blob/master/Examples/Text/ATIS/%s.wl?raw=true"
urls = [url1%"train", url1%"test", url2%"query", url2%"slots"]

def data():
    for t in urls:
        filename = t.split('/')[-1].split('?')[0]
        try:
            f = open(filename)
            f.close()
        except IOError:
            download(t, filename)



In [2]:

    
import math
import numpy as np
from cntk.blocks import default_options, LSTM, Placeholder, Input        # building blocks
from cntk.layers import Embedding, Recurrence, Dense, BatchNormalization # layers
from cntk.models import Sequential                                       # higher level things
from cntk.utils import ProgressPrinter, log_number_of_parameters
from cntk.io import MinibatchSource, CTFDeserializer
from cntk.io import StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk import future_value, combine, Trainer, cross_entropy_with_softmax, classification_error, splice
from cntk.learner import sgd, momentum_sgd, adagrad, adam_sgd, nesterov
from cntk.learner import learning_rate_schedule, momentum_schedule, momentum_schedule_per_sample



In [3]:

    
# number of words in vocab, slot labels, and intent labels
vocab_size = 943 ; num_labels = 129 ; num_intents = 26    

# model dimensions
input_dim  = vocab_size
label_dim  = num_labels
emb_dim    = 150
hidden_dim = 300

def create_model():
    with default_options(initial_state=0.1):
        return Sequential([
            Embedding(emb_dim),
            Recurrence(LSTM(hidden_dim), go_backwards=False),
            Dense(num_labels)
        ])

def create_reader(path, is_training):
    return MinibatchSource(CTFDeserializer(path, StreamDefs(
         query         = StreamDef(field='S0', shape=vocab_size,  is_sparse=True),
         intent_unused = StreamDef(field='S1', shape=num_intents, is_sparse=True),  
         slot_labels   = StreamDef(field='S2', shape=num_labels,  is_sparse=True)
     )), randomize=is_training, epoch_size = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)

def create_learner(parameters, minibatch_size, epoch_size):
    lr_schedule = [0.003]*4+[0.0015]*24+[0.0003]
    m_schedule_const = [700]
    m_schedule_float = [np.exp(-1.*minibatch_size/x) for x in m_schedule_const]

    lr_1 = learning_rate_schedule(lr_schedule, units=epoch_size)
    m_1 = momentum_schedule(           m_schedule_const, units=1)
    m_2 = momentum_schedule_per_sample(m_schedule_float, units=1)

    learner_1 = sgd(parameters, lr=lr_1)
    learner_2 = adagrad(parameters, lr=lr_1)
    learner_3 = momentum_sgd(parameters, lr=lr_1, momentum=m_1)
    learner_4 = adam_sgd(parameters, lr_per_sample=lr_1, momentum_time_constant=m_1)
    learner_5 = nesterov(parameters, lr=lr_1, momentum=m_1)
    
    return learner_5



In [4]:

    
model = create_model()
reader = create_reader("/home/xtalpi/git_test/test_data/examples/atis/atis.train.ctf", is_training=True)

def create_criterion_function(model):
    labels = Placeholder()
    ce   = cross_entropy_with_softmax(model, labels)
    errs = classification_error      (model, labels)
    return combine ([ce, errs]) # (features, labels) -> (loss, metric)

# Importantly, for sequential data, a sample is an individual item of a sequence. 
# Hence, CNTK's minibatchSize does not refer to the number of sequences in the minibatch, 
# but the aggregate number of sequence items/tokens across the sequences that constitute the minibatch
# details refer to https://github.com/Microsoft/CNTK/wiki/SGD-Block#what-is-the-minibatch-size-in-cntk
def train(reader, model, max_epochs=16):
    criterion = create_criterion_function(model)
    criterion.replace_placeholders({criterion.placeholders[0]: Input(vocab_size),
                                    criterion.placeholders[1]: Input(num_labels)})
    epoch_size = 18000        # 18000 samples is half the dataset size 
    minibatch_size = 80
    
    learner = create_learner(criterion.parameters, minibatch_size, epoch_size)

    trainer = Trainer(model=model, 
                      loss_function=criterion.outputs[0], 
                      eval_function=criterion.outputs[1], 
                      parameter_learners=learner)
    
    log_number_of_parameters(model)
    progress_printer = ProgressPrinter(tag='Training')
    

   
    t = 0
    for ii in range(1):
        epoch_end = (ii+1)*100
        while t < epoch_end:
            data = reader.next_minibatch(minibatch_size, input_map={
                    criterion.arguments[0]: reader.streams.query,
                    criterion.arguments[1]: reader.streams.slot_labels
                })
           
            t += data[criterion.arguments[0]].num_samples
            trainer.train_minibatch(data)
            progress_printer.update_with_trainer(trainer, with_metric=True)
            
            
        loss, metric, samples = progress_printer.epoch_summary(with_metric=True)
        print ([data[x].num_samples for x in criterion.arguments])
        #print (help(criterion))
        

train(reader, model)









    



Training 721479 parameters in 6 parameter tensors.
Finished Epoch [1]: [Training] loss = 4.839873 * 140, metric = 72.9% * 140
[64, 64]



In [4]:

    
Training 721479 parameters in 6 parameter tensors.
Finished Epoch [1]: [Training] loss = 1.109854 * 18029, metric = 21.0% * 18029
[70, 70]
Finished Epoch [2]: [Training] loss = 0.461001 * 17992, metric = 10.3% * 17992
[73, 73]
Finished Epoch [3]: [Training] loss = 0.305768 * 18030, metric = 6.5% * 18030
[67, 67]
Finished Epoch [4]: [Training] loss = 0.222351 * 18021, metric = 5.0% * 18021
[80, 80]
Finished Epoch [5]: [Training] loss = 0.163784 * 17946, metric = 3.8% * 17946
[74, 74]
Finished Epoch [6]: [Training] loss = 0.149376 * 18035, metric = 3.3% * 18035
[70, 70]
Finished Epoch [7]: [Training] loss = 0.123693 * 17961, metric = 2.7% * 17961
[78, 78]
Finished Epoch [8]: [Training] loss = 0.125279 * 17996, metric = 2.7% * 17996
[73, 73]
Finished Epoch [9]: [Training] loss = 0.085364 * 18038, metric = 1.8% * 18038
[80, 80]
Finished Epoch [10]: [Training] loss = 0.087199 * 18011, metric = 2.0% * 18011
[77, 77]









    



  File "<ipython-input-4-fed5c6ff40a2>", line 2
    Training 721479 parameters in 6 parameter tensors.
                  ^
SyntaxError: invalid syntax



In [17]:

    
def evaluate(reader, model):
    criterion = create_criterion_function(model)
    criterion.replace_placeholders({criterion.placeholders[0]: Input(num_labels)})

    # process minibatches and perform evaluation
    lr_schedule = learning_rate_schedule(1)
    momentum_as_time_constant = momentum_as_time_constant_schedule(0)
    dummy_learner = adam_sgd(criterion.parameters, 
                             lr=lr_schedule, momentum=momentum_as_time_constant, low_memory=True)
    evaluator = Trainer(model, criterion.outputs[0], criterion.outputs[1], dummy_learner)
    progress_printer = ProgressPrinter(tag='Evaluation')

    while True:
        minibatch_size = 1000
        data = reader.next_minibatch(minibatch_size, input_map={  # fetch minibatch
            criterion.arguments[0]: reader.streams.query,
            criterion.arguments[1]: reader.streams.slot_labels
        })
        if not data:                                 # until we hit the end
            break
        metric = evaluator.test_minibatch(data)
        progress_printer.update(0, data[criterion.arguments[1]].num_samples, metric) # log progress
    loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric

evaluate(reader, model)









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-17-0a0647381577> in <module>()
     25     return loss, metric
     26 
---> 27 from cntk.learner import UnitType
     28 evaluate(reader, model)

ImportError: cannot import name 'UnitType'



In [ ]: