In [ ]:
import utilities
from utilities import *
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
import os

def test_sentence(sentence):
    # Classify a single raw sentence: preprocess it, vectorize it with the
    # fitted vectorizer, and print the predicted label plus per-class probabilities.
    sentence = process_text(sentence)
    vec = vectorizer.transform([sentence]).toarray()
    prediction = model.predict_proba(vec)[0]
    print "Label:", label_names[np.argmax(prediction)]
    for i, category in enumerate(label_names):
        print "{}: {:.2f}".format(category, prediction[i])

# Constants
no_dev = True
binary = False


Using Theano backend.
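
test_sentence depends on process_text from the author's utilities module and on the vectorizer fitted further down; the helper's implementation is not shown in this notebook. The following cell is only a minimal sketch of what process_text might do (lowercasing and keeping alphabetic tokens are assumptions, not the actual code):

In [ ]:
import re

def process_text(text):
    # Hypothetical approximation of utilities.process_text:
    # lowercase the text and keep only alphabetic tokens.
    text = text.lower()
    tokens = re.findall(r"[a-z]+", text)
    return " ".join(tokens)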

In [ ]:
print "Get and process data"
# Get raw text + labels
features, labels, label_names = get_data('readme', binary=binary, no_dev=no_dev)
features2, labels2, label_names2 = get_data('description', binary=binary, no_dev=no_dev)
features = features + features2
labels = labels + labels2
features, labels = shuffle_data(features, labels)
#features = np.matrix(features)

print "Vectorize data"
# trainingsdaten werden in eingabedaten (vektoren) umgewandelt
# features ist dann matrix bestehend aus den einzelnen vektore
features, vectorizer = vectorize_text(features, max_features=3500)

# x sind die eingabematrizen, y sind die vektoren in denen die ergebnisse stehen
x_train, x_test, y_train, y_test = split_train_test(features, labels, ratio=0.8, shuffle=True)

print "Prepare one-hot-encoding"
# One-hot encoding of the labels is needed for the neural net's output layer
y_train = one_hot_encoding(y_train)
y_test = one_hot_encoding(y_test)


Get and process data
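
vectorize_text and one_hot_encoding also come from the utilities module and are not reproduced here. As a rough sketch, assuming vectorize_text wraps a scikit-learn TF-IDF vectorizer and one_hot_encoding maps integer labels to indicator vectors, they could look like this (the exact implementations may differ):

In [ ]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

def vectorize_text(texts, max_features=3500):
    # Fit a TF-IDF vectorizer on the training texts and return the
    # dense feature matrix together with the fitted vectorizer.
    vectorizer = TfidfVectorizer(max_features=max_features)
    matrix = vectorizer.fit_transform(texts).toarray()
    return matrix, vectorizer

def one_hot_encoding(labels):
    # Turn integer class labels into one-hot rows for the network output.
    labels = np.asarray(labels, dtype=int)
    encoded = np.zeros((len(labels), labels.max() + 1))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded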

In [ ]:
model = keras.models.load_model('modelnondev')
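
The cell above loads a previously trained model from disk; the code that built and saved 'modelnondev' is not part of this section. Given the Sequential/Dense imports and the one-hot targets, it was presumably a small feed-forward softmax classifier trained in a separate run. A compatible but purely illustrative definition, assuming the Keras 2 API, might look like this (layer sizes, optimizer, and epoch count are assumptions):

In [ ]:
# Illustrative only: this is one way the saved 'modelnondev' file could have been produced.
model = Sequential()
model.add(Dense(512, input_dim=x_train.shape[1]))
model.add(Activation('relu'))
model.add(Dense(y_train.shape[1]))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=32, verbose=1)
model.save('modelnondev')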

In [ ]:
# Evaluate the model on the held-out test set
print "Test on {} unknown samples".format(len(x_test))
acc = model.evaluate(x_test, y_test, verbose=0)
print "Loss, accuracy: ", acc

In [ ]:
# Test the model on an example sentence
test_sentence('This is my website api course library')