In [1]:
import utilities
from utilities import *
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.models import load_model

def test_sentence(sentence):
    """Classify a single raw sentence and print the prediction.

    The text is passed through ``process_text``, vectorized with the
    module-level ``vectorizer`` and fed to the module-level ``model``.
    Two lines are printed: the predicted label (looked up in the
    module-level ``label_names``) and the first two class probabilities.

    Args:
        sentence: Raw input text to classify.

    Returns:
        None. The result is only printed.
    """
    cleaned = process_text(sentence)
    vec = vectorizer.transform([cleaned]).toarray()

    # Run the network once and reuse the result for both printed lines
    # (previously the prediction was computed twice on the same input).
    probabilities = model.predict_proba(vec, verbose=False)

    # argmax over the whole prediction output, exactly as before.
    # Single-argument print(...) produces the same text under Python 2
    # and also parses under Python 3.
    print("Label %s" % (label_names[np.argmax(probabilities)],))

    # NOTE(review): index 0 is reported as DEV and index 1 as Not DEV;
    # this assumes label_names uses the same order -- confirm.
    first_row = probabilities[0]
    prob_dev = first_row[0]
    prob_not = first_row[1]
    print("Proba DEV: {0:.2f} Not DEV: {1:.2f}".format(prob_dev, prob_not))

# Dataset switches. Both values are forwarded unchanged to get_data()
# when the readme and description data are loaded.
binary = True    # presumably selects the two-class (DEV / not DEV) labelling -- TODO confirm
no_dev = False   # presumably controls whether DEV samples are excluded -- TODO confirm


Using Theano backend.

In [2]:
print("Get and process data")
# Load raw text and labels from the two sources ('readme' and 'description').
features, labels, label_names = get_data('readme', binary=binary, no_dev=no_dev)
features2, labels2, label_names2 = get_data('description', binary=binary, no_dev=no_dev)

# Combine both sources and shuffle texts together with their labels.
features, labels = shuffle_data(features + features2, labels + labels2)

print("Vectorize data")
# The training texts are converted into input data (vectors); afterwards
# `features` is a matrix made up of the individual vectors. The fitted
# `vectorizer` is kept so new text can be transformed the same way later.
features, vectorizer = vectorize_text(features, max_features=4000)

# x_* are the input matrices, y_* are the vectors holding the expected results.
x_train, x_test, y_train, y_test = split_train_test(
    features,
    labels,
    ratio=0.8,
    shuffle=True,
)

print("Prepare one-hot-encoding")
# The neural net output needs one-hot encoded targets.
y_train, y_test = one_hot_encoding(y_train), one_hot_encoding(y_test)


Get and process data
Vectorize data
Prepare one-hot-encoding

In [3]:
# Restore the saved Keras model from the relative path below.
# NOTE(review): the file name suggests readme + description data and
# 5 training epochs -- confirm against the training notebook.
model = load_model(filepath='modelrmanddescr5epoch')

In [6]:
# Smoke test: classify one example sentence and print its label and probabilities.
test_sentence(sentence='This is my website')


Label NOTDEV
Proba DEV: 0.00 Not DEV: 1.00

In [ ]: