In [80]:
import utilities
from utilities import *
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
import os
def test_sentence(sentence):
    """Classify one sentence and print the winning label plus every class score.

    The sentence is passed through ``process_text``, vectorised with
    ``vectorizer`` and scored by ``model.predict_proba``.  All of these, as well
    as ``label_names`` and ``np``, are looked up in the notebook namespace when
    the function is called, so they must be defined beforehand.

    Nothing is returned; the result is only printed.
    """
    cleaned = process_text(sentence)
    # transform() is given a one-element list, so only row 0 of the
    # prediction is of interest.
    vec = vectorizer.transform([cleaned]).toarray()
    prediction = model.predict_proba(vec)[0]

    # The position of the highest score selects the label name.
    best_position = np.argmax(prediction)
    print("%s %s" % ("Label", label_names[best_position]))

    for position, category in enumerate(label_names):
        score = prediction[position]
        print("{}: {:.2f}".format(category, score))
# Run configuration: both flags are forwarded to get_data() and also decide
# how many units the network's output layer gets.
binary = False  # True -> 2 output units
no_dev = False  # True -> 9 output units instead of 10 (only read when binary is False)
In [81]:
print("Get and process data")

# Fetch the raw 'meta' features together with their labels and the label names.
features, labels, label_names = get_data(whatIWant='meta', binary=binary, no_dev=no_dev)
features = np.asanyarray(features)

# x_* are the input matrices, y_* are the vectors holding the expected results.
# NOTE(review): ratio=0.7 is presumably the share used for training - confirm
# against split_train_test in utilities.
x_train, x_test, y_train, y_test = split_train_test(features, labels, ratio=0.7, shuffle=True)

print("Prepare one-hot-encoding")
# The neural net output needs one-hot encoded targets.
y_train, y_test = one_hot_encoding(y_train), one_hot_encoding(y_test)
In [86]:
# Display the label names returned by get_data(); test_sentence() indexes this
# sequence with the position of the highest prediction score.
label_names
Out[86]:
In [87]:
# Reuse a previously saved model if a file/directory with this name exists in
# the working directory; otherwise build and compile a fresh network.
model = None
trained_model_filneame = 'modelMeta'  # (sic) name kept as-is, other cells may read it

if trained_model_filneame in os.listdir('.'):
    model = keras.models.load_model(trained_model_filneame)
    print("Model was loaded from file")
else:
    # Network structure
    model = Sequential()
    input_size = x_train.shape[1]

    # First layer is as wide as the input and declares the input dimension.
    model.add(Dense(input_size, input_dim=input_size))
    model.add(Activation('relu'))

    # Two further hidden layers, each four times as wide as the input.
    for hidden_units in (input_size * 4, input_size * 4):
        model.add(Dense(hidden_units))
        model.add(Activation('relu'))

    # Output layer, one neuron per class
    if binary:
        output_units = 2
    elif no_dev:
        output_units = 9
    else:
        output_units = 10  # original note: "9 without Skipped"
    model.add(Dense(output_units))

    # Softmax normalises the values so that each output neuron's value can be
    # read as a probability.
    model.add(Activation('softmax'))

    adam = Adam()
    model.compile(metrics=['accuracy'], optimizer=adam, loss='categorical_crossentropy')
    print("Built model from scratch")
In [88]:
print("Train model")

# Training options are collected in one place; fit() stays the last expression
# so the cell still shows its return value.
fit_options = {'nb_epoch': 10, 'shuffle': True, 'verbose': True}
model.fit(x_train, y_train, **fit_options)
Out[88]:
In [89]:
# Evaluate the trained model on the held-out test split.
print("Test on {} unknown samples".format(len(x_test)))

# Despite its name, `acc` holds everything evaluate() returns; it is printed
# below under the heading "Loss, accuracy".
acc = model.evaluate(x_test, y_test, verbose=0)
print("%s %s" % ("Loss, accuracy: ", acc))
In [17]:
# For every key that occurs in the records returned by api_call(), remember the
# largest value seen for that key.
key_max = {}
data = api_call()

for record in data:
    for field in record:
        value = record[field]
        # First sighting of the key, or a new maximum for it.
        if field not in key_max or value > key_max[field]:
            key_max[field] = value

# Report the maxima, leaving out the 'readme' and 'description' entries.
for field in key_max:
    if field not in ('readme', 'description'):
        print("%s %s" % (field, key_max[field]))
In [40]:
# Show the largest value in `labels` (as returned by get_data()).
# NOTE(review): presumably a quick check of the highest class index - confirm.
max(labels)
Out[40]:
In [ ]: