In [80]:
import utilities
from utilities import *
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
import os

def test_sentence(sentence):
    sentence = process_text(sentence)
    vec = vectorizer.transform([sentence]).toarray()
    prediction = model.predict_proba(vec)[0]
    print "Label", label_names[np.argmax(prediction)]
    for i, category in enumerate(label_names):
        print "{}: {:.2f}".format(category, prediction[i])

# Constants controlling the dataset variant; both are forwarded to
# get_data() below and also determine the size of the output layer.
no_dev = False   # NOTE(review): presumably drops the DEV class when True (9 outputs instead of 10) -- confirm in utilities.get_data
binary = False   # when True, a two-class model (2-unit output layer) is built instead of multi-class

In [81]:
print "Get and process data"
# Get raw feature text + labels (and the ordered class-name list) for the
# 'meta' data source from the project utilities.
features, labels, label_names = get_data(whatIWant='meta', binary=binary, no_dev=no_dev)
features = np.asanyarray(features)

# x are the input matrices, y are the vectors holding the target results
x_train, x_test, y_train, y_test = split_train_test(features, labels, ratio=0.7, shuffle=True)

print "Prepare one-hot-encoding"
# One-Hot-Encoding needed for Neural Net Output
# (categorical_crossentropy expects one column per class).
y_train = one_hot_encoding(y_train)
y_test = one_hot_encoding(y_test)


Get and process data
Prepare one-hot-encoding

In [86]:
# Inspect the class names; their order matters, since test_sentence maps
# np.argmax indices back through this list.
label_names


Out[86]:
[u'SKIPPED',
 u'DEV',
 u'UNSURE',
 u'DATA',
 u'WEB',
 u'HW',
 u'OTHER',
 u'UNLABELED',
 u'DOCS',
 u'EDU']

In [84]:
model = None
trained_model_filneame = 'modelMeta'

if trained_model_filneame not in os.listdir('.'):
    # Struktur des Netzes
    model = Sequential()
    input_size = x_train.shape[1]
    model.add(Dense(input_size, input_dim=input_size))
    model.add(Activation('relu'))
    model.add(Dense(input_size * 4))
    model.add(Activation('relu'))
    model.add(Dense(input_size * 4)) # Let's make it deeeep
    model.add(Activation('relu'))
    # Output Layer, one neuron per class
    if binary:
        model.add(Dense(2))
    else:
        if no_dev:
            model.add(Dense(9))
        else:
            model.add(Dense(10)) # 9 without Skipped
    # Softmax zum Normalisieren der Werte, damit Wert des Neurons WSK in % angibt
    model.add(Activation('softmax'))
    adam = Adam()
    model.compile(metrics=['accuracy'], optimizer=adam, loss='categorical_crossentropy')
    print "Built model from scratch"
else:
    model = keras.models.load_model(trained_model_filneame)
    print "Model was loaded from file"


Built model from scratch

In [85]:
print "Train model"
# Fit on the training split; nb_epoch is the Keras 1.x spelling of `epochs`
# (see the fit() signature in the traceback below).
model.fit(x_train, y_train, nb_epoch=10, shuffle=True, verbose=True)


Train model
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-85-9fe697d8f0a8> in <module>()
      1 print "Train model"
----> 2 model.fit(x_train, y_train, nb_epoch=10, shuffle=True, verbose=True)

/Users/andreas/anaconda/lib/python2.7/site-packages/keras/models.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, **kwargs)
    625                               shuffle=shuffle,
    626                               class_weight=class_weight,
--> 627                               sample_weight=sample_weight)
    628 
    629     def evaluate(self, x, y, batch_size=32, verbose=1,

/Users/andreas/anaconda/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight)
   1050                                                            class_weight=class_weight,
   1051                                                            check_batch_dim=False,
-> 1052                                                            batch_size=batch_size)
   1053         # prepare validation data
   1054         if validation_data:

/Users/andreas/anaconda/lib/python2.7/site-packages/keras/engine/training.pyc in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_dim, batch_size)
    981                                    output_shapes,
    982                                    check_batch_dim=False,
--> 983                                    exception_prefix='model target')
    984         sample_weights = standardize_sample_weights(sample_weight,
    985                                                     self.output_names)

/Users/andreas/anaconda/lib/python2.7/site-packages/keras/engine/training.pyc in standardize_input_data(data, names, shapes, check_batch_dim, exception_prefix)
    109                                         ' to have shape ' + str(shapes[i]) +
    110                                         ' but got array with shape ' +
--> 111                                         str(array.shape))
    112     return arrays
    113 

Exception: Error when checking model target: expected activation_60 to have shape (None, 8) but got array with shape (301, 10)

In [78]:
# Get accuracy on test_set
print "Test on {} unknown samples".format(len(x_test))
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy'].
acc = model.evaluate(x_test, y_test, verbose=0)
print "Loss, accuracy: ", acc


Test on 53 unknown samples
Loss, accuracy:  [3.4961194722157605, 0.094339622992951913]

In [17]:
key_max = {}
data = api_call()

for i in xrange(len(data)):
    for x in data[i]:
        if not x in key_max:
            key_max[x] = data[i][x]
        else:
            if data[i][x] > key_max[x]:
                key_max[x] = data[i][x]

for x in key_max:
    if x != 'readme' and x != 'description':
        print x, key_max[x]


files screenshot.png
hasDownloads 1
watches 9
folder_count 93
treeDepth 9
id 99
api_url https://api.github.com/repos/zzolo/dropbox-upstart
author zzolo
stars 964
forks 98
api_calls 99
folders tests
language_array VimL Ruby Shell
commit_count 98
commit_interval_avg 9
branch_count 9
contributors_count 9
class WEB
open_issues_count 9
name zfo-editor
language_main r
url https://github.com/zzolo/dropbox-upstart
avg_commit_length 99
hasWiki 1
treeArray  src srcmipruebagit
file_count 99
commit_interval_max 9
tagger stefan
isFork 1

In [40]:
# Largest numeric label id in the data (the recorded output is 8).
max(labels)


Out[40]:
8

In [ ]: