In [ ]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn import metrics
import pandas
import tensorflow as tf
from tensorflow.contrib import learn
import chainer.links as L
from chainer import optimizers, Chain
from commonml.skchainer import SoftmaxCrossEntropyClassifier, RnnEstimator
from commonml.text import VocabularyTransformer
import logging
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
logging.root.level = logging.INFO  # basicConfig can be a no-op when a handler already exists (common in notebooks)
In [ ]:
# Load the DBpedia ontology classification dataset (14 classes).
dbpedia = learn.datasets.load_dataset('dbpedia')
# Column 1 of the data frame holds the document text; the target is the class label.
X_train, y_train = pandas.DataFrame(dbpedia.train.data)[1], pandas.Series(dbpedia.train.target)
X_test, y_test = pandas.DataFrame(dbpedia.test.data)[1], pandas.Series(dbpedia.test.target)
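A quick look at what was loaded never hurts. This cell uses only standard pandas calls on the Series created above and assumes nothing else.
In [ ]:
# Sanity check: document counts, a sample abstract, and the label set.
print(X_train.shape, X_test.shape)
print(X_train.iloc[0][:100])
print('Classes:', sorted(y_train.unique()))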
In [ ]:
MAX_DOCUMENT_LENGTH = 10
# Map each document to a fixed-length sequence of word ids,
# truncating or padding to MAX_DOCUMENT_LENGTH tokens.
vocab_transformer = VocabularyTransformer(MAX_DOCUMENT_LENGTH)
X_train_idx = np.array(list(vocab_transformer.fit_transform(X_train)))
X_test_idx = np.array(list(vocab_transformer.transform(X_test)))
n_words = len(vocab_transformer.get_feature_names())
print('Total words: %d' % n_words)
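To make the transformation concrete, the next cell inspects the id matrix. It assumes only what the code above implies: VocabularyTransformer yields one fixed-length id sequence per document.
In [ ]:
# Each row is one document, truncated or padded to MAX_DOCUMENT_LENGTH word ids.
print(X_train_idx.shape)  # expected: (n_documents, MAX_DOCUMENT_LENGTH)
print(X_train_idx[0])     # word ids of the first training document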
In [ ]:
EMBEDDING_SIZE = 50
class RNNModel(Chain):
    def __init__(self, n_words, n_units, n_classes):
        super(RNNModel, self).__init__(
            embed=L.EmbedID(n_words, n_units),  # word id -> dense embedding
            l1=L.LSTM(n_units, n_units),        # recurrent layer with internal state
            l2=L.Linear(n_units, n_classes),    # hidden state -> class scores
        )
    def __call__(self, x):
        h0 = self.embed(x)
        h1 = self.l1(h0)
        h2 = self.l2(h1)
        return h2
    def reset_state(self):
        # clear the LSTM state between independent sequences
        self.l1.reset_state()
# 15 output units: the DBpedia labels run 1-14, so index 0 goes unused.
classifier = RnnEstimator(model=SoftmaxCrossEntropyClassifier(RNNModel(n_words, EMBEDDING_SIZE, 15)),
                          optimizer=optimizers.Adam(),
                          batch_size=1000,
                          device=0,  # GPU device id (Chainer convention: -1 for CPU)
                          stop_trigger=(100, 'epoch'))
classifier.fit(X_train_idx, y_train)
score = metrics.accuracy_score(y_test, classifier.predict(X_test_idx))
print('Accuracy: {0:f}'.format(score))
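Accuracy alone can hide weak classes, so as an optional follow-up the cell below prints per-class precision, recall, and F1 with plain scikit-learn. It reuses the fitted classifier and relies on nothing from commonml beyond predict, which is already called above.
In [ ]:
# Per-class breakdown of the test-set predictions.
y_pred = classifier.predict(X_test_idx)
print(metrics.classification_report(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))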