In [2]:
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone package; fall back only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
In [3]:
# Load the post corpus from the ./posts directory (path is relative to the
# notebook's working directory). The returned object exposes `.data` and
# `.target`, which the split below consumes.
posts = load_files("./posts")
In [4]:
# Hold out part of the corpus for evaluation (scikit-learn's default test size
# is used). The fixed random_state keeps the partition, and therefore the score
# reported later, identical across Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(
    posts.data, posts.target, random_state=42
)
In [5]:
# Bag-of-words encoder with default settings; it is fitted in the next cell.
vectorizer = CountVectorizer()
In [6]:
# Learn the vocabulary from the training documents only, so nothing from the
# held-out test split influences the feature space.
vectorizer.fit(X_train)
Out[6]:
In [7]:
# Encode both splits with the vocabulary learned from the training documents,
# so the train and test matrices share the same feature columns.
X_train_vectorized, X_test_vectorized = (
    vectorizer.transform(split) for split in (X_train, X_test)
)
In [8]:
# Multi-layer perceptron with default hyperparameters. Weight initialisation
# and mini-batch sampling are random, so pin random_state to make training and
# the resulting score reproducible.
clf = MLPClassifier(random_state=42)
In [9]:
# Train the classifier on the vectorized training documents and their labels.
clf.fit(X_train_vectorized, Y_train)
Out[9]:
In [10]:
# Evaluate on the held-out split; for scikit-learn classifiers `score` reports
# mean accuracy. The value is shown as the cell output.
clf.score(X_test_vectorized, Y_test)
Out[10]:
In [11]:
# Persist the fitted vectorizer: the classifier was trained on this exact
# vocabulary, so inference must reuse it to produce matching feature columns.
joblib.dump(vectorizer, 'count_vectorizer.joblib')
Out[11]:
In [12]:
# Persist the trained classifier. It is only usable together with the
# vectorizer it was trained with, which is saved separately.
joblib.dump(clf, 'mlp_post_classifier.joblib')
Out[12]:
In [ ]: