In [9]:
from os.path import join
import jieba
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB,BernoulliNB,GaussianNB
import numpy as np
def get_train_data(path='', encoding=None):
    """Load and word-segment the positive and negative training corpora.

    Reads ``pos.txt`` and ``neg.txt`` from ``path``. Every line of each file
    becomes one document: it is tokenised with ``jieba.cut(..., cut_all=False)``
    and the tokens are re-joined with single spaces so a whitespace/regex
    based vectorizer can split them again.

    Args:
        path: Directory containing ``pos.txt`` and ``neg.txt``. The default
            ``''`` resolves both names relative to the working directory.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform default, which is what the function used
            before this parameter existed; pass e.g. ``'utf-8'`` when the
            corpus encoding differs from the platform's.

    Returns:
        A tuple ``(X, y)``. ``X`` is a list of space-joined token strings,
        all documents from ``pos.txt`` first, then those from ``neg.txt``.
        ``y`` is the parallel list of labels: 1 for positive, 0 for negative.

    Raises:
        OSError: If either file cannot be opened.
    """
    def _segment_file(filename):
        # Segment one corpus file, one document per line.
        # Lines are handed to jieba exactly as read (not stripped).
        # Iterating the handle avoids loading the whole file via readlines().
        with open(join(path, filename), 'r', encoding=encoding) as fr:
            return [' '.join(jieba.cut(line, cut_all=False)) for line in fr]

    pos_docs = _segment_file('pos.txt')
    neg_docs = _segment_file('neg.txt')

    X = pos_docs + neg_docs
    y = [1] * len(pos_docs) + [0] * len(neg_docs)
    return X, y

def bagOfWord(X):
    """Fit a bag-of-words vectorizer on ``X`` and persist it to disk.

    Args:
        X: Iterable of documents, each a string of space-separated tokens.

    Returns:
        The document-term count matrix produced by
        ``CountVectorizer.fit_transform`` (a sparse matrix).

    Side effects:
        Creates ``./model`` if it is missing, pickles the fitted vectorizer
        to ``./model/vectorizer.pkl`` and prints a progress message.
    """
    # Local import: the notebook's import cell only pulls `join` from os.path.
    import os

    # The token pattern accepts tokens of one or more word characters, so
    # single-character tokens survive. min_df=8 sets the minimum document
    # frequency a term needs to be kept in the vocabulary.
    vectorizer = CountVectorizer(min_df=8, token_pattern=r"(?u)\b\w+\b")
    X = vectorizer.fit_transform(X)

    # Without this, open() raises FileNotFoundError on a fresh checkout.
    os.makedirs('./model', exist_ok=True)
    with open('./model/vectorizer.pkl', 'wb') as fw:
        print('save text vectorizer to ./model/')
        pickle.dump(vectorizer, fw)
    return X

def train_model(X=[],y=[],model=''):
    """Fit a Naive Bayes classifier on ``X``/``y`` and pickle it to disk.

    Args:
        X: Feature matrix; may be a sparse matrix such as the output of
            ``CountVectorizer``. The default list is never mutated.
        y: Labels aligned with the rows of ``X``. The default list is never
            mutated.
        model: Which classifier to train. ``'GaussianNB'`` selects
            ``GaussianNB``, ``'Bernoulli'`` selects ``BernoulliNB``; any
            other value (including the default ``''``) selects
            ``MultinomialNB``.

    Returns:
        None.

    Side effects:
        Creates ``./model`` if it is missing, writes the fitted classifier to
        ``./model/bayes.pkl`` and prints a progress message.
    """
    # Local import: the notebook's import cell only pulls `join` from os.path.
    import os

    if model == 'GaussianNB':
        bayes = GaussianNB()
        # GaussianNB only accepts dense input, while the vectorizer hands us
        # a sparse matrix; densify so this branch does not fail in fit().
        if hasattr(X, 'toarray'):
            X = X.toarray()
    elif model == 'Bernoulli':
        bayes = BernoulliNB()
    else:
        bayes = MultinomialNB()
    bayes.fit(X, y)

    print('saving bayes model to ./model')
    # Without this, open() raises FileNotFoundError when ./model is missing.
    os.makedirs('./model', exist_ok=True)
    with open('./model/bayes.pkl','wb') as fw:
        pickle.dump(bayes,fw)

def train(train_path=''):
    """Run the full training pipeline on the corpus under ``train_path``.

    Loads the labelled documents, converts them to bag-of-words features and
    fits the default classifier. Returns None; the called steps write their
    artefacts to disk.
    """
    documents, labels = get_train_data(path=train_path)
    features = bagOfWord(documents)
    train_model(features, labels)
def predict_sentence(s='', model_dir='./model'):
    """Classify one sentence and print the result.

    Args:
        s: Raw sentence to classify.
        model_dir: Directory holding ``vectorizer.pkl`` and ``bayes.pkl``.
            Defaults to ``./model``, the directory the training functions in
            this notebook write to, instead of a machine-specific absolute
            path.

    Returns:
        None. Prints the raw prediction array followed by ``'positive'`` when
        the predicted label is 1, otherwise ``'negtive'``.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    # NOTE(review): unpickling executes arbitrary code from the file; only
    # point model_dir at artefacts you produced or otherwise trust.
    with open(join(model_dir, 'vectorizer.pkl'), 'rb') as f:
        vectorizer = pickle.load(f, encoding='latin1')
    with open(join(model_dir, 'bayes.pkl'), 'rb') as f:
        bayes = pickle.load(f, encoding='latin1')

    # Segment exactly as at training time so tokens match the vocabulary.
    segmented = [' '.join(jieba.cut(s, cut_all=False))]
    x = vectorizer.transform(segmented)
    # GaussianNB cannot consume the sparse matrix the vectorizer returns.
    if isinstance(bayes, GaussianNB):
        x = x.toarray()

    predict = bayes.predict(x)
    print(predict)
    # Exactly one sentence was passed in, so inspect that single label
    # rather than relying on the truthiness of a one-element array.
    if predict[0] == 1:
        print('positive')
    else:
        print('negtive')
#train(train_path = 'G:\\code\\DLNLP\\src\\data')

In [11]:
# Try the classifier on one sample sentence (roughly: "This thing feels too
# expensive, I'm very dissatisfied").
predict_sentence('这个东西感觉太贵了,我很不满意')


[0]
negtive

In [ ]: