Table of Contents

    
    
    In [1]:
    import pandas as pd
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)
    import numpy as np
    import time, os, json
    # Customized tools
    from lib import *
    from jieba import cut_for_search, add_word, suggest_freq, del_word
    
    from IPython.display import clear_output
    
    
    
    
    Model loaded succeed
    >> Synonyms on loading ...
    
    
    
    In [2]:
    # Load the dump of classifier results to be reviewed below.
    result_dump = 'result_dump/test.csv'
    result = pd.read_csv(result_dump)

    def _load_json(path):
        """Read one JSON corpus file and return the parsed object.

        Uses a context manager so the file handle is closed deterministically
        (the previous `json.load(open(...))` form leaked the handle).
        """
        with open(path, 'r') as f:
            return json.load(f)

    corpus_2_list = _load_json('Classifier/corpus2.json')
    corpus_3_list = _load_json('Classifier/corpus3.json')
    corpus_add_2 = _load_json('Classifier/corpus_add2.json')
    corpus_add_3 = _load_json('Classifier/corpus_add3.json')

    # Transformer is star-imported from the project-local `lib` module above.
    trans = Transformer(corpus_2_list, corpus_3_list,
            corpus_add_2, corpus_add_3)
    
    
    
    
    Building prefix dict from the default dictionary ...
    Loading model from cache /tmp/jieba.cache
    Loading model cost 1.777 seconds.
    Prefix dict has been built succesfully.
    
    
    
    In [3]:
    # Manually inspect each classified title: show its label, token set and
    # feature vector, then wait for Enter.  Typing "stop" ends the session.
    # (The former `if True:` wrapper was a dead conditional and is removed;
    # the loop body is unchanged.)
    n_total = result.shape[0]
    for i, row in result.iterrows():
        # Earlier filter, kept for reference — restricts review to rows
        # labelled 100 whose second feature is positive:
        #     if (row.label == 100) and (trans.get_feature(row.title)[1] > 0):
        clear_output()
        print('TITLE: {}'.format(row.title))
        print('LABEL: {}'.format(row.label))
        print('TOKEN:\n{}'.format(trans.tokenize(row.title)))
        print('FEATURE: {}'.format(trans.get_feature(row.title)))
        # i is the DataFrame index; assumes a default RangeIndex so i+1 is
        # the 1-based position — TODO confirm against result_dump.
        stdin = input('=== {}/{}. Press Enter to Continue ==='.format(i+1, n_total))
        if stdin == 'stop':
            break
    
    
    
    
    TITLE: 意外的恋爱时光_电影_高清1080P在线观看_腾讯视频
    LABEL: 100.0
    TOKEN:
    {'恋爱', '电影', '腾讯', '时光', '观看', '_', '1080p', '意外', '在线', '高清', '视频', '的'}
    FEATURE: [0 0 4 0]
    === 33/100. Press Enter to Continue ===stop
    
    
    
    In [97]:
    # Demonstrate jieba's user-dictionary controls: after del_word, the
    # search-mode tokenizer splits '台中' into single characters; after
    # add_word it is kept as one token.  The sample sentence is hoisted
    # into a variable so the two calls provably use identical input.
    sample = '「台中」正确应该不会被切开'
    del_word('台中')
    print('/'.join(cut_for_search(sample)))
    add_word('台中')
    print('/'.join(cut_for_search(sample)))
    
    
    
    
    「/台/中/」/正确/应该/不会/被/切开
    「/台中/」/正确/应该/不会/被/切开
    
    
    
    In [8]:
    # Re-load the level-2 additional corpus on its own for ad-hoc inspection
    # (same file the In[2] cell reads into corpus_add_2).
    import json

    with open('Classifier/corpus_add2.json') as corpus_file:
        corp_add2 = json.load(corpus_file)
    
    
    
    In [ ]:
    new_list = ['内幕', '不可告人', '嘿咻', '嘿嘿', '']
    
    
    
    In [9]:
    r.append()
    
    
    
    
    Out[9]:
    ['pa']