In [1]:
# -*- coding: utf-8 -*-
# Word segmentation demo: load the LTP `cws.model` and segment one sentence.
import os
from pyltp import Segmentor

# Directory holding the LTP models. Override it with the LTP_DATA_DIR
# environment variable; otherwise the original local path is used.
LTP_DATA_DIR = os.environ.get(
    'LTP_DATA_DIR',
    '/Users/lipingzhang/Desktop/program/jd/jd_crm/jd_crm/data_prepare/ltp_data_v3.4.0/',
)
cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # segmentation model file, named `cws.model`

segmentor = Segmentor()  # create the segmenter instance
segmentor.load(cws_model_path)  # load the model
try:
    words = segmentor.segment('元芳你怎么看')  # run word segmentation
    # A single-argument print(...) call behaves identically on Python 2 and 3.
    print('\t'.join(words))
finally:
    segmentor.release()  # release the model even if segmentation fails
In [2]:
# -*- coding: utf-8 -*-
# Sentence splitting demo: split a short text into sentences and print one per line.
from pyltp import SentenceSplitter

sents = SentenceSplitter.split('元芳你怎么看?我就趴窗口上看呗!')  # split into sentences
# A single-argument print(...) call behaves identically on Python 2 and 3.
print('\n'.join(sents))
In [7]:
# -*- coding: utf-8 -*-
# Part-of-speech tagging demo: load the LTP `pos.model` and tag a pre-segmented sentence.
import os
from pyltp import Postagger

# Directory holding the LTP models. Override it with the LTP_DATA_DIR
# environment variable; otherwise the original local path is used.
LTP_DATA_DIR = os.environ.get(
    'LTP_DATA_DIR',
    '/Users/lipingzhang/Desktop/program/jd/jd_crm/jd_crm/data_prepare/ltp_data_v3.4.0/',
)
pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # POS tagging model file, named `pos.model`

postagger = Postagger()  # create the tagger instance
postagger.load(pos_model_path)  # load the model
try:
    words = ['元芳', '你', '怎么', '看']  # word segmentation result used as input
    postags = postagger.postag(words)  # run POS tagging
    # Single-argument print(...) calls behave identically on Python 2 and 3.
    print('\t'.join(postags))
    print(list(postags))
finally:
    postagger.release()  # release the model even if tagging fails
In [9]:
# Strip the leading "cosine_sim_|" marker from a feature string and print the rest.
cs = "cosine_sim_|及时:包装,高:性价比"
prefix = "cosine_sim_|"
# Only slice when the marker is really there, so an unprefixed value is kept
# intact instead of losing its first len(prefix) characters.
c = cs[len(prefix):] if cs.startswith(prefix) else cs
# A single-argument print(...) call behaves identically on Python 2 and 3.
print(c)
In [ ]: