In [1]:
# -*- coding: utf-8 -*-
# Word segmentation demo: load the LTP `cws.model`, segment one sentence and
# print the resulting words separated by tabs.
import os

from pyltp import Segmentor

# Directory holding the LTP models. Defaults to the original local path; set the
# LTP_DATA_DIR environment variable to point at a different location.
LTP_DATA_DIR = os.environ.get(
    'LTP_DATA_DIR',
    '/Users/lipingzhang/Desktop/program/jd/jd_crm/jd_crm/data_prepare/ltp_data_v3.4.0/'
)
cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # segmentation model file, named `cws.model`

segmentor = Segmentor()  # create the segmenter instance
segmentor.load(cws_model_path)  # load the model
try:
    words = segmentor.segment('元芳你怎么看')  # segment the sentence into words
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print('\t'.join(words))
finally:
    segmentor.release()  # always release the model, even if segmentation fails


元芳	你	怎么	看

In [2]:
# -*- coding: utf-8 -*-
# Sentence splitting demo: split a string into sentences and print one per line.
from pyltp import SentenceSplitter

sents = SentenceSplitter.split('元芳你怎么看?我就趴窗口上看呗!')  # split into sentences
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print('\n'.join(sents))


元芳你怎么看?
我就趴窗口上看呗!

In [7]:
# -*- coding: utf-8 -*-
# Part-of-speech tagging demo: load the LTP `pos.model`, tag an already
# segmented sentence and print the tags (tab-joined, then as a Python list).
import os

from pyltp import Postagger

# Directory holding the LTP models. Defaults to the original local path; set the
# LTP_DATA_DIR environment variable to point at a different location.
LTP_DATA_DIR = os.environ.get(
    'LTP_DATA_DIR',
    '/Users/lipingzhang/Desktop/program/jd/jd_crm/jd_crm/data_prepare/ltp_data_v3.4.0/'
)
pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # POS tagging model file, named `pos.model`

postagger = Postagger()  # create the tagger instance
postagger.load(pos_model_path)  # load the model
try:
    words = ['元芳', '你', '怎么', '看']  # segmentation result to be tagged
    postags = postagger.postag(words)  # run part-of-speech tagging

    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print('\t'.join(postags))
    print(list(postags))
finally:
    postagger.release()  # always release the model, even if tagging fails


nh r r v
['nh', 'r', 'r', 'v']

In [9]:
# Strip the leading "cosine_sim_|" marker from a string and print the remainder.
PREFIX = "cosine_sim_|"  # marker removed from the front of `cs`
cs = "cosine_sim_|及时:包装,高:性价比"
# Slice only when the marker is actually present, so a string without it is
# left intact instead of losing its first len(PREFIX) characters.
c = cs[len(PREFIX):] if cs.startswith(PREFIX) else cs
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(c)


及时:包装,高:性价比

In [ ]: