In [5]:
import nltk
In [6]:
from nltk import word_tokenize
from nltk import pos_tag
from nltk.chunk import ne_chunk
# Sample sentence pushed through the NLTK pipeline:
# tokenize -> part-of-speech tag -> named-entity chunk, then print the tree.
text = "Jack and Jill went to Capitol Hill"
print(ne_chunk(pos_tag(word_tokenize(text))))
In [7]:
# Build the NLTK named-entity chunk tree for `text` and display it.
t = ne_chunk(pos_tag(word_tokenize(text)))
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): %pylab inline also turns on inline plotting and additionally
# star-imports numpy/matplotlib names into the namespace; it looks redundant
# with the line above -- confirm nothing relies on those names before removing.
%pylab inline
# NOTE(review): runs NLTK's bundled tree-drawing demo; presumably leftover
# exploration rather than part of the analysis -- confirm.
nltk.draw.tree.demo()
# Draw the chunk tree built above.
t.draw()
In [8]:
import httplib, urllib
import os, csv
import codecs
import json
In [9]:
def ping():
    """Send a ping request to the Rosette API and return the raw reply.

    The request issued is::

        GET /rest/v1/ping HTTP/1.1
        Host: api.rosette.com
        user_key: <API key>
        Content-Type: application/json
        Accept: application/json

    The API key is read from the ``ROSETTE_USER_KEY`` environment variable
    when it is set; otherwise the key originally embedded in this notebook
    is used so existing runs keep working.

    Returns:
        The response body exactly as returned by ``response.read()``.
    """
    headers = {
        # NOTE(review): the fallback below is a credential committed to the
        # notebook; prefer setting ROSETTE_USER_KEY and rotating this key.
        "user_key": os.environ.get("ROSETTE_USER_KEY",
                                   "40fe14de7872ebf3b8c5e11c17fb7a5f"),
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    conn = httplib.HTTPSConnection("api.rosette.com")
    try:
        conn.request("GET", "/rest/v1/ping", "", headers)
        response = conn.getresponse()
        return response.read()
    finally:
        # Close the connection even when the request or read fails.
        conn.close()
def get_entities(input_text):
    """Request entity extraction for ``input_text`` from the Rosette API.

    The request issued is::

        POST /rest/v1/entities HTTP/1.1
        Host: api.rosette.com
        user_key: <API key>
        Content-Type: application/json
        Accept: application/json

        {"type": "text", "content": <input_text>}

    The API key is read from the ``ROSETTE_USER_KEY`` environment variable
    when it is set; otherwise the key originally embedded in this notebook
    is used so existing runs keep working.

    Args:
        input_text: The text to analyse. It is JSON-encoded before sending,
            so quotes, backslashes and newlines in the text are transmitted
            intact instead of corrupting the request body.

    Returns:
        The response body exactly as returned by ``response.read()``.
    """
    headers = {
        # NOTE(review): the fallback below is a credential committed to the
        # notebook; prefer setting ROSETTE_USER_KEY and rotating this key.
        "user_key": os.environ.get("ROSETTE_USER_KEY",
                                   "40fe14de7872ebf3b8c5e11c17fb7a5f"),
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    # Serialise with json.dumps rather than string concatenation so that the
    # payload is always valid JSON regardless of what the text contains.
    body = json.dumps({"type": "text", "content": input_text})
    conn = httplib.HTTPSConnection("api.rosette.com")
    try:
        conn.request("POST", "/rest/v1/entities", body, headers)
        response = conn.getresponse()
        return response.read()
    finally:
        # Close the connection even when the request or read fails.
        conn.close()
def get_morphology(input_text):
    """Request part-of-speech morphology for ``input_text`` from the Rosette API.

    The request issued is::

        POST /rest/v1/morphology/parts-of-speech HTTP/1.1
        Host: api.rosette.com
        user_key: <API key>
        Content-Type: application/json
        Accept: application/json

        {"language": "eng", "content": <input_text>}

    NOTE(review): the original header comment named
    ``/rest/v1/morphology/complete``, but the code has always posted to
    ``/rest/v1/morphology/parts-of-speech``; ``complete`` appears to be an
    alternative endpoint that was tried -- confirm which one is intended.

    The API key is read from the ``ROSETTE_USER_KEY`` environment variable
    when it is set; otherwise the key originally embedded in this notebook
    is used so existing runs keep working.

    Args:
        input_text: The text to analyse; the request declares it as English
            (``"language": "eng"``). It is JSON-encoded before sending, so
            quotes, backslashes and newlines are transmitted intact.

    Returns:
        The response body exactly as returned by ``response.read()``.
    """
    headers = {
        # NOTE(review): the fallback below is a credential committed to the
        # notebook; prefer setting ROSETTE_USER_KEY and rotating this key.
        "user_key": os.environ.get("ROSETTE_USER_KEY",
                                   "40fe14de7872ebf3b8c5e11c17fb7a5f"),
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    # Serialise with json.dumps rather than string concatenation so that the
    # payload is always valid JSON regardless of what the text contains.
    body = json.dumps({"language": "eng", "content": input_text})
    conn = httplib.HTTPSConnection("api.rosette.com")
    try:
        conn.request("POST", "/rest/v1/morphology/parts-of-speech", body, headers)
        response = conn.getresponse()
        return response.read()
    finally:
        # Close the connection even when the request or read fails.
        conn.close()
#print ping()
#print get_entities("Mary had a little lamb.");
#print get_morphology("Mary had a little lamb.");
In [10]:
# Sample English texts available as inputs for the entity-extraction cells.
# Only `jj` is passed to the NLTK and Rosette extraction code visible in this
# notebook; the others are alternative inputs.
la = "LA beat New York last night. John was at the game. There are teams in the NHL."
ny = "I grew up on the town of Milton. I grew up on the town of Milton. Milton is a great place to live. Only in Milton will you find this."
cnn = "Their handling of politically perilous issues this week couldn't have been more different: Jeb Bush, persistently fielding questions in public, hemmed and hawed for days over Iraq, while Hillary Clinton stayed radio silent while her party waged an internal fight over trade."
jj = "Jack and Jill went to the Red River"
In [11]:
#ENTITIES FROM NLTK
def extract_entity_names(t):
    """Collect the named-entity chunks found in an NLTK chunk tree.

    The tree is walked depth-first. A subtree whose label is one of the
    recognised entity types is reported as one entity; its text is the first
    element of each of its children joined by single spaces. Subtrees with
    any other label are searched recursively. Leaves (objects that carry no
    label) contribute nothing.

    Both NLTK tree APIs are supported: the label is read through
    ``t.label()`` when that method exists (NLTK 3) and through the ``t.node``
    attribute otherwise (NLTK 2). Reading only ``node`` made the function
    silently return an empty list on NLTK 3 trees.

    Args:
        t: A chunk tree such as the result of ``ne_chunk``, or one of its
            children.

    Returns:
        A list of ``(entity_text, entity_type)`` tuples in tree order.
    """
    entity_types = ('NE', 'ORGANIZATION', 'PERSON', 'LOCATION', 'DATE',
                    'TIME', 'MONEY', 'PERCENT', 'FACILITY', 'GPE')

    # Prefer the NLTK 3 accessor; never touch `node` when `label()` exists,
    # because NLTK 3.0 keeps `node` as a property that raises when read.
    label_getter = getattr(t, 'label', None)
    if callable(label_getter):
        label = label_getter()
    else:
        label = getattr(t, 'node', None)

    if not label:
        # Leaf (e.g. a (word, tag) tuple) or an unlabelled tree.
        return []

    if label in entity_types:
        return [(' '.join([child[0] for child in t]), label)]

    entity_names = []
    for child in t:
        entity_names.extend(extract_entity_names(child))
    return entity_names
# Run the NLTK pipeline on the `jj` sample and list every entity found,
# one per line, formatted as "<entity text> (<entity type>)".
raw_response = ne_chunk(pos_tag(word_tokenize(jj)))
tuple_response = extract_entity_names(raw_response)
#set(tuple_response)
for e, t in tuple_response:
    print("%s %s" % (e, "(" + t + ")"))
In [12]:
#ENTITIES FROM ROSETTE API
# Send the `jj` sample to the Rosette entities endpoint, parse the JSON reply
# and list every entity, one per line, as "<mention> (<type>)".
raw_response = get_entities(jj)
json_response = json.loads(raw_response)
for e in json_response['entities']:
    print("%s %s" % (e['mention'], "(" + e['type'] + ")"))
In [ ]: