In [1]:
import sys
import json
sys.path.append('..')
import tagnews
import folium
In [2]:
# Load the article data set. The sampling cell below draws random rows from
# `df` and reads each row's 'bodytext' column.
df = tagnews.load_data()
In [3]:
# Build the two tagnews helpers used by the sampling cell below:
#   crimetags    -- its tagtext_proba() scores an article's text
#   geoextractor -- its extract_geostrings() pulls location strings from the text
crimetags = tagnews.CrimeTags()
geoextractor = tagnews.GeoCoder()
In [4]:
# Tunable values for this cell.
MIN_TAG_PROBA = .15          # skip articles whose max tagtext_proba() value is below this
GEOSTRING_PROB_THRESH = 0.5  # forwarded as prob_thresh to extract_geostrings()
MAP_CENTER = [41.87871, -87.6298]  # initial map location (presumably central Chicago -- confirm)


def _first_result_score(geocode_response):
    """Return the 'score' of the first result in a geocoder response.

    The response body is parsed as JSON and ``['result'][0]['score']`` is
    returned. Returns None when the response is missing, is not valid JSON,
    or does not have that structure (best-effort, never raises for ordinary
    errors).
    """
    try:
        return json.loads(geocode_response.response.content)['result'][0]['score']
    except Exception:
        # Deliberately broad so any malformed response maps to None, but NOT a
        # bare `except:` -- KeyboardInterrupt/SystemExit must still propagate so
        # the kernel can be interrupted while the loop below is running.
        return None


# Keep drawing random articles until one both passes the tag-probability
# threshold and yields at least one geostring, then geocode and map it.
while True:
    random_sample = df.sample(1)
    article_text = random_sample['bodytext'].iloc[0]
    if crimetags.tagtext_proba(article_text).max() < MIN_TAG_PROBA:
        continue

    geostrings = [
        ' '.join(gs)
        for gs in geoextractor.extract_geostrings(article_text,
                                                  prob_thresh=GEOSTRING_PROB_THRESH)
    ]
    # Check for an empty result *before* geocoding so no geocoder call is
    # spent on an article that will be discarded anyway.
    if not geostrings:
        continue

    geocode_results = tagnews.get_lat_longs_from_geostrings(geostrings)
    lat_longs_raw = geocode_results.lat_longs_raw
    lat_longs_post = geocode_results.lat_longs_post
    raw_scores = [_first_result_score(gr)
                  for gr in geocode_results.full_responses_raw]
    post_scores = [_first_result_score(gr)
                   for gr in geocode_results.full_responses_post]

    print('Article ID: {}'.format(random_sample.index[0]))
    m = folium.Map(location=MAP_CENTER)
    for geostring, lat_long_raw, lat_long_post, raw_score, post_score in zip(
            geostrings, lat_longs_raw, lat_longs_post, raw_scores, post_scores):
        # One marker per successfully geocoded variant (raw / post-processed).
        if lat_long_raw is None:
            print(' Unable to code raw "{}"'.format(geostring))
        else:
            folium.Marker(lat_long_raw,
                          popup=geostring + ' ; RAW ; {}'.format(raw_score)).add_to(m)
        if lat_long_post is None:
            print(' Unable to code post-processed "{}"'.format(geostring))
        else:
            folium.Marker(lat_long_post,
                          popup=geostring + ' ; POST ; {}'.format(post_score)).add_to(m)
        # Ratio of raw to post-processed score; 'N/A' when either score is
        # missing/non-numeric (TypeError) or the post score is zero.
        try:
            print('{}: {}'.format(geostring, raw_score / post_score))
        except (TypeError, ZeroDivisionError):
            print('{}: {}'.format(geostring, 'N/A'))
    break

# Last expression of the cell: render the map inline.
m
Out[4]:
In [5]:
# Print the body text of the article selected by the sampling cell above.
# `article_text` is set inside that cell's loop, so that cell must be run first.
print(article_text)
In [ ]: