In [1]:
import sys
import json
sys.path.append('..')

import tagnews
import folium

In [2]:
# Load the article data set. Later cells call `.sample(1)` on it and read the
# 'bodytext' column, so this is presumably a pandas DataFrame -- TODO confirm.
df = tagnews.load_data()


..\tagnews\utils\load_data.py:185: RuntimeWarning: 1 location strings were not found in the bodytext.
  RuntimeWarning)

In [3]:
# Tagger used below via `tagtext_proba(text)` to score an article's text.
crimetags = tagnews.CrimeTags()
# Extractor used below via `extract_geostrings(text, prob_thresh=...)` to pull
# location strings (as token sequences) out of an article's text.
geoextractor = tagnews.GeoCoder()

In [4]:
# Tunable settings for this cell.
MIN_TAG_PROBA = .15  # articles whose max `tagtext_proba` value is below this are skipped
GEOSTRING_PROB_THRESH = 0.5  # forwarded to `extract_geostrings` as `prob_thresh`
MAP_CENTER = [41.87871, -87.6298]  # initial [lat, long] of the map; looks like downtown Chicago -- confirm


def _first_result_score(geocode_response):
    """Return the 'score' of the first result in a geocoder response.

    The response is expected to expose `.response.content` holding a JSON
    document shaped like ``{"result": [{"score": ...}, ...]}``.

    Returns None when the score cannot be read (missing attribute, invalid
    JSON, no results, or no 'score' key). Only those lookup/parsing failures
    are handled; anything else (e.g. KeyboardInterrupt) propagates.
    """
    try:
        return json.loads(geocode_response.response.content)['result'][0]['score']
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass, so bad JSON lands here too.
        return None


# Keep drawing random articles until one both passes the tag-probability
# threshold and yields at least one geostring, then geocode and map it.
while True:
    random_sample = df.sample(1)
    article_text = random_sample['bodytext'].iloc[0]
    if crimetags.tagtext_proba(article_text).max() < MIN_TAG_PROBA:
        continue

    geostrings = [
        ' '.join(gs)
        for gs in geoextractor.extract_geostrings(article_text, prob_thresh=GEOSTRING_PROB_THRESH)
    ]
    # Check for an empty extraction *before* geocoding so the geocoder is
    # never invoked with nothing to look up.
    if not geostrings:
        continue

    geocode_results = tagnews.get_lat_longs_from_geostrings(geostrings)
    lat_longs_raw = geocode_results.lat_longs_raw
    lat_longs_post = geocode_results.lat_longs_post
    raw_scores = [_first_result_score(gr) for gr in geocode_results.full_responses_raw]
    post_scores = [_first_result_score(gr) for gr in geocode_results.full_responses_post]

    print('Article ID: {}'.format(random_sample.index[0]))

    m = folium.Map(location=MAP_CENTER)

    for geostring, lat_long_raw, lat_long_post, raw_score, post_score in zip(
            geostrings, lat_longs_raw, lat_longs_post, raw_scores, post_scores):
        # One marker per successfully geocoded variant (raw / post-processed).
        if lat_long_raw is None:
            print('  Unable to code raw "{}"'.format(geostring))
        else:
            folium.Marker(lat_long_raw, popup=geostring + ' ; RAW ; {}'.format(raw_score)).add_to(m)

        if lat_long_post is None:
            print('  Unable to code post-processed "{}"'.format(geostring))
        else:
            folium.Marker(lat_long_post, popup=geostring + ' ; POST ; {}'.format(post_score)).add_to(m)

        # Ratio of raw to post-processed score; unavailable when either score
        # is missing (None) or the post-processed score is zero.
        try:
            score_ratio = raw_score / post_score
        except (TypeError, ZeroDivisionError):
            score_ratio = 'N/A'
        print('{}: {}'.format(geostring, score_ratio))
    break

# Last expression of the cell: render the map.
m


Article ID: 205277
south: 6.251640619617706
Ind.,: 3.4382634669318946
1800 block of East 222nd Place: 1.1430729818468413
1700 block of West 220th Place,: 1.5479177721231407
Out[4]:

In [5]:
# Show the body text of the article picked by the sampling loop, so the
# extracted geostrings can be checked against what the article actually says.
print(article_text)


**CHICAGO (STMW) --** A 23-year-old man who was shot in south suburban Sauk Vill
age died early Thursday.

Manuel G. Montoya was pronounced dead at 1:19 a.m. at St. Margaret Hospital in
Dyer, Ind., a Lake County coroner’s office statement said.

He was shot in the 1800 block of East 222nd Place in Sauk Village, but police
and a representative at the coroner’s office could not say when the shooting
happened.

He died of a gunshot wound, and his death was ruled a homicide, the coroner’s
office said.

Montoya lived in the 1700 block of West 220th Place, about half a mile
northeast of the shooting.

Sauk Village police could not provide further details early Thursday.

_(Source: Sun-Times Media Wire (C) Chicago Sun-Times 2015. All Rights
Reserved. This material may not be published, broadcast, rewritten, or
redistributed.)_

![][1]

   [1]: http://pixel.wp.com/b.gif?host=chicago.cbslocal.com&blog=15116062&post=6
49158&subd=cbschicago&ref=&feed=1


In [ ]: