notebook.community

Edit and run



In [1]:

    
from __future__ import unicode_literals
import requests
import json
import requests
from bson.objectid import ObjectId
import re
from pymongo import MongoClient
import tangelo
import glob
# import pandas as pd
from pyelasticsearch import ElasticSearch
import sys, os
import utilities

# read in config file
from ConfigParser import ConfigParser
#__location__ = os.path.realpath(
#            os.path.join(os.getcwd(), os.path.dirname(__file__)))
# glob.glob returns a list of matching paths, so config_file is a list
# (empty when there is no config.ini in the current working directory).
config_file = glob.glob('config.ini')
parser = ConfigParser()
# ConfigParser.read accepts a list of filenames; with an empty list nothing is
# loaded and the parser.get calls below will raise for the missing sections.
parser.read(config_file)
# Directory that is appended to sys.path below so that `mitie` can be imported.
mitie_directory = parser.get('Locations', 'mitie_directory')
# URL that places() POSTs the text to in order to obtain the country filter.
country_endpoint = parser.get('Endpoints', 'country_endpoint')



# Must run before the mitie import: the package is located via mitie_directory.
sys.path.append(mitie_directory)
from mitie import *

# Client for the local Elasticsearch instance.
# NOTE(review): `es` is not referenced by any of the cells visible here.
es = ElasticSearch(urls='http://localhost:9200', timeout=60, max_retries=2)


# Skip list used by places(): an entity whose text is exactly one of these
# strings (case-sensitive list membership) is not geocoded. Besides country
# names it also holds continents, abbreviations and organisations
# (e.g. "Europe", "UK", "USA", "United Nations", "UN").
country_names = ["Afghanistan","Åland Islands","Albania","Algeria","American Samoa",
                 "Andorra","Angola","Anguilla","Antarctica","Antigua and Barbuda",
                 "Argentina","Armenia","Aruba","Ascension Island","Australia","Austria",
                 "Azerbaijan","Bahamas","Bahrain","Bangladesh","Barbados","Belarus",
                 "Belgium","Belize","Benin","Bermuda","Bhutan","Bolivia",
                 "Bonaire, Sint Eustatius, and Saba","Bosnia and Herzegovina","Botswana",
                 "Bouvet Island","Brazil","Britain","Great Britain", "British Indian Ocean Territory",
                 "British Virgin Islands","Brunei","Bulgaria","Burkina Faso","Burundi","Cambodia",
                 "Cameroon","Canada","Canary Islands","Cape Verde","Cayman Islands","Central African Republic",
                 "Ceuta and Melilla","Chad","Chile","China","Christmas Island","Clipperton Island",
                 "Cocos [Keeling] Islands","Colombia","Comoros","Congo - Brazzaville","Congo - Kinshasa","Congo",
                 "Democratic Republic of Congo", "Cook Islands","Costa Rica","Côte d’Ivoire","Croatia","Cuba",
                 "Curaçao","Cyprus","Czech Republic","Denmark","Diego Garcia","Djibouti","Dominica",
                 "Dominican Republic","Ecuador","Egypt","El Salvador","Equatorial Guinea","Eritrea",
                 "Estonia","Ethiopia","European Union","Falkland Islands","Faroe Islands","Fiji","Finland",
                 "France","French Guiana","French Polynesia","French Southern Territories","Gabon","Gambia",
                 "Gaza","Georgia","Germany","Ghana","Gibraltar","Greece","Greenland","Grenada","Guadeloupe",
                 "Guam","Guatemala","Guernsey","Guinea","Guinea-Bissau","Guyana","Haiti",
                 "Heard Island and McDonald Islands","Honduras","Hong Kong SAR China","Hungary","Iceland",
                 "India","Indonesia","Iran","Iraq","Ireland","Isle of Man","Israel","Italy","Jamaica","Japan",
                 "Jersey","Jordan","Kazakhstan","Kenya","Kiribati","Kuwait","Kyrgyzstan","Laos","Latvia","Lebanon",
                 "Lesotho","Liberia","Libya","Liechtenstein","Lithuania","Luxembourg","Macau SAR China","Macedonia",
                 "Madagascar","Malawi","Malaysia","Maldives","Mali","Malta","Marshall Islands","Martinique","Mauritania",
                 "Mauritius","Mayotte","Mexico","Micronesia","Moldova","Monaco","Mongolia","Montenegro","Montserrat",
                 "Morocco","Mozambique","Myanmar [Burma]","Namibia","Nauru","Nepal","Netherlands","Netherlands Antilles",
                 "New Caledonia","New Zealand","Nicaragua","Niger","Nigeria","Niue","Norfolk Island","North Korea",
                 "Northern Ireland", "Northern Mariana Islands","Norway","Oman","Outlying Oceania","Pakistan","Palau",
                 "Palestinian Territories","Panama","Papua New Guinea","Paraguay","Peru","Philippines","Pitcairn Islands",
                 "Poland","Portugal","Puerto Rico","Qatar","Réunion","Romania","Russia","Rwanda","Saint Barthélemy",
                 "Saint Helena","Saint Kitts and Nevis","Saint Lucia","Saint Martin","Saint Pierre and Miquelon",
                 "Saint Vincent and the Grenadines","Samoa","San Marino","São Tomé and Príncipe","Saudi Arabia",
                 "Senegal","Serbia","Serbia and Montenegro","Seychelles","Sierra Leone","Singapore","Sint Maarten",
                 "Slovakia","Slovenia","Solomon Islands","Somalia","South Africa",
                 "South Georgia and the South Sandwich Islands","South Korea","South Sudan","Spain","Sri Lanka",
                 "Sudan","Suriname","Svalbard and Jan Mayen","Swaziland","Sweden","Switzerland","Syria","Taiwan",
                 "Tajikistan","Tanzania","Thailand","Timor-Leste","Togo","Tokelau","Tonga","Trinidad and Tobago",
                 "Tristan da Cunha","Tunisia","Turkey","Turkmenistan","Turks and Caicos Islands","Tuvalu",
                 "U.S. Minor Outlying Islands","U.S. Virgin Islands","Uganda","Ukraine","United Arab Emirates",
                 "United Kingdom","UK","United States","USA", "United States of America", "Uruguay","Uzbekistan",
                 "Vanuatu","Vatican City","Venezuela","Vietnam","Wallis and Futuna","Western Sahara","Yemen",
                 "Zambia","Zimbabwe", "Europe", "America", "Africa", "Asia", "North America", "South America",
                 "United Nations","UN"]





@tangelo.restful
def get():
    # Usage text returned for GET requests. The example keys must match the
    # dictionaries built in places(), which emits "searchterm" (the previous
    # text advertised the misspelled key "seachterm").
    return """
    This service expects a POST in the form '{"text":"On 12 August, the BBC reported that..."}'
    
    It will return the places mentioned in the text along with their latitudes and longitudes in the form: 
        {"lat":34.567, "lon":12.345, "searchterm":"Baghdad", "placename":"Baghdad", "countrycode":"IRQ"}
    """



In [2]:

    
# Ordered feature preferences, selected by the first pattern found in the term.
# Each preference is a (field, wanted value) pair tested against a hit's
# `_source`; preferences are tried in order and, within one preference, hits
# are scanned in result order. This is all obviously Syria and Iraq specific.
# Matching is case-sensitive, so "District" does not capture "Subdistrict"
# (lower-case "d").
_TERM_PREFERENCES = (
    # Governorate/Province search: top-level ADM1, then any area, then an inhabited place
    ("Governorate|Province|Wilayah",
     (('feature_code', 'ADM1'), ('feature_class', 'A'), ('feature_class', 'P'))),
    # District search: areas first, then inhabited places
    ("District", (('feature_class', 'A'), ('feature_class', 'P'))),
    # Subdistrict search: inhabited places
    ("Subdistrict", (('feature_class', 'P'),)),
    # Airport search: spot features first, then inhabited places
    ("Airport", (('feature_class', 'S'), ('feature_class', 'P'))),
)

# No special word in the term: favour a PPLA hit, otherwise the top hit.
# Not sure whether this should pick a city instead. Example: "Aleppo" should go to Aleppo the city.
# But switching makes Damascus resolve to the wrong place, since the city of Damascus
# doesn't make it into the top 10 for some reason.
_DEFAULT_PREFERENCES = (('feature_code', 'PPLA'),)


def _hit_to_loc(hit, term):
    """Flatten one search hit into [lat, lon, term, asciiname, feature_class, country_code3]."""
    source = hit['_source']
    # 'coordinates' is a "lat,lon" string in the hits this code handles.
    coords = source['coordinates'].split(",")
    return [float(coords[0]), float(coords[1]), term, source['asciiname'],
            source['feature_class'], source['country_code3']]


def _preferences_for(term):
    """Return the preference tuple of the first pattern found in `term`, else the default."""
    for pattern, preferences in _TERM_PREFERENCES:
        if re.search(pattern, term):
            return preferences
    return _DEFAULT_PREFERENCES


def pick_best_result(results, term):
    """Pick the best geonames hit for a search term.

    Given a search term and the elasticsearch/geonames result from that
    search, return the best lat, lon, searchterm, place name.

    Parameters:
        results: search response; the hits are read from results['hits']['hits'].
        term: the original term. Words such as "Governorate", "District",
            "Subdistrict" or "Airport" in it change which kind of feature is
            preferred.

    Returns:
        [lat, lon, term, asciiname, feature_class, country_code3] for the
        chosen hit, or [] when `results` has no usable hit list or no hits.
        When no hit satisfies any preference the first hit is used.

    Raises:
        KeyError / ValueError / IndexError if a hit lacks an expected field or
        its coordinates cannot be parsed.
    """
    try:
        hits = results['hits']['hits']
    except (KeyError, TypeError):
        # Missing keys or a non-mapping response: nothing to choose from.
        return []
    if not hits:
        # end if there are no results
        return []

    for field, wanted in _preferences_for(term):
        for hit in hits:
            if hit['_source'][field] == wanted:
                return _hit_to_loc(hit, term)
    # last resort, just take the first result.
    return _hit_to_loc(hits[0], term)



In [53]:

    
# Context words that extract_feature_class() treats as a cue for a populated
# place (feature class 'P'). Compared against lower-cased context tokens.
P_list = ("city", "town", "village", "settlement", "capital", "cities", "villages", "towns", "neighborhood", "neighborhoods")
# Context words that extract_feature_class() treats as a cue for an
# administrative area (feature class 'A').
A_list = ("governorate", "province", "muhafazat")

# 
def subset_results(results, feature_class):
    """Return the first hit whose `_source` feature_class equals `feature_class`.

    Hits are examined in order; None is returned when no hit matches.
    """
    matching = (hit for hit in results
                if hit['_source']['feature_class'] == feature_class)
    return next(matching, None)

# Is there an exact match?
def check_names(results, term):
    """Return the first hit whose `_source` name equals `term`, ignoring case.

    Parameters:
        results: iterable of hits, each with a `_source` mapping holding 'name'.
        term: the text to compare against.

    Returns:
        The matching hit, or None when no name matches.
    """
    for hit in results:
        if hit['_source']['name'].lower() == term.lower():
            return hit
    return None
    
## Filter based on the context.
## Then take edit distance.
    
    
def pick_best_result2(results, term, context):
    """Choose a hit for `term`: an exact (case-insensitive) name match if any, else the top hit.

    Parameters:
        results: search response; hits are read from results['hits']['hits'].
        term: text of the entity being resolved.
        context: iterable of words around the entity. It is lower-cased here
            but not otherwise consulted (the context filter is disabled).

    Returns:
        [lat, lon, term, asciiname, feature_class, country_code3], or [] when
        there is neither a name match nor any hit at all.
    """
    hits = results['hits']['hits']
    context = set(word.lower() for word in context)

    # Disabled idea: when the context contains a word from P_list, restrict
    # the choice to class 'P' hits via subset_results(hits, 'P').

    chosen = check_names(hits, term)
    if not chosen:
        print("No nothing")
        try:
            # No exact name match: fall back to the highest-ranked hit.
            chosen = hits[0]
        except IndexError:
            return []

    source = chosen['_source']
    lat_lon = source['coordinates'].split(",")
    print("I'm at the end")
    return [float(lat_lon[0]), float(lat_lon[1]), term, source['asciiname'],
            source['feature_class'], source['country_code3']]
   

def extract_feature_class(results, term, context):
    """Map the words around an entity to the feature classes to search.

    Only `context` is consulted; `results` and `term` are accepted but unused.

    Returns:
        ['P'] when a lower-cased context word is in P_list, otherwise ['A']
        when one is in A_list, otherwise ['A', 'P', 'S'].
    """
    lowered_words = set(word.lower() for word in context)

    # Populated-place cues take precedence over administrative-area cues.
    if lowered_words.intersection(P_list):
        return ['P']
    elif lowered_words.intersection(A_list):
        return ['A']
    return ['A', 'P', 'S']



In [54]:

    
# Geonames responses already fetched, keyed by
# (search term + feature classes label, country filter).
place_cache = {}

def places(text):
    """Geocode the LOCATION entities found in `text`.

    The text is POSTed to `country_endpoint` to obtain a country filter, MITIE
    (via utilities.mitie_context) supplies the entities, and each location is
    looked up in geonames restricted to that country and to the feature
    classes suggested by the surrounding words.

    Parameters:
        text: the document text to geocode.

    Returns:
        A JSON string: a list of objects with the keys "lat", "lon",
        "searchterm", "placename" and "countrycode". The list is empty when
        the country lookup raises ValueError or nothing could be resolved.

    Notes:
        Progress is printed for debugging. A failure while resolving one
        entity is printed and that entity is skipped (deliberate best effort).
    """
    #params = json.loads(tangelo.request_body().read())
    #text  = params['text']
    locations = []

    try:
        country = requests.post(country_endpoint, data=json.dumps({"text": text}))
        country_filter = [country.text]
        print(country_filter)
    except ValueError:
        return json.dumps(locations)

    out = utilities.mitie_context(text)

    for entity in out['entities']:
        if entity['text'] in country_names:
            print(" (Country/blacklist. Skipping...)")
        elif entity['tag'] in ("LOCATION", "Location"):
            print(entity)
            try:
                searchterm = re.sub(r"Governorate|District|Subdistrict|Airport", "", entity['text']).strip() #put this in query_geonames?
                searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
                # extract_feature_class only looks at its `context` argument.
                feature_class = extract_feature_class(searchterm, entity['text'], entity['context'])
                cache_term = '___'.join([searchterm, ''.join(feature_class)])
                print(cache_term)
                # The query below is restricted to country_filter, so the
                # filter has to be part of the key: otherwise the same place
                # name seen later in a text about another country would be
                # answered with the first country's hits.
                cache_key = (cache_term, tuple(country_filter))
                try:
                    geo_results = place_cache[cache_key]
                except KeyError:
                    geo_results = utilities.query_geonames_featureclass(searchterm, country_filter, feature_class)
                    place_cache[cache_key] = geo_results
                for hit in geo_results['hits']['hits']:
                    print(hit['_source'][u'name'])
                print(feature_class)
                loc = pick_best_result2(geo_results, entity['text'], entity['context'])
                # loc is a nice format for debugging and looks like [35.13179, 36.75783, 'searchterm', u'matchname', u'feature_class', u'country_code3']:
                if loc:
                    formatted_loc = {"lat": loc[0], "lon": loc[1], "searchterm": loc[2], "placename": loc[3], "countrycode": loc[5]}
                    locations.append(formatted_loc)
            except Exception as e:
                print(e)

    # Two spaces on purpose: reproduces the output of the former
    # `print "Place cache is ",` followed by `print len(place_cache)`.
    print("Place cache is  %d" % len(place_cache))
    return json.dumps(locations)



In [55]:

    
# Example: non-news prose that mentions a single city (Zagreb).
places("When you travel alone, you are completely on your own schedule, which means you are free to do what you want to do—like follow a series of clues and piece together your father’s murder, which transpired 15 years ago. When you finally track the killer down in Zagreb, it will be your decision whether or not you want to give him mercy—not your travel buddy’s!")









    



[u'HRV']
{u'text': 'Zagreb', u'tag': u'LOCATION', u'score': 1.8976858716170544, u'context': ['killer', 'will', 'down', 'it', 'in', ',']}
Zagreb___APS
Zagreb
Zagreb
Westin Zagreb
Zagreb / Maksimir
Zagreb - Centar
Zagreb Airport
Grad Zagreb
Novi Zagreb
Zagreb County
Zagreb Cathedral
[u'A', u'P', u'S']
I'm at the end
Place cache is  1






    Out[55]:





'[{"lat": 45.81303, "searchterm": "Zagreb", "lon": 15.9779, "countrycode": "HRV", "placename": "Zagreb"}]'



In [56]:

    
# Example: two cities (Derna, Misrata) introduced by the cue word "cities";
# "Egyptian", "Libyan" and "Ethiopians" are demonyms rather than place names.
places("Security sources told Reuters that Egyptian intelligence services had provided Libyan authorities with information that helped them free the Ethiopians who had been held by armed groups in the cities of Derna and Misrata.")









    



[u'LBY']
{u'text': 'Derna', u'tag': u'LOCATION', u'score': 0.5325049016869093, u'context': ['the', '.', 'cities', 'Misrata', 'of', 'and']}
Derna___P
Darnah
[u'P']
No nothing
I'm at the end
{u'text': 'Misrata', u'tag': u'LOCATION', u'score': 0.3382476299099453, u'context': ['of', 'Derna', 'and', '.']}
Misrata___APS
Mişrātah
[u'A', u'P', u'S']
No nothing
I'm at the end
Place cache is  3






    Out[56]:





'[{"lat": 32.76704, "searchterm": "Derna", "lon": 22.63669, "countrycode": "LBY", "placename": "Darnah"}, {"lat": 32.37535, "searchterm": "Misrata", "lon": 15.09254, "countrycode": "LBY", "placename": "Misratah"}]'



In [57]:

    
# Example: "city of Aleppo" (a populated-place cue) alongside a country name
# ("Syria") that is in country_names.
places("Another day and another government airstrike on insurgent-held districts in the benighted historic city of Aleppo. What was once the proud commercial capital of Syria is now suffering an intensifying blitz from the Syrian regime, and according to a report released today by Amnesty International, the attacks amount to war crimes and crimes against humanity.")









    



[u'SYR']
{u'text': 'Aleppo', u'tag': u'LOCATION', u'score': 1.5950356856596992, u'context': ['historic', 'was', 'city', 'What', 'of', '.']}
Aleppo___P
Aleppo
[u'P']
I'm at the end
 (Country/blacklist. Skipping...)
Place cache is  4






    Out[57]:





'[{"lat": 36.20124, "searchterm": "Aleppo", "lon": 37.16117, "countrycode": "SYR", "placename": "Aleppo"}]'



In [58]:

    
# Example: "Damascus" followed by the lower-case word "district", which is in
# neither P_list nor A_list.
places("(Reuters) - A senior Syrian army officer was wounded in a suicide bombing in a central Damascus district on Monday, a monitoring group said, though the military denied the report.")









    



[u'SYR']
{u'text': 'Damascus', u'tag': u'LOCATION', u'score': 0.9358316976113451, u'context': ['in', 'Monday', 'a', 'on', 'central', 'district']}
Damascus___APS
Damascus
Sheraton Damascus
Dedeman Damascus
Damascus Governorate
Damascus Int. Airport
Damascus Airport Hotel
Minţaqat Dimashq
Muḩāfaz̧at Rīf Dimashq
Dimashq al Qadīmah
Maḩaţţat Barāmikah
[u'A', u'P', u'S']
I'm at the end
Place cache is  5






    Out[58]:





'[{"lat": 33.5102, "searchterm": "Damascus", "lon": 36.29128, "countrycode": "SYR", "placename": "Damascus"}]'



In [47]:

    
# Example: several places in one text ("village of Langtang", "capital
# Kathmandu", "Manaslu"); the JSON string is kept in `t` and then displayed.
t = places("(Reuters) - Nepalese soldiers and villagers dug through snow mounds in a remote hamlet on Wednesday in search of scores of bodies of villagers and trekkers believed to be buried in an avalanche set off by last month's devastating earthquake, officials said. The death toll from the April 25 quake in the Himalayan mountain nation has reached 7,675, with more than 16,300 people injured, the government said. As rescuers hunted for more than 180 bodies in the village of Langtang, 60 km (37 miles) north of the capital Kathmandu, seven bodies including of that of a German trekker were recovered at Manaslu, another climbing site.")
t









    



[u'NPL']
{u'text': 'Langtang', u'tag': u'LOCATION', u'score': 1.3839311136406052, u'context': ['the', 'km', 'village', '60', 'of', ',']}
Langtang___P
Lāngtāng
Langtang Yubra
Langtang Lirung
Langtang Ri
[u'P']
city
No nothing
I'm at the end
{u'text': 'Kathmandu', u'tag': u'LOCATION', u'score': 1.5129605263692898, u'context': ['of', 'bodies', 'the', 'seven', 'capital', ',']}
Kathmandu___P
Kathmandu
[u'P']
city
I'm at the end
{u'text': 'Manaslu', u'tag': u'LOCATION', u'score': 0.7414735312396085, u'context': ['were', 'climbing', 'recovered', 'another', 'at', ',']}
Manaslu___APS
Manaslu
[u'A', u'P', u'S']
I'm at the end
Place cache is  6






    Out[47]:





'[{"lat": 28.21717, "searchterm": "Langtang", "lon": 85.52262, "countrycode": "NPL", "placename": "Langtang"}, {"lat": 27.70169, "searchterm": "Kathmandu", "lon": 85.3206, "countrycode": "NPL", "placename": "Kathmandu"}, {"lat": 27.71925, "searchterm": "Manaslu", "lon": 85.3211, "countrycode": "NPL", "placename": "Manaslu"}]'



In [59]:

    
# Example: text that opens with a country name ("Burundi", in country_names)
# and later names a district and its city (Nyakabiga, Bujumbura).
places("Burundi protesters burned a man to death in the capital on Thursday, accusing him of being a member of the ruling party's Imbonerakure youth wing and saying the group had launched attacks on them, a witness and local media reported. They put tires around his neck and then burned him,' a witness told Reuters after seeing the incident in the Nyakabiga district of Bujumbura, one of the flashpoint areas during protests against the president's bid for a third term.")









    



[u'BDI']
 (Country/blacklist. Skipping...)
{u'text': 'Bujumbura', u'tag': u'LOCATION', u'score': 1.2471736498700576, u'context': ['Nyakabiga', 'of', 'district', 'one', 'of', ',']}
Bujumbura___APS
Bujumbura
College Bujumbura
Phare Bujumbura
Hôpital Bujumbura
École Bujumbura
Cathédrale Bujumbura
Stade Bujumbura
Monument Bujumbura
Dispensaire Bujumbura
Hôpital Bujumbura
[u'A', u'P', u'S']
I'm at the end
Place cache is  6






    Out[59]:





'[{"lat": -3.3822, "searchterm": "Bujumbura", "lon": 29.3644, "countrycode": "BDI", "placename": "Bujumbura"}]'



In [50]:

    
# Example: an abbreviation ("U.S.", which is not in country_names), a
# multi-word feature name ("Mosul Dam") and a city (Baghdad).
places("U.S. airstrikes helped Kurdish and Iraqi forces take control of Mosul Dam on Monday, fighting back ISIS militants who had seized the dam, President Obama told reporters. The stakes were huge for the millions of Iraqis who live downstream from the dam, the largest in the country. 'If that dam was breached it could have proven catastrophic, with floods that would have threatened the lives of thousands of civilians and endangered our embassy compound in Baghdad,' the President said.")









    



[u'IRQ']
{u'text': 'U.S.', u'tag': u'LOCATION', u'score': 1.1145741468376487, u'context': ['President', 'Kurdish', 'said', 'helped', '.', 'airstrikes']}
U.S.___APS
[u'A', u'P', u'S']
No nothing
{u'text': 'Mosul Dam', u'tag': u'LOCATION', u'score': 0.7705441219995173, u'context': ['take', ',', 'control', 'Monday', 'of', 'on']}
Mosul Dam___APS
Mosul
Mosul
University of Mosul
Mosul International Airport
Mosul Mediumwave Transmitter
Adhaim Dam
Haditha Dam
Ramadi Dam
Derbandikhan Dam
Dām Akuraydī
[u'A', u'P', u'S']
No nothing
I'm at the end
{u'text': 'Baghdad', u'tag': u'LOCATION', u'score': 1.631878177744647, u'context': ['embassy', 'the', 'compound', "'", 'in', ',']}
Baghdad___APS
Baghdad
Baghdad
Baghdad
New Baghdad
Muḩāfaz̧at Baghdād
Baghdad Tower
Baghdad Hotel
Baghdād al Qadīmah
Baghdad Clock Tower
Nāḩiyat Baghdād al Jadīdah
[u'A', u'P', u'S']
I'm at the end
Place cache is  10






    Out[50]:





'[{"lat": 36.335, "searchterm": "Mosul Dam", "lon": 43.11889, "countrycode": "IRQ", "placename": "Mosul"}, {"lat": 33.23333, "searchterm": "Baghdad", "lon": 44.23333, "countrycode": "IRQ", "placename": "Baghdad"}]'



In [104]:

    
# Direct call to the geonames lookup helper to inspect the raw response:
# search term "Aleppo", country filter IRQ/SYR, feature classes S, A and P.
# NOTE(review): `utilities` is already imported in the first cell.
import utilities
utilities.query_geonames_featureclass("Aleppo", ["IRQ", "SYR"], ["S", "A", "P"])









    Out[104]:





{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'nE3D4OUWRGWkCyOYkaHyOA',
    u'_index': u'geonames',
    u'_score': 3.022962,
    u'_source': {u'admin1_code': u'09',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'ALP,Alep,Alepas,Alepo,Aleppe,Aleppo,Aleppu,Alepp\xf3,Al\xe8p,Berea,Beroea,Chalepi,Gorad Khaleb,Halab,Haleb,Haleba,Halep,Heleb,H\u0259l\u0259b,Khaleb,Khaleb khot,Khalepion,Madinat Halab,Mad\u012bnat \u1e28alab,Xalab,a lei po,aleppea,aleppo,allepo,areppo,halaba,hlb,xa lep po,\u03a7\u03b1\u03bb\u03ad\u03c0\u03b9,\u0410\u043b\u0435\u043f,\u0410\u043b\u0435\u043f\u043f\u043e,\u0413\u043e\u0440\u0430\u0434 \u0425\u0430\u043b\u0435\u0431,\u0425\u0430\u043b\u0435\u0431,\u0425\u0430\u043b\u0435\u0431 \u0445\u043e\u0442,\u0425\u04c0\u0430\u043b\u0430\u0431,\u0425\u04d9\u043b\u0435\u0431,\u0540\u0561\u056c\u0565\u057a,\u05d7\u05d0\u05dc\u05d1,\u062d\u0644\u0628,\u071a\u0720\u0712,\u0905\u0932\u0947\u092a\u094d\u092a\u094b,\u0939\u0932\u092c,\u0986\u09b2\u09c7\u09aa\u09cd\u09aa\u09cb,\u0a39\u0a32\u0a2c,\u0b85\u0bb2\u0bc6\u0baa\u0bcd\u0baa\u0bcb,\u0d06\u0d32\u0d46\u0d2a\u0d4d\u0d2a\u0d4b,\u0e2d\u0e30\u0e40\u0e25\u0e1b\u0e42\u0e1b,\u10d0\u10da\u10d4\u10de\u10dd,\u1e28alab,\u30a2\u30ec\u30c3\u30dd,\u963f\u52d2\u9887,\uc54c\ub808\ud3ec',
     u'asciiname': u'Aleppo',
     u'cc2': u'',
     u'coordinates': u'36.20124,37.16117',
     u'country_code2': u'SY',
     u'country_code3': u'SYR',
     u'dem': u'401',
     u'elevation': u'',
     u'feature_class': u'P',
     u'feature_code': u'PPLA',
     u'geonameid': u'170063',
     u'modification_date': u'2014-01-01',
     u'name': u'Aleppo',
     u'population': u'1602264',
     u'timzeone': u'Asia/Damascus'},
    u'_type': u'geoname'}],
  u'max_score': 3.022962,
  u'total': 1},
 u'timed_out': False,
 u'took': 3}



In [52]:

    
# Hand-built query sent straight to the local geonames index: a query_string
# match on "Mosul Dam" over asciiname (boosted x5) and alternativenames,
# filtered to country code IRQ and feature classes P, A and S.
payload = {
    "query": {
        "filtered": {
            "query": {
                "query_string": {
                    "query": "Mosul Dam",
                    "fields": ["asciiname^5", "alternativenames"]
                }
            },
            "filter": {
                "and": [
                    {"terms": {"country_code3": ["IRQ"]}},
                    {"terms": {"feature_class": ["P", "A", "S"]}}
                ]
            }
        }
    }
}
# These two statements were indented under the dict literal, which is an
# IndentationError at top level; they belong at column 0.
out = requests.post("http://localhost:9200/geonames/_search?pretty", data=json.dumps(payload))
out.json()









    Out[52]:





{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'IaalrxKDSSaWf_6FtLlk_w',
    u'_index': u'geonames',
    u'_score': 6.757909,
    u'_source': {u'admin1_code': u'15',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'Al Mawsil,Al Maw\u015fil,Al Mosul,Al Musil,Al M\u016b\u015fil,Mosoel,Mosouli,Mossoul,Mossul,Mosul,Mosul khot,Mosula,Mosulas,Mosulo,Moszul,Mousl,Moussoul,Musil,Musi\u0142,Musul,M\xfbsil,OSM,almwsl,mo su er,mosala,mosul,mosula,mosuru,mwsl,mwswl,mws\u06b5,\u039c\u03bf\u03c3\u03bf\u03cd\u03bb\u03b7,\u041c\u043e\u0441\u0443\u043b,\u041c\u043e\u0441\u0443\u043b \u0445\u043e\u0442,\u05de\u05d0\u05e1\u05d5\u05dc,\u05de\u05d5\u05e1\u05d5\u05dc,\u0627\u0644\u0645\u0648\u0635\u0644,\u0645\u0648\u0633\u06b5,\u0645\u0648\u0635\u0644,\u0645\u0648\u0648\u0633\u06b5,\u0721\u0718\u0728\u0720,\u092e\u094b\u0938\u0941\u0932,\u0a2e\u0a4b\u0a38\u0a32,\u30e2\u30fc\u30b9\u30eb,\u6469\u82cf\u5c14,\ubaa8\uc220',
     u'asciiname': u'Mosul',
     u'cc2': u'',
     u'coordinates': u'36.335,43.11889',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'228',
     u'elevation': u'',
     u'feature_class': u'P',
     u'feature_code': u'PPLA',
     u'geonameid': u'99072',
     u'modification_date': u'2014-01-01',
     u'name': u'Mosul',
     u'population': u'1739800',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'WIKEKAFKQgyMHQqeiy39qQ',
    u'_index': u'geonames',
    u'_score': 6.39191,
    u'_source': {u'admin1_code': u'15',
     u'admin2_code': u'9166649',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'',
     u'asciiname': u'Mosul',
     u'cc2': u'',
     u'coordinates': u'36.3579,43.09937',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'252',
     u'elevation': u'',
     u'feature_class': u'A',
     u'feature_code': u'ADM2',
     u'geonameid': u'9166649',
     u'modification_date': u'2014-01-01',
     u'name': u'Mosul',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'emDC3X28REqGRuN0nMu_lg',
    u'_index': u'geonames',
    u'_score': 3.9187422,
    u'_source': {u'admin1_code': u'15',
     u'admin2_code': u'9166541',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'',
     u'asciiname': u'University of Mosul',
     u'cc2': u'',
     u'coordinates': u'36.37648,43.14219',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'274',
     u'elevation': u'',
     u'feature_class': u'S',
     u'feature_code': u'UNIV',
     u'geonameid': u'9612241',
     u'modification_date': u'2014-01-01',
     u'name': u'University of Mosul',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'2sIfkXC-Qsm-zc0ffDEnHg',
    u'_index': u'geonames',
    u'_score': 3.195955,
    u'_source': {u'admin1_code': u'',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'Mosul,Mosul International Airport,ORBM,OSM',
     u'asciiname': u'Mosul International Airport',
     u'cc2': u'',
     u'coordinates': u'36.30576,43.1474',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'214',
     u'elevation': u'216',
     u'feature_class': u'S',
     u'feature_code': u'AIRP',
     u'geonameid': u'6300125',
     u'modification_date': u'2014-01-01',
     u'name': u'Mosul International Airport',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'ynbFN4BuQcSuqAEUyLDjlw',
    u'_index': u'geonames',
    u'_score': 3.1349938,
    u'_source': {u'admin1_code': u'15',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'',
     u'asciiname': u'Mosul Mediumwave Transmitter',
     u'cc2': u'',
     u'coordinates': u'36.3539,43.2316',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'266',
     u'elevation': u'',
     u'feature_class': u'S',
     u'feature_code': u'TOWR',
     u'geonameid': u'7645586',
     u'modification_date': u'2014-01-01',
     u'name': u'Mosul Mediumwave Transmitter',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'6ye4ZJPHThOTqutO4KCiCQ',
    u'_index': u'geonames',
    u'_score': 0.84022355,
    u'_source': {u'admin1_code': u'18',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'',
     u'asciiname': u'Adhaim Dam',
     u'cc2': u'',
     u'coordinates': u'34.565,44.51556',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'94',
     u'elevation': u'',
     u'feature_class': u'S',
     u'feature_code': u'DAM',
     u'geonameid': u'9171945',
     u'modification_date': u'2014-01-01',
     u'name': u'Adhaim Dam',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'hgDXHgcfTHSjm0IB8ZxOug',
    u'_index': u'geonames',
    u'_score': 0.82521623,
    u'_source': {u'admin1_code': u'01',
     u'admin2_code': u'6765480',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'',
     u'asciiname': u'Haditha Dam',
     u'cc2': u'',
     u'coordinates': u'34.20694,42.355',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'110',
     u'elevation': u'',
     u'feature_class': u'S',
     u'feature_code': u'DAM',
     u'geonameid': u'9253599',
     u'modification_date': u'2014-01-01',
     u'name': u'Haditha Dam',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'y-e8u7TKRL6JMCdABb54UA',
    u'_index': u'geonames',
    u'_score': 0.82521623,
    u'_source': {u'admin1_code': u'01',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'Ar Ramadi,Ar Ram\u0101d\u012b,Ramadi Barrage,Ramadi Dam,Saddat ar Ramadi,Saddat ar Ram\u0101d\u012b',
     u'asciiname': u'Ramadi Dam',
     u'cc2': u'',
     u'coordinates': u'33.43417,43.26861',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'53',
     u'elevation': u'',
     u'feature_class': u'S',
     u'feature_code': u'DAM',
     u'geonameid': u'92066',
     u'modification_date': u'2014-01-01',
     u'name': u'Ramadi Dam',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'sgjT5ibQRyGzwBGs1hqIrw',
    u'_index': u'geonames',
    u'_score': 0.795547,
    u'_source': {u'admin1_code': u'10',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'',
     u'asciiname': u'Derbandikhan Dam',
     u'cc2': u'',
     u'coordinates': u'35.11278,45.70639',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'466',
     u'elevation': u'',
     u'feature_class': u'S',
     u'feature_code': u'DAM',
     u'geonameid': u'7836533',
     u'modification_date': u'2014-01-01',
     u'name': u'Derbandikhan Dam',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'},
   {u'_id': u'vf28J2IFRZSWseQ_8MI8MA',
    u'_index': u'geonames',
    u'_score': 0.76804584,
    u'_source': {u'admin1_code': u'15',
     u'admin2_code': u'',
     u'admin3_code': u'',
     u'admin4_code': u'',
     u'alternativenames': u'Dam Akuraydi,D\u0101m Akurayd\u012b,dam akrydy,\u062f\u0627\u0645 \u0627\u0643\u0631\u064a\u062f\u064a',
     u'asciiname': u'Dam Akuraydi',
     u'cc2': u'',
     u'coordinates': u'35.92579,42.41483',
     u'country_code2': u'IQ',
     u'country_code3': u'IRQ',
     u'dem': u'243',
     u'elevation': u'',
     u'feature_class': u'P',
     u'feature_code': u'PPL',
     u'geonameid': u'6778429',
     u'modification_date': u'2014-01-01',
     u'name': u'D\u0101m Akurayd\u012b',
     u'population': u'0',
     u'timzeone': u'Asia/Baghdad'},
    u'_type': u'geoname'}],
  u'max_score': 6.757909,
  u'total': 20},
 u'timed_out': False,
 u'took': 5}



In [8]:

    
# Smoke-test the feature-class-restricted geonames lookup: search for
# "Kathmandu", passing ["NPL"] and ["P"]. Per the request echoed in the
# recorded output, these become `terms` filters on `country_code3` and
# `feature_class` respectively.
# NOTE(review): the recorded run of this cell did not return hits -- it
# returned an Elasticsearch error with status 400: "[terms] filter does not
# support multiple fields". The echoed request shows the "fields" list
# (["asciiname^5", "alternativenames"]) inside the country_code3 `terms`
# filter; it presumably belongs in the `query_string` clause instead.
# TODO: confirm and fix in utilities.query_geonames_featureclass.
utilities.query_geonames_featureclass("Kathmandu", ["NPL"], ["P"])









    Out[8]:





{u'error': u'SearchPhaseExecutionException[Failed to execute phase [query], all shards failed; shardFailures {[lJQCgs9ETyGdIuFYb9dSXA][geonames][0]: SearchParseException[[geonames][0]: from[-1],size[-1]: Parse Failure [Failed to parse source [{"query": {"filtered": {"filter": {"and": [{"terms": {"country_code3": ["NPL"], "fields": ["asciiname^5", "alternativenames"]}}, {"terms": {"feature_class": ["P"]}}]}, "query": {"query_string": {"query": "Kathmandu"}}}}}]]]; nested: QueryParsingException[[geonames] [terms] filter does not support multiple fields]; }{[lJQCgs9ETyGdIuFYb9dSXA][geonames][1]: SearchParseException[[geonames][1]: from[-1],size[-1]: Parse Failure [Failed to parse source [{"query": {"filtered": {"filter": {"and": [{"terms": {"country_code3": ["NPL"], "fields": ["asciiname^5", "alternativenames"]}}, {"terms": {"feature_class": ["P"]}}]}, "query": {"query_string": {"query": "Kathmandu"}}}}}]]]; nested: QueryParsingException[[geonames] [terms] filter does not support multiple fields]; }{[lJQCgs9ETyGdIuFYb9dSXA][geonames][2]: SearchParseException[[geonames][2]: from[-1],size[-1]: Parse Failure [Failed to parse source [{"query": {"filtered": {"filter": {"and": [{"terms": {"country_code3": ["NPL"], "fields": ["asciiname^5", "alternativenames"]}}, {"terms": {"feature_class": ["P"]}}]}, "query": {"query_string": {"query": "Kathmandu"}}}}}]]]; nested: QueryParsingException[[geonames] [terms] filter does not support multiple fields]; }{[lJQCgs9ETyGdIuFYb9dSXA][geonames][3]: SearchParseException[[geonames][3]: from[-1],size[-1]: Parse Failure [Failed to parse source [{"query": {"filtered": {"filter": {"and": [{"terms": {"country_code3": ["NPL"], "fields": ["asciiname^5", "alternativenames"]}}, {"terms": {"feature_class": ["P"]}}]}, "query": {"query_string": {"query": "Kathmandu"}}}}}]]]; nested: QueryParsingException[[geonames] [terms] filter does not support multiple fields]; }{[lJQCgs9ETyGdIuFYb9dSXA][geonames][4]: SearchParseException[[geonames][4]: 
from[-1],size[-1]: Parse Failure [Failed to parse source [{"query": {"filtered": {"filter": {"and": [{"terms": {"country_code3": ["NPL"], "fields": ["asciiname^5", "alternativenames"]}}, {"terms": {"feature_class": ["P"]}}]}, "query": {"query_string": {"query": "Kathmandu"}}}}}]]]; nested: QueryParsingException[[geonames] [terms] filter does not support multiple fields]; }]',
 u'status': 400}



In [ ]:



In [ ]: