In [1]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the three JSON inputs used below. Each file is read inside a ``with``
# block so the handle is closed as soon as it has been parsed (the previous
# ``file(...).read()`` form never closed it explicitly).
#   cluster  - indexed by the same keys as ``citysave``; its values are used as
#              the keys of ``unicities`` further down
#   citysave - per-key records that carry at least a 'country' field
#   N        - iterated later as the collection of country names to query
with open('../json/cluster.json', 'r') as fh:
    cluster = json.load(fh)
with open('../json/citysave3.json', 'r') as fh:
    citysave = json.load(fh)
with open('../json/N.json', 'r') as fh:
    N = json.load(fh)

In [3]:
import os
import wolframalpha

# The Wolfram|Alpha AppID is a credential, so it is taken from the
# WOLFRAMALPHA_APP_ID environment variable when that is set.
# NOTE(review): the literal fallback only keeps existing runs working; the key
# has been committed to source, so it should be rotated and this default
# removed. Account e-mail addresses and spare keys do not belong in the
# notebook and are no longer recorded here.
app_id = os.environ.get('WOLFRAMALPHA_APP_ID', 'HHKXW4-Q6WJG2XAXW')
client = wolframalpha.Client(app_id)

In [4]:
# Map every cluster label to a country: the first key seen for a label decides
# which ``citysave`` record supplies the country; later keys with the same
# label are ignored.
unicities = {}
for city_key, cluster_label in cluster.items():
    if cluster_label in unicities:
        continue
    unicities[cluster_label] = citysave[city_key]['country']

In [7]:
def _load_json(path):
    """Parse one JSON file and close its handle before returning the data."""
    with open(path, 'r') as fh:
        return json.load(fh)


# Four batches of earlier results. The ``popN`` objects are merged into ``G``
# key by key further down; the ``errN`` objects are concatenated into one list
# of names to retry.
pop1 = _load_json('../json/pop1c.json')
err1 = _load_json('../json/pop1ec.json')
pop2 = _load_json('../json/pop2c.json')
err2 = _load_json('../json/pop2ec.json')
pop3 = _load_json('../json/pop3c.json')
err3 = _load_json('../json/pop3ec.json')
pop4 = _load_json('../json/pop4c.json')
err4 = _load_json('../json/pop4ec.json')

Try to re-parse the cities that ended up in the error lists of the earlier batches


In [8]:
# Pool the four error batches (in batch order) into one list of names to retry,
# and start with empty accumulators for this pass.
err = []
for failed_batch in (err1, err2, err3, err4):
    err = err + failed_batch
G = {}
error = []
len(err)


Out[8]:
552

In [9]:
import unicodedata
def strip_accents(s):
    """Return ``s`` with combining marks removed.

    The text is decomposed with NFD normalisation and every code point whose
    Unicode category is 'Mn' (nonspacing mark) is dropped; all other characters
    are kept in order.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
def remove_accents(input_str):
    """Return an ASCII-only encoding of ``input_str``.

    The text is NFKD-normalised and then encoded to ASCII with the 'ignore'
    error handler, so any code point that still has no ASCII form after
    decomposition is silently dropped.
    """
    return unicodedata.normalize('NFKD', input_str).encode('ASCII', 'ignore')

In [10]:
for c in err:
    if c not in G.keys()+error:
        ys={"pop":0,"nearby":{}}
        q=remove_accents(strip_accents('population of '+c.split('/')[0].\
                                       replace('island','').\
                                       replace('Island','').strip()+', '+unicities[c]))
        res = client.query(q)
        good=True
        if 'pod' in res:
            for i in range(len(res['pod'])):
                try:
                    if res['pod'][i]['@title']=="Result":
                        x=res['pod'][i]['subpod']['plaintext']
                        if 'available' not in x:
                            popul=x[:x.find('people')-1]
                            if 'mill' in popul:
                                popul=popul[:popul.find('mill')-1]
                                if '|' in popul:popul=popul.split('|')[1].strip()
                            ys['pop']=int(float(popul)*1000000.0)
                            G[c]=ys
                            print 'partial success',c
                except: pass
                try:
                    if res['pod'][i]['@title']=="Nearby cities":
                        x=res['pod'][i]['subpod']['plaintext'].split('\n')
                        if 'available' not in x:
                            for y in x[:-1]:
                                people=y[y.rfind('|')+2:y.find('people')-1]
                                if 'mill' in people:
                                    people=float(people[:people.find('mill')-1])*1000000.0
                                km=float(y[y.find('|')+2:y.find(' km ')])
                                ys['nearby'][y.split('|')[0].split(',')[0].strip()]={"km":km,"people":int(people)}
                            G[c]=ys
                            print 'success',c
                            good=False
                except: pass
        if good: 
            print 'error',c
            error.append(c)


error Fort Huachuca Sierra Vista
error Kasos Island
error Santa Ana Island
error Fera Island
error St-Augustin
error York Landing
error Lamap
error Kadhdhoo Island
error Mangshi
error Araracuara
error Bhairawa
error Lake Manyara National Park
error Aménas
error Puerto Ordaz
error Kabalega Falls
error Inglaterra
error Labuha-Halmahera Island
error Kooddoo Island
error Kelsey
error Buol-Celebes Island
error Yap
error Tambor
error Bardufoss
error Gustavus
error San Andros
error Repulse Bay
error Ayers Rock
error Ukunda
error Boolgeeda
error Sishen
error Big Trout
error Whale Cove
error Sanikiluaq
error Santa Cruz-Graciosa Bay-Luova
error Coral Harbour
error Valesdir
error Pagadian

error Coondewanna
error Paraparaumu
error Whatì
error Fuyun
error Governor's Harbour
error Sachs Harbour
error Kangiqsujuaq
error Tabubil
error Gove
error Jeon Ju
error Bayannur
error Natuashish
error Fort Good Hope
error Ipota
error Kugaaruk
error Tofino
error St. Theresa Point
error Amook Bay
error North Ronaldsay
error Shamattawa
error Ra'sal-Khaymah
error Gods River
error Tokunoshima
error Muskrat Dam
error Club Makokola
error Cat Lake
error Kalimarau
error Assaluyeh
error Umiujaq
error Peureumeue-Sumatra Island
error Hall Beach
error Bearskin Lake
error Banmaw
error Tahuna-Sangihe Island
error Hemavan
error Mahikeng
error Bakalalan
error Moruya
error Anahim Lake
error Marsa Alam
error Bangda
error Sovetsky
error Sandy Lake
error Termas de Rio Hondo
error Praslin Island
error Rouyn
error Waskaganish
error Fort Hope
error Hoedspruit
error Puvirnituq
error Red Dog
error Ulusaba
error Keperveyem
error Cicia
error Tongoa Island
error Cloudbreak
error Long Akah
error Treasure Cay
error Saumlaki-Yamdena Island
error Misrata
error Kangding
error Fort Sandeman
error Guantanamo Bay Naval Station
error Doomadgee
error Makabana
error Nduli
error Al-Qamishli
error Hayman Island
error Bau-Bau
error Dover-Cheswold
error Isla Colón
error Los Roques
error Wamena
error Peawanuck
error Paama Island
error Quaqtaq
error Sumburgh
error Blimbingsari
error Webequie
error West Angelas
partial success Dharavandhoo Island
success Dharavandhoo Island
error Al-Qassim
error Cap Skirring
error Schefferville
error Humberside
partial success Nabire-Papua Island
error Nabire-Papua Island
error Eastmain River
error Walaha
error Golog
error Okinoshima
error Argyle
error Pickle Lake
error Shennongjia
error Mungeranie
error Tambolaka
error Mataveri
error Point Salines
error Cape Dorset
error Petropavlovsk-Kamchatsky
error Ekati
error Tumling Tar
error Kashechewan
error Poso-Celebes Island
error Tymvou
error Tanjung Selor-Borneo Island
error La Grande-4
error Suavanao
error Kasabonika
partial success Utila Island
success Utila Island
error Lac Brochet
error Beef Island
error Lamen Bay
error Patuxent River
error Ambryn Island
error Tulita
error Iles-de-la-Madeleine
error Paulatuk
error Opapimiskan Lake
error Pikangikum
error Fort Albany
error Waitangi
error Keewaywin
error Sabang-We Island
error Sege
error Loh-Linua
error Wapekeka
error Natashquan
error Inukjuak
error Merowe
error Sept-Iles
error Mara Lodges
partial success Fuvahmulah Island
success Fuvahmulah Island
error Pohnpei
error Solwesi
error Goose Bay
error Timika
error Malekula Island
error Vientiane
error Bryce Canyon
error Luang Namtha
error Satar Tacik-Flores Island
error Qaisumah
error Kao-Celebes Island
error Yoronjima
error Toli Toli-Celebes Island
error Tasiujaq
error Berens River
error Puerto Obaldia
error McArthur River Mine
error Floro
error Tadoule Lake
error Ifuru
error Thermal
error Ranai-Natuna Besar Island
error Ethiopia
error Matei
partial success Nevis
error Nevis
error Colville Lake
error Mont-Joli
error Samburu South
error Kebar
error Kingfisher Lake
error Australia
error Larantuka
partial success Djibouti
error Djibouti
error Malawi
error Yangyang
error Lebanon
partial success Kastelorizo Island
error Kastelorizo Island
partial success Anglesey
error Anglesey
error South Andros
partial success Islay
error Islay
partial success Zhezkazgan
error Zhezkazgan
error Songpan
error Saudi Arabia
error Tawitawi
error Mahshahr
partial success Dikson
error Dikson
partial success Tennant Creek
error Tennant Creek
error Barter Island
partial success Atka
error Atka
partial success Pajala
error Pajala
error Bermuda
error Peru
error Benin
partial success Virgin Gorda
error Virgin Gorda
error Castro
partial success San Cristobal
error San Cristobal
partial success Vanua Balavu
error Vanua Balavu
partial success Pangnirtung
error Pangnirtung
error Bamaga
partial success Fuerteventura
error Fuerteventura
error Palm Island
error Charleville
partial success Churchill
error Churchill
partial success Graciosa
error Graciosa
partial success Isle of Man
error Isle of Man
error Iron Mountain Kingsford
error Qinhuangdao
error Dobo-Kobror Island
error Muan
error Barra
error Formosa
error Longyearbyen
partial success Hachijojima
error Hachijojima
partial success Rankin Inlet
error Rankin Inlet
partial success Lawas
error Lawas
error Long Datih
error Roti-Rote Island
partial success Puerto Jimenez
error Puerto Jimenez
error Lansdowne House
error Barimunya
partial success Marudi
error Marudi
error Mirny
error Island Lake
error Lajes
error Puerto Iguazu
partial success Isles Of Scilly
error Isles Of Scilly
error Round Lake
partial success Carbondale-Murphysboro
error Carbondale-Murphysboro
partial success Coll Island
error Coll Island
partial success Kramfors
error Kramfors
error Marau
partial success Mong Hsat
error Mong Hsat
partial success Niue
error Niue
error Pantnagar
error Bingol
error Boa Vista
error Sintang
error Phu Quoc
error Lar
error Summer Beaver
error Telfer
error Fort Severn
error Qiemo
error Skopje
error Brize Norton
error Eastsound
error Churchill Falls
error Butuan
error Long Seridan
error Chakcharan
error Olympic Dam
error Fort Chipewyan
partial success Thimarafushi Island
success Thimarafushi Island
error Grise Fiord
error Chu Lai
error Tajima
error Lukla
error Sachigo Lake
error Nerlerit Inaat
error Mount Keith
error Gotalalamo-Morotai Island
error Ramata
error Ouango Fitini
error Geneina
error Skukuza
error Knock
partial success Merauke-Papua Island
success Merauke-Papua Island
error East Midlands
error Skardu
error Kuujjuarapik
error Mont Tremblant
error Ohrid
error Mpacha
error Mar del Plata
error Port Hope Simpson
error Arviat
error Kimmirut
error Kuujjuaq
error Wanzhou
error Snare Lake
error Deadmans Cay
error Kingscote
error Sveg
error Kaimana-Papua Island
error Carajas
error Déline
error Makkovik
error Kosrae
error Amami
error North Connel
error Beida
error Poplar Hill
error Kattiniq
error Ta'izz
error Luang Prabang
error Aupaluk
error Semera
error Batu Licin-Borneo Island
error Tébessi
error Kasompe
error Dillon's Bay
error Kalskag
error Dwangwa
error Eday
error Karumba
error Kithira
error Hommalinn
error Spring Point
error Wunnummin Lake
error North Spirit Lake
error Baker Lake
error Laos
error Salluit
error Oksibil-Papua Island
error Londolovit
error Ablow
error Abu Simbel
error Van Nuys
error Qikiqtarjuaq
error Liping
error Ivalo
error General Santos
error Sampit Airport
error Xieng Khouang
error Omitama
error Sawan
error N'Gaoundéré
error Memanbetsu
error Seronera
partial success Leonora
error Leonora
error Koh Samui
partial success Isla De Culebra
error Isla De Culebra
error Chlef
error La Grande Riviere
partial success Sogndal
error Sogndal
partial success Kitaakita
error Kitaakita
partial success La Palma
error La Palma
error Texada
error Philippines
partial success Daqing
error Daqing
partial success Aden
error Aden
error Pico
error Dominica
partial success Gode
error Gode
partial success Hassi Messaoud
error Hassi Messaoud
partial success Naryan-Mar
error Naryan-Mar
error Kelle
error Essendon
error Adak Island
partial success Narrandera
error Narrandera
partial success Ghat
error Ghat
partial success San Andres Island
success San Andres Island
error Solomon Islands
partial success Alor Island
error Alor Island
partial success Leros
error Leros
error Moro
partial success Rost
error Rost
partial success Kinmen
error Kinmen
partial success Tongatapu
error Tongatapu
error Værøy
partial success Male
error Male
error Morón
partial success Tachilek
error Tachilek
error Algeria
error Panjgur
error Mulu
partial success Lijiang
error Lijiang
partial success Goa
error Goa
partial success Ha'il
error Ha'il
partial success Arar
error Arar
error North Eleuthera
error Germany
error Kowanyama
partial success Taraz
error Taraz
partial success Vilhelmina
error Vilhelmina
error Barre-Montpelier
error Providenciales
partial success Papa Westray
error Papa Westray
partial success Lakeba Island
error Lakeba Island
error Nosy Be
partial success Gaua Island
error Gaua Island
partial success Ravensthorpe
error Ravensthorpe
error Karakelong Island
partial success Paro
error Paro
partial success Atiu Island
error Atiu Island
error Olga Bay
partial success Sao Jorge
error Sao Jorge
partial success Sakon Nakhon
error Sakon Nakhon
partial success Thandwe
error Thandwe
error Sorriso
error São Tomé and Principe
error Kiritimati
error Lencois
error Daocheng
error Aniwa
error Kashgar
error Riyan
error Nifty
error Alton-St Louis
error Beni Mellal
error Anua
error Londolozi
error Lands End
error Dease Lake
error Akulivik
error Kiri
error South Bimini
error Orsta Volda
error Tarama
error Kugluktuk
error Val-d'Or
error Liberia
error Moomba
partial success Syros Island
error Syros Island
error Paraguaná
error Pangkalan Bun
error Petropavlosk
partial success Oxford House
error Oxford House
partial success Winton
error Winton
error Ivujivik
error Kangiqsualujjuaq
partial success Kufra
error Kufra
error Fort Bliss-El Paso
partial success Vavau
error Vavau
error Leinster
error Shangri-La
error Xiahe
error La Toma (Catamayo)
partial success Bodo
error Bodo
error Oudomsay
error Gjögur
error Saint Marie
error Kish Island
error Cayman Brac
partial success Kalgoorlie
error Kalgoorlie
error Loyengalani
error Sandspit
error Namrole-Buru Island
error Onslow
error Nador
partial success Lycksele
error Lycksele
partial success Uray
error Uray
partial success Gallivare
error Gallivare
partial success Fakfak-Papua Island
error Fakfak-Papua Island
error Port-Menier
error Preguiça
error Ulukhaktok
error Stella Maris
error South Indian Lake
partial success Benbecula
error Benbecula
error Nemiscau
error Learmonth
error Iran
partial success Salalah
error Salalah
error Gamètì
partial success Mount Isa
error Mount Isa
error Balmaceda
partial success Lanzarote
error Lanzarote
partial success Ust-Kut
error Ust-Kut
error Kyzylorda
error Bario
error Moenjodaro
error Gods Lake Narrows
error Paraburdoo
error Simara
error Cayo Largo del Sur
error Koro Island
error Lutselk'e
error Manaung
error Deadhorse
partial success Nantucket
error Nantucket
partial success Hamilton Island
success Hamilton Island
error Cape Lisburne
error Little Cayman
error Rigolet
error Ramstein
error Red Sucker Lake
error Mayaguana
error Jomsom
error Lonorore
error Savannakhet
error Rurenabaque
error Cross Lake
error Blanc-Sablon
error Heho
error Cayo Coco
error Stronsay
error Kangirsuk
error Amboseli National Park
error Jiagedaqi
error Ikaria
error Monkey Mia
error Tin City
error Igloolik
error Gan Island
error Galela-Celebes Island
error Beigan Island
error Gillam
error Sulaimaniyah
error Phosphate Hill
error Wabush
error Bamfield
error Tokua
error Pakse
error Utopia Creek
error Dawadmi
error Putussibau-Borneo Island
error Kadanwari
error Iliamna
partial success Maamigili Island
success Maamigili Island
error Babo-Papua Island
error Mfuwe
error Kagau Island
error Stewart
error Chuuk
error Fort MacKay
error Kilaguni
error Kaadedhdhoo Island
error Wemindji
error Tengchong
error Nanki Shirahama
error Little Grand Rapids
error Taloyoak

In [11]:
print len(G),len(error)


89 544

In [12]:
for i in pop1:
    if i in G:print i,1
    G[i]=pop1[i]
for i in pop2:
    if i in G:print i,2
    G[i]=pop2[i]
for i in pop3:
    if i in G:print i,3
    G[i]=pop3[i]
for i in pop4:
    if i in G:print i,4
    G[i]=pop4[i]

In [16]:
file("../json/pop_cities.json",'w').write(json.dumps(G))
file("../json/pope_cities.json",'w').write(json.dumps(error))
print len(G)


3037 544

Country populations


In [20]:
# Fresh accumulators for the country pass: G collects name -> population,
# error collects the names whose lookup failed.
G, error = {}, []

In [21]:
for c in N:
    if c not in G.keys()+error:
        print c,
        q='population of '+c
        try:
            res = client.query(q)
            for i in range(len(res['pod'])):
                if res['pod'][i]['@title']=="Result":
                    x=res['pod'][i]['subpod']['plaintext']
                    popul=x[:x.find('people')-1]
                    if 'mill' in popul:
                            popul=float(popul[:popul.find('mill')-1])*1000000.0
                    G[c]=int(popul)
        except: error.append(c)


Canada Libyan Arab Jamahiriya Guernsey Turkmenistan Lithuania FYR of Macedonia Cambodia Dem. Rep. of Congo Ethiopia Aruba Swaziland Belize Argentina Bolivia Cameroon Burkina Faso Ghana Saudi Arabia Cape Verde Slovenia Guatemala Bosnia and Herzegovina Guinea Russian Federation Germany Dominica Liberia Maldives Paraguay Pakistan Oman Tanzania Greenland Gabon Niue Monaco New Zealand Yemen Jersey Jamaica Namibia Albania Samoa United Arab Emirates Uruguay India Azerbaijan Madagascar Lesotho Saint Vincent and the Grenadines Kenya Tajikistan Turkey Afghanistan Fiji Bangladesh Eritrea Solomon Islands Saint Lucia Mongolia France Syrian Arab Republic Bermuda Slovakia Somalia Peru Vanuatu Nauru Norway Malawi Cook Islands Benin Cuba Montenegro Saint Kitts and Nevis Togo China Armenia Antigua and Barbuda Dominican Republic Ukraine Bahrain Tonga Finland Western Sahara Indonesia Mauritius Sweden Vietnam British Virgin Islands Guyana Mali Bulgaria United States Romania Angola Cayman Islands South Africa Cyprus Brunei Darussalam Malaysia Austria Mozambique Uganda Japan Niger Isle of Man Brazil Kuwait Panama Rep. of Moldova Costa Rica Luxembourg Bahamas Gibraltar Ireland Italy Nigeria Ecuador Czech Republic Australia Iran Algeria El Salvador Tuvalu Marshall Islands Chile Puerto Rico Belgium Kiribati Haiti Iraq Hong Kong Sierra Leone Georgia Gambia Philippines Portugal Morocco Croatia Guinea-Bissau Thailand Switzerland Grenada Seychelles Chad Estonia Kosovo Equatorial Guinea Lebanon Uzbekistan Egypt Djibouti Rwanda Timor-Leste Spain Colombia Burundi Taiwan Turks and Caicos Islands Barbados Qatar Palau Bhutan Sudan Palestinian Territories Nepal São Tomé and Principe Malta Netherlands Suriname Anguilla Venezuela Micronesia (Federated States of) Israel Myanmar (Burma) Iceland Zambia Senegal Papua New Guinea Cote d'Ivoire Lao People's Dem. Rep. 
Zimbabwe Jordan Denmark Kazakhstan Poland Cent African Rep Mauritania Kyrgyzstan Montserrat Andorra Trinidad and Tobago Latvia People's Republic of Korea South Sudan Hungary Belarus Honduras Mexico Tunisia Nicaragua Singapore Serbia Comoros United Kingdom Congo Greece Sri Lanka French Guiana Rep. of Korea Botswana

In [29]:
# Write the country populations gathered so far; ``with`` closes (and thereby
# flushes) the file, which the previous ``file(...).write(...)`` form never did.
with open("../json/pop_countries.json", 'w') as fh:
    fh.write(json.dumps(G))

In [26]:
# Display the names collected in ``error`` (lookups that raised in the country pass).
error


Out[26]:
[u'FYR of Macedonia',
 u'India',
 u'China',
 u'Palestinian Territories',
 u'S\xe3o Tom\xe9 and Principe',
 u'Micronesia (Federated States of)',
 u"Lao People's Dem. Rep."]

In [31]:
for c in error:
    if c not in G.keys():
        print c,
        q='population of '+c
        try:
            res = client.query(q)
            for i in range(len(res['pod'])):
                if res['pod'][i]['@title']=="Result":
                    x=res['pod'][i]['subpod']['plaintext']
                    popul=x[:x.find('people')-1]
                    if 'mill' in popul:
                            popul=float(popul[:popul.find('mill')-1])*1000000.0
                    elif 'bill' in popul:
                            popul=float(popul[:popul.find('bill')-1])*1000000000.0
                    G[c]=int(popul)
        except: print c


FYR of Macedonia FYR of Macedonia
India China Palestinian Territories Palestinian Territories
São Tomé and Principe São Tomé and Principe
Micronesia (Federated States of) Micronesia (Federated States of)
Lao People's Dem. Rep. Lao People's Dem. Rep.

In [40]:
cc={'FYR of Macedonia':'Macedonia',
u'São Tomé and Principe':'Sao Tome and Principe',
'Micronesia (Federated States of)':'Micronesia',
u"Lao People's Dem. Rep.":'Laos'}
for c in error:
    if c not in G.keys()+['Palestinian Territories']:
        print c,
        q='population of '+cc[c]
        res = client.query(q)
        for i in range(len(res['pod'])):
            if res['pod'][i]['@title']=="Result":
                x=res['pod'][i]['subpod']['plaintext']
                popul=x[:x.find('people')-1]
                if 'mill' in popul:
                        popul=float(popul[:popul.find('mill')-1])*1000000.0
                elif 'bill' in popul:
                        popul=float(popul[:popul.find('bill')-1])*1000000000.0
                G[c]=int(popul)


São Tomé and Principe Micronesia (Federated States of) Lao People's Dem. Rep.

In [55]:
c='Palestinian Territories'
print c,
q='population of '+c
res = client.query(q)
for i in range(len(res['pod'])):
    s=0
    if res['pod'][i]['@title']=="Result":
        xx=res['pod'][i]['subpod']['plaintext'].split('\n')
        for x in xx[:-1]:
            popul=x[x.find('|')+2:x.find('people')-1]
            if 'mill' in popul:
                    popul=float(popul[:popul.find('mill')-1])*1000000.0
            elif 'bill' in popul:
                    popul=float(popul[:popul.find('bill')-1])*1000000000.0
            s+=int(popul)
        G[c]=s


Palestinian Territories

In [57]:
# Write the completed country table; ``with`` closes (and thereby flushes) the
# file, which the previous ``file(...).write(...)`` form never did.
with open("../json/pop_countries2.json", 'w') as fh:
    fh.write(json.dumps(G))