In [2]:
from collections import defaultdict
import glob
import json
import pprint

In [3]:
gane_names = []

#for fname in glob.glob("names-0*.json"):
#    f = open(fname)
#    text = f.read()
#    gane_names.extend(json.loads(text))
#    f.close()

f = open("tavo-edited-place-names.json")
text = f.read()
gane_names.extend(json.loads(text))
f.close()

print len(gane_names)


2432

In [4]:
gane_tree = defaultdict(dict)

for n in gane_names:
    try:
        placeURI = n.get('placeURI')
        parts = placeURI.split('/')
        if len(parts) > 5:
            placeURI = '/'.join(parts[:5])
        if "pleiades.stoa.org" in placeURI:
            branch = int(n.get('GANEid', -1))
            n['pid'] = placeURI.rstrip('/').split('/')[-1]
        else:
            branch = int(n.get('placeURI').split("placeID=")[1])
    except:
        print n 
        raise
    leaf = int(n.get('GANEid', -1))
    gane_tree[branch][leaf] = n

In [5]:
# gane_tree has one top-level key ("branch") per place, so its length is the place count.
print len(gane_tree) # the number of GANE places


2135

In [6]:
# Peek at one (branch, cluster) pair; which pair comes first is arbitrary for a dict.
print gane_tree.items()[0]


(57345, {57345: {u'externalURIs': False, u'placeURI': u'http://pleiades.stoa.org/places/863903', u'reference': {u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 3, p. 1561)', u'index-page': 1561, u'index-volume': 3}, u'title': u'Tafl\u012bs', u'maxDate': 750, u'title-languages': False, 'pid': u'863903', u'GANEid': 57345, u'periods': [u'Caliphate-Umayyad Middle East'], u'extent': {u'type': u'Point', u'coordinates': [44.3, 41.3]}, u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa', u'nameTransliterated': False, u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig', u'main-map': {u'map': u'BVII3.1', u'accuracy': u'4'}, u'minDate': 632}})

In [7]:
def in_pleiades(args):
    """Tell whether a GANE place is backed by a Pleiades URI.

    args is a (key, cluster) pair, e.g. one item of gane_tree.items().
    The decision is made from the record stored at cluster[key]: the result
    is True when that record's 'placeURI' contains "pleiades.stoa.org".

    Returns False when the cluster has no record under its own key, or when
    that record has no usable placeURI (missing, None, False or empty).
    """
    k, v = args
    try:
        place_uri = v[k].get('placeURI')
    except KeyError:
        return False
    # Guard against a missing/False placeURI: the plain `in` test would raise
    # TypeError on a non-string value.
    return bool(place_uri) and "pleiades.stoa.org" in place_uri

In [8]:
# Pull out the cluster filed under branch 13 and run it through in_pleiades.
# gane_tree is a defaultdict, so indexing an unknown branch would insert an
# empty cluster rather than raise.
x = gane_tree[13]
print in_pleiades((13, x))


False

In [9]:
# Pretty-print every record in x, the cluster taken from gane_tree[13].
pprint.pprint(x)


{12: {u'GANEid': 12,
      u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
      u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
      u'extent': {u'coordinates': [48, 30], u'type': u'Point'},
      u'externalURIs': False,
      u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
      u'maxDate': 1950,
      u'minDate': 1900,
      u'nameTransliterated': False,
      u'periods': [u'Ottoman Decline-Mandate Middle East'],
      u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
      u'reference': {u'index-page': 1,
                     u'index-volume': 1,
                     u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)'},
      u'title': u'\u02bfAba\u0304d\u0101n',
      u'title-languages': False},
 13: {u'GANEid': 13,
      u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
      u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
      u'extent': {u'coordinates': [48.1, 30.2], u'type': u'Point'},
      u'externalURIs': [{u'title': u'Abadan, Iran',
                         u'uri': u'http://en.wikipedia.org/wiki/Abadan,_Iran'},
                        {u'title': u'Encyclopaedia Iranica Online. A\u0304ba\u0304da\u0304n - City and island in the \u1e34\u016bzest\u0101n province at the head of the Persian Gulf',
                         u'uri': u'http://www.iranicaonline.org/articles/abadan'}],
      u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
      u'maxDate': 2000,
      u'minDate': 819,
      u'nameTransliterated': [u'Abadan'],
      u'periods': [u'Samanid-Ghaznavid Iran',
                   u'Safavid Middle East',
                   u'Ottoman Decline-Mandate Middle East',
                   u'Modern Middle East'],
      u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
      u'reference': {u'index-page': 1,
                     u'index-volume': 1,
                     u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)'},
      u'title': u'\u0100b\u0101d\u0101n',
      u'title-languages': [{u'iso': u'fas',
                            u'iso_stand': True,
                            u'language': u'[Modern] Persian'}]},
 78: {u'GANEid': 78,
      u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
      u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
      u'extent': {u'coordinates': [48.15, 30.15], u'type': u'Point'},
      u'externalURIs': False,
      u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
      u'maxDate': 1950,
      u'minDate': 750,
      u'nameTransliterated': [u"'Abadan", u'Abbadan', u"'Abbadan", u'Abadan'],
      u'periods': [u'Abassid Middle East',
                   u'Samanid-Ghaznavid Iran',
                   u'Seljuq-Khwarezmian Middle East',
                   u'Khwarezmian Middle East',
                   u'1200 BC Middle East',
                   u'Mongol Middle East',
                   u'Ilkhanate Middle East',
                   u'Timurid Middle East',
                   u'Ottoman Decline-Mandate Middle East'],
      u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
      u'reference': {u'index-page': 3,
                     u'index-volume': 1,
                     u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 3)'},
      u'title': u'\u02bfAbb\u0101d\u0101n',
      u'title-languages': [{u'iso': u'arb',
                            u'iso_stand': True,
                            u'language': u'[Standard Modern] Arabic'}]}}

In [10]:
# One line per record in x: leaf id, title, and the transliterated names
# (the sample output shows False where a record has none).
for k, v in x.items():
    print k, v['title'], v['nameTransliterated']


12 ʿAbādān False
13 Ābādān [u'Abadan']
78 ʿAbbādān [u"'Abadan", u'Abbadan', u"'Abbadan", u'Abadan']

In [11]:
# Re-check the number of branches in the tree.
len(gane_tree)


Out[11]:
2135

In [12]:
# Take the first non-empty cluster that in_pleiades rejects.
# The generator stops at the first hit instead of filtering the whole tree up
# front, and the next() builtin replaces the Python-2-only .next() method.
# Raises StopIteration if no cluster qualifies.
n, y = next(
    item for item in gane_tree.items()
    if item[1] and not in_pleiades(item)
)
pprint.pprint(y)


{12: {u'GANEid': 12,
      u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
      u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
      u'extent': {u'coordinates': [48, 30], u'type': u'Point'},
      u'externalURIs': False,
      u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
      u'maxDate': 1950,
      u'minDate': 1900,
      u'nameTransliterated': False,
      u'periods': [u'Ottoman Decline-Mandate Middle East'],
      u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
      u'reference': {u'index-page': 1,
                     u'index-volume': 1,
                     u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)'},
      u'title': u'\u02bfAba\u0304d\u0101n',
      u'title-languages': False},
 13: {u'GANEid': 13,
      u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
      u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
      u'extent': {u'coordinates': [48.1, 30.2], u'type': u'Point'},
      u'externalURIs': [{u'title': u'Abadan, Iran',
                         u'uri': u'http://en.wikipedia.org/wiki/Abadan,_Iran'},
                        {u'title': u'Encyclopaedia Iranica Online. A\u0304ba\u0304da\u0304n - City and island in the \u1e34\u016bzest\u0101n province at the head of the Persian Gulf',
                         u'uri': u'http://www.iranicaonline.org/articles/abadan'}],
      u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
      u'maxDate': 2000,
      u'minDate': 819,
      u'nameTransliterated': [u'Abadan'],
      u'periods': [u'Samanid-Ghaznavid Iran',
                   u'Safavid Middle East',
                   u'Ottoman Decline-Mandate Middle East',
                   u'Modern Middle East'],
      u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
      u'reference': {u'index-page': 1,
                     u'index-volume': 1,
                     u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)'},
      u'title': u'\u0100b\u0101d\u0101n',
      u'title-languages': [{u'iso': u'fas',
                            u'iso_stand': True,
                            u'language': u'[Modern] Persian'}]},
 78: {u'GANEid': 78,
      u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
      u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
      u'extent': {u'coordinates': [48.15, 30.15], u'type': u'Point'},
      u'externalURIs': False,
      u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
      u'maxDate': 1950,
      u'minDate': 750,
      u'nameTransliterated': [u"'Abadan", u'Abbadan', u"'Abbadan", u'Abadan'],
      u'periods': [u'Abassid Middle East',
                   u'Samanid-Ghaznavid Iran',
                   u'Seljuq-Khwarezmian Middle East',
                   u'Khwarezmian Middle East',
                   u'1200 BC Middle East',
                   u'Mongol Middle East',
                   u'Ilkhanate Middle East',
                   u'Timurid Middle East',
                   u'Ottoman Decline-Mandate Middle East'],
      u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
      u'reference': {u'index-page': 3,
                     u'index-volume': 1,
                     u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 3)'},
      u'title': u'\u02bfAbb\u0101d\u0101n',
      u'title-languages': [{u'iso': u'arb',
                            u'iso_stand': True,
                            u'language': u'[Standard Modern] Arabic'}]}}

In [13]:
# One line per record in y: leaf id, title, and the transliterated names.
for k, v in y.items():
    print k, v['title'], v['nameTransliterated']


12 ʿAbādān False
13 Ābādān [u'Abadan']
78 ʿAbbādān [u"'Abadan", u'Abbadan', u"'Abbadan", u'Abadan']

In [14]:
moderns = filter(
    lambda x: 'Modern Middle East' in " ".join(x['periods']), 
    (y for y in x.values() for x in gane_tree.values()) )


print len(moderns)


2135

In [15]:
# Dump the whole tree as compact JSON into gane-tree.json.
with open("gane-tree.json", "w") as out:
    payload = json.dumps(gane_tree)
    out.write(payload)

In [16]:
# Display x (the cluster taken from gane_tree[13]) once more.
x


Out[16]:
{12: {u'GANEid': 12,
  u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
  u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
  u'extent': {u'coordinates': [48, 30], u'type': u'Point'},
  u'externalURIs': False,
  u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
  u'maxDate': 1950,
  u'minDate': 1900,
  u'nameTransliterated': False,
  u'periods': [u'Ottoman Decline-Mandate Middle East'],
  u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
  u'reference': {u'index-page': 1,
   u'index-volume': 1,
   u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)'},
  u'title': u'\u02bfAba\u0304d\u0101n',
  u'title-languages': False},
 13: {u'GANEid': 13,
  u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
  u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
  u'extent': {u'coordinates': [48.1, 30.2], u'type': u'Point'},
  u'externalURIs': [{u'title': u'Abadan, Iran',
    u'uri': u'http://en.wikipedia.org/wiki/Abadan,_Iran'},
   {u'title': u'Encyclopaedia Iranica Online. A\u0304ba\u0304da\u0304n - City and island in the \u1e34\u016bzest\u0101n province at the head of the Persian Gulf',
    u'uri': u'http://www.iranicaonline.org/articles/abadan'}],
  u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
  u'maxDate': 2000,
  u'minDate': 819,
  u'nameTransliterated': [u'Abadan'],
  u'periods': [u'Samanid-Ghaznavid Iran',
   u'Safavid Middle East',
   u'Ottoman Decline-Mandate Middle East',
   u'Modern Middle East'],
  u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
  u'reference': {u'index-page': 1,
   u'index-volume': 1,
   u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)'},
  u'title': u'\u0100b\u0101d\u0101n',
  u'title-languages': [{u'iso': u'fas',
    u'iso_stand': True,
    u'language': u'[Modern] Persian'}]},
 78: {u'GANEid': 78,
  u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
  u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
  u'extent': {u'coordinates': [48.15, 30.15], u'type': u'Point'},
  u'externalURIs': False,
  u'main-map': {u'accuracy': u'4', u'map': u'BX1'},
  u'maxDate': 1950,
  u'minDate': 750,
  u'nameTransliterated': [u"'Abadan", u'Abbadan', u"'Abbadan", u'Abadan'],
  u'periods': [u'Abassid Middle East',
   u'Samanid-Ghaznavid Iran',
   u'Seljuq-Khwarezmian Middle East',
   u'Khwarezmian Middle East',
   u'1200 BC Middle East',
   u'Mongol Middle East',
   u'Ilkhanate Middle East',
   u'Timurid Middle East',
   u'Ottoman Decline-Mandate Middle East'],
  u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13',
  u'reference': {u'index-page': 3,
   u'index-volume': 1,
   u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 3)'},
  u'title': u'\u02bfAbb\u0101d\u0101n',
  u'title-languages': [{u'iso': u'arb',
    u'iso_stand': True,
    u'language': u'[Standard Modern] Arabic'}]}}

In [17]:
from itertools import chain

all_periods = set(chain(*[n['periods'] for n in x.values()]))
print all_periods, len(all_periods)


set([u'Safavid Middle East', u'Ilkhanate Middle East', u'Mongol Middle East', u'Timurid Middle East', u'Ottoman Decline-Mandate Middle East', u'Seljuq-Khwarezmian Middle East', u'Modern Middle East', u'Khwarezmian Middle East', u'Abassid Middle East', u'Samanid-Ghaznavid Iran', u'1200 BC Middle East']) 11

In [18]:
def get_accuracy(name):
    """Return the 'accuracy' entry of a record's 'main-map', if there is one.

    Gives None when the record has no (or a falsy) 'main-map', or when the
    map carries no 'accuracy' key.
    """
    main_map = name.get('main-map')
    return main_map.get('accuracy') if main_map else None

points = sorted(filter(
            lambda t: t[0] and t[2].get('extent'),
            [(get_accuracy(v), k, v) for 
                k, v in x.items()] ))
print points[0]


(u'4', 12, {u'externalURIs': False, u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=13', u'reference': {u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 1, p. 1)', u'index-page': 1, u'index-volume': 1}, u'title': u'\u02bfAba\u0304d\u0101n', u'maxDate': 1950, u'title-languages': False, u'GANEid': 12, u'periods': [u'Ottoman Decline-Mandate Middle East'], u'extent': {u'type': u'Point', u'coordinates': [48, 30]}, u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa', u'nameTransliterated': False, u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig', u'main-map': {u'map': u'BX1', u'accuracy': u'4'}, u'minDate': 1900})

In [19]:
# Save the cluster x under key 13 as a one-entry JSON object in abbadan.json.
with open("abbadan.json", "w") as out:
    payload = json.dumps({13: x})
    out.write(payload)

In [20]:
# Leaf ids (GANEid values) present in the cluster x.
x.keys()


Out[20]:
[12, 13, 78]

In [21]:
# Does the cluster hold a record under its own branch id (13)?
13 in x


Out[21]:
True

In [22]:
print len(gane_tree)
tblisi = []
for pk, cluster in gane_tree.items():
    for k, item in cluster.items():
        if item['placeURI'].endswith('863903'):
            tblisi.append((pk, {k: item}))
print len(tblisi)
with open("tblisi.json", "w") as f:
    f.write(json.dumps(dict(tblisi), indent=2))


2135
11

In [23]:
# Dump the whole tree as indented (2-space) JSON into gane-all.json.
with open("gane-all.json", "w") as out:
    payload = json.dumps(gane_tree, indent=2)
    out.write(payload)

In [24]:
# How many branches hold more than seven name records?
sum(1 for members in gane_tree.values() if len(members) > 7)


Out[24]:
11

In [25]:
# Total number of branches, for comparison with the count of large clusters.
len(gane_tree)


Out[25]:
2135

In [26]:
# First cluster (in dict iteration order) with more than seven records,
# re-wrapped as a single-entry {branch: cluster} dict.
big_clusters = [(k, v) for k, v in gane_tree.items() if len(v) > 7]
first_key, first_cluster = big_clusters[0]
seven = {first_key: first_cluster}

In [27]:
# Inspect one record of that cluster.
# NOTE(review): both keys are hard-coded; `seven` is whichever large cluster
# dict iteration produced first, so these keys may not exist on a re-run.
seven[43205][43201]


Out[27]:
{u'GANEid': 43201,
 u'authors': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig, F. Deblauwe, E. Kansa',
 u'creators': u'B. Siewert-Mayer, H. Kopp, W. R\xf6llig',
 u'extent': {u'coordinates': [34.4, 37.5], u'type': u'Point'},
 u'externalURIs': False,
 u'main-map': {u'accuracy': u'2', u'map': u'BVII10'},
 u'maxDate': 1307,
 u'minDate': 1077,
 u'nameTransliterated': False,
 u'periods': [u'Rum/Crusader Anatolia'],
 u'placeURI': u'http://gap.alexandriaarchive.org/gane/edit-place?placeID=43205',
 u'reference': {u'index-page': 1175,
  u'index-volume': 2,
  u'text': u'A place name in the T\xfcbingen Atlas Index (Vol. 2, p. 1175)'},
 u'title': u'Nigda',
 u'title-languages': [{u'iso': u'fas',
   u'iso_stand': True,
   u'language': u'[Modern] Persian'}]}

In [28]:
# Cluster filed under branch 61261, and how many records it holds.
# (Indexing the defaultdict would create an empty cluster if 61261 were absent.)
t = gane_tree[61261]
print len(t)


17

In [29]:
# Save the cluster t under key 61261 as a one-entry JSON object in tehran.json.
with open("tehran.json", "w") as out:
    payload = json.dumps({61261: t})
    out.write(payload)

In [30]:
# Read the ids listed in gane-failures.txt, one integer per line (they are
# used as gane_tree branch keys further down).
# Blank lines, e.g. an empty last line, are skipped instead of crashing int().
with open('gane-failures.txt') as f:
    fails = [int(line) for line in f if line.strip()]

In [31]:
# Number of ids read from the failures file (duplicates included).
len(fails)


Out[31]:
118

In [32]:
# First few ids, to eyeball the file contents.
fails[:4]


Out[32]:
[2445, 2445, 59584, 59584]

In [33]:
# Map each failed id to its cluster. gane_tree is a defaultdict, so indexing
# it with an unknown id would silently insert an empty cluster into the tree;
# .get() gives the same {} for unknown ids without mutating gane_tree.
failures = {k: gane_tree.get(k, {}) for k in fails}

In [34]:
# Write the clusters selected in `failures` as compact JSON.
with open("gane-failures-toretry.json", 'w') as out:
    payload = json.dumps(failures)
    out.write(payload)

In [35]:
# Show the complete id list; in the recorded output every id appears twice in a row.
fails


Out[35]:
[2445,
 2445,
 59584,
 59584,
 6226,
 6226,
 68295,
 68295,
 15906,
 15906,
 48751,
 48751,
 38990,
 38990,
 42329,
 42329,
 3803,
 3803,
 3802,
 3802,
 57929,
 57929,
 68488,
 68488,
 68497,
 68497,
 23640,
 23640,
 68304,
 68304,
 23225,
 23225,
 68492,
 68492,
 43361,
 43361,
 43360,
 43360,
 43363,
 43363,
 43362,
 43362,
 23910,
 23910,
 37246,
 37246,
 66073,
 66073,
 61073,
 61073,
 21537,
 21537,
 23227,
 23227,
 33829,
 33829,
 43347,
 43347,
 43346,
 43346,
 41293,
 41293,
 14976,
 14976,
 67293,
 67293,
 43350,
 43350,
 43352,
 43352,
 43356,
 43356,
 23417,
 23417,
 10810,
 10810,
 54472,
 54472,
 33110,
 33110,
 48752,
 48752,
 14581,
 14581,
 37101,
 37101,
 49832,
 49832,
 16440,
 16440,
 12373,
 12373,
 10447,
 10447,
 62676,
 62676,
 9873,
 9873,
 56922,
 56922,
 61083,
 61083,
 59904,
 59904,
 61362,
 61362,
 68388,
 68388,
 68418,
 68418,
 68416,
 68416,
 68417,
 68417,
 68365,
 68365,
 68363,
 68363]

In [36]:
# Number of distinct ids once the duplicates are collapsed.
len(set(fails))


Out[36]:
59

In [37]:
# Read the ids listed in gane-failed-0912.txt, one integer per line.
# Blank lines, e.g. an empty last line, are skipped instead of crashing int().
with open('gane-failed-0912.txt') as f:
    last_fails = [int(line) for line in f if line.strip()]

In [38]:
# Map each id from the latest failure list to its cluster. .get() is used
# because indexing the gane_tree defaultdict with an unknown id would insert
# an empty cluster into the tree as a side effect; the resulting mapping is
# the same either way.
last_failures = {k: gane_tree.get(k, {}) for k in last_fails}

In [39]:
# Write the clusters selected in `last_failures` as compact JSON.
with open("gane-failures-toretry-last.json", 'w') as out:
    payload = json.dumps(last_failures)
    out.write(payload)

In [ ]: