In [2]:
from collections import defaultdict
import glob
import json
import pprint
In [3]:
# Load the name records from tavo-edited-place-names.json into one flat list.
# (An earlier variant of this cell aggregated every file matching
# "names-0*.json" via glob instead of reading the single edited file.)
gane_names = []
# The with-statement closes the file even if reading or JSON parsing fails.
with open("tavo-edited-place-names.json") as f:
    gane_names.extend(json.load(f))
print(len(gane_names))
In [4]:
# Group the flat list of name records into a two-level mapping:
#   gane_tree[branch][leaf] = record
# where `leaf` is the record's integer GANEid (-1 when absent) and `branch`
# is the integer id of the place the record is filed under.
gane_tree = defaultdict(dict)
for n in gane_names:
    try:
        placeURI = n.get('placeURI')
        parts = placeURI.split('/')
        # Keep only the first five '/'-separated parts of longer URIs.
        if len(parts) > 5:
            placeURI = '/'.join(parts[:5])
        if "pleiades.stoa.org" not in placeURI:
            # Non-Pleiades URI: the branch id is whatever follows "placeID="
            # in the original (untruncated) URI.
            branch = int(n.get('placeURI').split("placeID=")[1])
        else:
            # Pleiades URI: the record is filed under its own GANEid, and the
            # last path segment of the truncated URI is stored as 'pid'.
            branch = int(n.get('GANEid', -1))
            n['pid'] = placeURI.rstrip('/').split('/')[-1]
    except:
        # Show the offending record, then let the original error propagate.
        print(n)
        raise
    leaf = int(n.get('GANEid', -1))
    gane_tree[branch][leaf] = n
In [5]:
# Count the top-level keys of gane_tree (one key per GANE place).
print len(gane_tree) # the number of GANE places
In [6]:
# Show one (place id, {name id: record}) pair to check the tree's shape.
# Which pair comes first is arbitrary, and indexing items() relies on it
# returning a list (Python 2).
print gane_tree.items()[0]
In [7]:
def in_pleiades(args):
    """Tell whether a GANE place's own record points at Pleiades.

    `args` is a (place id, cluster) pair, where `cluster` maps name ids to
    name records (dicts). The check looks only at the record stored under
    the place's own id, i.e. cluster[place id].

    Returns True when that record's 'placeURI' contains "pleiades.stoa.org".
    Returns False when the cluster has no record under the place id, or when
    that record has no (or an empty/None) 'placeURI'.
    """
    k, v = args
    try:
        record = v[k]
    except KeyError:
        return False
    # .get() yields None for a record without 'placeURI'; substitute an empty
    # string so the membership test cannot raise TypeError.
    return "pleiades.stoa.org" in (record.get('placeURI') or "")
In [8]:
# Pull out the cluster stored under place id 13 as a working example.
# NOTE: gane_tree is a defaultdict(dict), so this lookup inserts an empty
# cluster under 13 if that id is not already present.
x = gane_tree[13]
# in_pleiades() takes a single (place id, cluster) tuple.
print in_pleiades((13, x))
In [9]:
# Pretty-print every name record in the example cluster `x`.
pprint.pprint(x)
In [10]:
for k, v in x.items():
print k, v['title'], v['nameTransliterated']
In [11]:
# Number of top-level entries (places) currently in the tree.
len(gane_tree)
Out[11]:
In [12]:
from itertools import ifilter
# Pick the first place that is not in Pleiades and has at least one name
# record. `n` receives the place id and `y` its cluster.
# ifilter and .next() are Python 2 only; .next() raises StopIteration when no
# place qualifies.
outside_pleiades = filter(lambda pair: not in_pleiades(pair), gane_tree.items())
with_names = ifilter(lambda pair: len(pair[1]) > 0, outside_pleiades)
n, y = with_names.next()
pprint.pprint(y)
In [13]:
for k, v in y.items():
print k, v['title'], v['nameTransliterated']
In [14]:
# Collect every name record, across all places, whose period list mentions
# 'Modern Middle East'. The periods are joined into one string and searched
# for that substring.
moderns = filter(
    lambda name: 'Modern Middle East' in " ".join(name['periods']),
    # The first `for` clause is the outer loop, so the clusters must come
    # first and each cluster's records second. With the clauses reversed the
    # generator would iterate the notebook-level `x` instead of every cluster.
    (name for cluster in gane_tree.values() for name in cluster.values()))
print(len(moderns))
In [15]:
# Dump the whole tree to gane-tree.json. The json module writes the integer
# place/name ids as string keys.
with open("gane-tree.json", "w") as f:
    tree_json = json.dumps(gane_tree)
    f.write(tree_json)
In [16]:
# Display the example cluster `x` again.
x
Out[16]:
In [17]:
from itertools import chain
all_periods = set(chain(*[n['periods'] for n in x.values()]))
print all_periods, len(all_periods)
In [18]:
def get_accuracy(name):
    """Return the 'accuracy' value of a record's 'main-map' entry.

    `name` is a name record (dict). Returns None when the record has no
    'main-map' entry, when that entry is falsy, or when it carries no
    'accuracy' key.
    """
    main_map = name.get('main-map')
    return main_map.get('accuracy') if main_map else None
# Build (accuracy, name id, record) triples for the example cluster `x`, keep
# those with a truthy accuracy and a truthy 'extent', and sort them so the
# smallest accuracy value comes first.
candidates = [(get_accuracy(v), k, v) for k, v in x.items()]
points = sorted(t for t in candidates if t[0] and t[2].get('extent'))
# Raises IndexError when no record passes the filter.
print(points[0])
In [19]:
# Save the example cluster under its place id (13) as a one-entry tree.
with open("abbadan.json", "w") as f:
    abbadan_json = json.dumps({13: x})
    f.write(abbadan_json)
In [20]:
# Name ids (leaf keys) present in the example cluster.
x.keys()
Out[20]:
In [21]:
# Does the example cluster hold a record keyed by 13 itself? in_pleiades()
# indexes the cluster by the place id, so this shows whether that lookup
# can succeed for (13, x).
13 in x
Out[21]:
In [22]:
print(len(gane_tree))

# Gather every name record whose placeURI ends with the id 863903
# (presumably Tbilisi, going by the variable and file name -- TODO confirm).
# Each match is kept as a (place id, {name id: record}) pair.
tblisi = []
for pk, cluster in gane_tree.items():
    for k, item in cluster.items():
        if item['placeURI'].endswith('863903'):
            tblisi.append((pk, {k: item}))
print(len(tblisi))

# Merge the matches per place before serialising. Passing the pairs straight
# to dict() would keep only the last match for a place with several matching
# records, so the file could hold fewer records than the count printed above.
tblisi_by_place = {}
for place_id, match in tblisi:
    tblisi_by_place.setdefault(place_id, {}).update(match)

with open("tblisi.json", "w") as f:
    f.write(json.dumps(tblisi_by_place, indent=2))
In [23]:
# Dump the whole tree again, this time indented for readability.
with open("gane-all.json", "w") as f:
    all_json = json.dumps(gane_tree, indent=2)
    f.write(all_json)
In [24]:
# How many places have more than seven name records?
len([k for k,v in gane_tree.items() if len(v) > 7])
Out[24]:
In [25]:
# Total number of places, for comparison with the count above.
len(gane_tree)
Out[25]:
In [26]:
# Take the first place with more than seven name records, wrapped as a
# one-entry {place id: cluster} dict. Raises IndexError if no place qualifies.
seven = [{k:v} for k,v in gane_tree.items() if len(v) > 7][0]
In [27]:
# Inspect a single record. Both ids are hard-coded, so this assumes `seven`
# holds place 43205 and that the place has a name 43201 (KeyError otherwise).
seven[43205][43201]
Out[27]:
In [28]:
# Cluster stored under place id 61261 and its number of name records.
# NOTE: gane_tree is a defaultdict(dict), so this lookup inserts an empty
# cluster if 61261 is not already present.
t = gane_tree[61261]
print len(t)
In [29]:
# Save the cluster `t` under its place id (61261) as a one-entry tree.
with open("tehran.json", "w") as f:
    tehran_json = json.dumps({61261: t})
    f.write(tehran_json)
In [30]:
# Read the integer ids listed in gane-failures.txt, one per line.
# Blank lines (e.g. a trailing empty line) are skipped, because int() raises
# ValueError on an empty string.
with open('gane-failures.txt') as f:
    fails = list(int(line) for line in f if line.strip())
In [31]:
# Number of ids read from the failures file (duplicates included).
len(fails)
Out[31]:
In [32]:
# Peek at the first four ids.
fails[:4]
Out[32]:
In [33]:
# Sub-tree holding only the places whose ids are listed in `fails`.
# gane_tree is a defaultdict(dict), so an id that is missing from the tree
# yields (and inserts) an empty cluster instead of raising KeyError.
failures = dict((k, gane_tree[k]) for k in fails)
In [34]:
# Save the failed places' sub-tree to gane-failures-toretry.json.
with open("gane-failures-toretry.json", 'w') as f:
    failures_json = json.dumps(failures)
    f.write(failures_json)
In [35]:
# Display the full list of ids read from the failures file.
fails
Out[35]:
In [36]:
# Number of distinct ids; a value lower than len(fails) means the list
# contains duplicates.
len(set(fails))
Out[36]:
In [37]:
# Read the integer ids listed in gane-failed-0912.txt, one per line.
# Blank lines (e.g. a trailing empty line) are skipped, because int() raises
# ValueError on an empty string.
with open('gane-failed-0912.txt') as f:
    last_fails = list(int(line) for line in f if line.strip())
In [38]:
# Sub-tree holding only the places whose ids are listed in `last_fails`.
# gane_tree is a defaultdict(dict), so an id that is missing from the tree
# yields (and inserts) an empty cluster instead of raising KeyError.
last_failures = dict((k, gane_tree[k]) for k in last_fails)
In [39]:
# Save the latest failures' sub-tree to gane-failures-toretry-last.json.
with open("gane-failures-toretry-last.json", 'w') as f:
    last_failures_json = json.dumps(last_failures)
    f.write(last_failures_json)
In [ ]: