In [1]:
# Prepend the parent directory to the module search path so that the
# in-repository `osmdigest` package can be imported without installing it.
# The path is relative, so it is resolved against the current working directory.
import sys
sys.path.insert(0, "..")

Pythonify

If you have a reasonable amount of RAM, then it's possible to load quite big XML files fully into memory and to generate Python dictionaries from them. These can then be saved out in compressed pickled format.

This is a low-tech, non-portable way of making later data extraction fast, but it is also quick and easy to set up.


In [2]:
import osmdigest.pythonify as pythonify

import os

# Directory holding the downloaded OSM extracts.  Defaults to the original
# location, but can be redirected without editing the notebook by setting the
# OSM_DATA_DIR environment variable.
basedir = os.environ.get("OSM_DATA_DIR", os.path.join("/media/disk", "OSM_Data"))
# Name of the compressed OSM XML extract (inside `basedir`) processed below.
filename = "illinois-latest.osm.xz"

Extract tags


In [3]:
# Extract the tag data from the compressed extract at basedir/filename.
# NOTE(review): presumably this reads the whole file and may take a while — confirm in osmdigest.
tags = pythonify.Tags(os.path.join(basedir, filename))

In [6]:
# Save the tag data to disk, then display the size of the written file in MiB
# (bytes divided by 1024**2).
pythonify.pickle(tags, "illinois_tags.pic.xz")
os.path.getsize("illinois_tags.pic.xz") / 1024**2


Out[6]:
20.2431640625

In [9]:
# First five results for the "name" key on nodes; the recorded output shows
# (tag value, node id) pairs.
tags.nodes_from_key("name")[:5]


Out[9]:
[('Aurora Toll Plaza', 701092),
 ('Aurora Toll Plaza', 235231121),
 ('Aurora Toll Plaza', 461515840),
 ('Aurora Toll Plaza', 461515849),
 ('River Road Toll Plaza', 701654)]

In [10]:
# Wrap the tag data in a TagsById object, which the next cell uses to look up
# tags by element id.
tags_by_id = pythonify.TagsById(tags)

In [11]:
# Tags of node 701092, the first id in the "name" listing shown earlier; the
# recorded result is a dict of tag key -> tag value.
tags_by_id.node(701092)


Out[11]:
{'barrier': 'toll_booth',
 'name': 'Aurora Toll Plaza',
 'operator': 'Illinois State Toll Highway Authority',
 'ref': '61'}

Load tags back in


In [3]:
# Reload the tag data previously saved to "illinois_tags.pic.xz".
# NOTE(review): if `unpickle` wraps the standard pickle module, only load files
# you created yourself — confirm against osmdigest.
tags = pythonify.unpickle("illinois_tags.pic.xz")

In [4]:
# Show five of the relation tag keys.  The whole collection is materialised as
# a list before slicing.
list(tags.all_relation_tag_keys)[:5]


Out[4]:
['roundtrip', 'name:ba', 'tower:type', 'name:diq', 'name:kl']

In [6]:
# Look up relations by the "tower:type" key; the recorded output is a list of
# (tag value, relation id) pairs.
tags.relations_from_key("tower:type")


Out[6]:
[('climbing', 5813084)]

In [7]:
# All tags of relation 5813084, the id returned by the "tower:type" lookup.
pythonify.TagsById(tags).relation(5813084)


Out[7]:
{'layer': '2',
 'leisure': 'pitch',
 'name': 'Climbing Park',
 'sport': 'climbing',
 'tower:type': 'climbing',
 'type': 'multipolygon'}

Extract nodes

This is typically the most memory-intensive operation.


In [10]:
# Extract the node data from the compressed extract at basedir/filename.
nodes = pythonify.Nodes(os.path.join(basedir, filename))

In [11]:
# Save the node data to disk, then display the size of the written file in MiB
# (bytes divided by 1024**2).
pythonify.pickle(nodes, "illinois_nodes.pic.xz")
os.path.getsize("illinois_nodes.pic.xz") / 1024**2


Out[11]:
140.4869499206543

Extract ways and relations


In [3]:
# Extract the way data from the compressed extract at basedir/filename.
ways = pythonify.Ways(os.path.join(basedir, filename))

In [5]:
# Save the way data to disk, then display the size of the written file in MiB
# (bytes divided by 1024**2).
pythonify.pickle(ways, "illinois_ways.pic.xz")
os.path.getsize("illinois_ways.pic.xz") / 1024**2


Out[5]:
31.613712310791016

In [8]:
# Extract the relation data from the compressed extract at basedir/filename.
relations = pythonify.Relations(os.path.join(basedir, filename))

In [9]:
# Save the relation data to disk, then display the size of the written file in
# MiB (bytes divided by 1024**2).
pythonify.pickle(relations, "illinois_relations.pic.xz")
os.path.getsize("illinois_relations.pic.xz") / 1024**2


Out[9]:
1.08172607421875

Load back node data and recompress


In [3]:
# Reload the node data previously saved to "illinois_nodes.pic.xz".
# NOTE(review): if `unpickle` wraps the standard pickle module, only load files
# you created yourself — confirm against osmdigest.
nodes = pythonify.unpickle("illinois_nodes.pic.xz")

In [4]:
# Convert the loaded node data to the NodesPacked representation.
# NOTE: this rebinds `nodes`, so re-running this cell on its own would pass the
# already-packed object back into `from_Nodes`; re-run the load cell first.
nodes = pythonify.NodesPacked.from_Nodes(nodes)

In [7]:
# Peek at the first ten entries produced by iterating over `nodes`; the
# recorded output shows (node id, (float, float)) entries.
node_iter = iter(nodes)
for position in range(10):
    print(next(node_iter))


(219850, (-87.9101245, 41.7585879))
(219851, (-87.9076432, 41.7593116))
(700724, (-88.0158606, 41.7120272))
(700725, (-88.0116119, 41.7142377))
(700726, (-88.007417, 41.716384))
(700727, (-88.0091658, 41.7154871))
(700728, (-88.0029645, 41.7187545))
(700729, (-88.0005612, 41.7199717))
(700731, (-87.9887166, 41.7258174))
(700732, (-87.9915919, 41.7245362))

In [8]:
# Index directly by node id: 700732 is the last id printed by the loop above,
# and the recorded result matches the coordinate pair shown there.
nodes[700732]


Out[8]:
(-87.9915919, 41.7245362)

In [9]:
# Save the packed node data to disk, then display the size of the written file
# in MiB (bytes divided by 1024**2).
pythonify.pickle(nodes, "illinois_nodes_packed.pic.xz")
os.path.getsize("illinois_nodes_packed.pic.xz") / 1024**2


Out[9]:
75.53979110717773

Load back way data


In [11]:
# Reload the way data previously saved to "illinois_ways.pic.xz".
# NOTE(review): if `unpickle` wraps the standard pickle module, only load files
# you created yourself — confirm against osmdigest.
ways = pythonify.unpickle("illinois_ways.pic.xz")

In [15]:
# Print the first item produced by iterating over `ways`; the recorded output
# is an (id, list of node ids) pair.
print(next(iter(ways)))


(3819179, [20326165, 33235915, 2754748538, 2754748544, 33235916, 33235917, 33235918, 33235919, 33235920, 33235921, 33235922, 2754748527, 33235924, 33235925, 33235926, 33235927, 4320607048, 33235928, 2754748488, 2754748494, 2754748497, 4320607050, 4320607049, 2754748491, 33235929, 33235888, 33235889, 33235890, 158765688, 19222947, 19222948, 19222949, 19222950, 19222951, 19222952, 19222953, 19222954, 19222955, 19222956, 19222957, 19222958, 19222959, 19222960, 19222961, 158756608, 19222962, 19222963, 19222964, 19222965, 19222966, 19222967, 2754748511, 2754748502, 2754748500, 2754748505, 2754748508, 19222968, 19222969, 19222970, 19222971, 19222972, 2754748530, 2754748533, 2754748536, 19222973, 19222974, 19222975, 19222976, 19222977, 19222978, 2754748563, 2754748566, 19222979, 19222980, 19222981, 19222982, 19222983, 19222984, 19222985, 19222986, 4320607224, 19222987, 2754748586, 19222988, 19222989, 19222990, 2754748600, 19222991, 19222992, 19222993, 19222997, 19223002, 19223007, 19223008, 19223009, 2754748636, 19223010, 2754748642, 2754748656, 19223011, 19223012, 2754748671, 2754748680, 2754748678, 19223013, 19223014, 19223015, 19223016, 19223017, 19223018, 2754748685, 19223019, 19223020, 19223021, 19223022, 19223023, 19223024, 19223025, 19223026, 19223027, 19223028, 19223029, 19223030, 19223031, 2754748661, 19223032, 2754748645, 19223033, 2754748629, 19223034, 19223035, 19223036, 19223037, 19223038, 19223039, 19223040, 19223041, 19223042, 19223043, 2754748609, 19223044, 2754748606, 2754748597, 2754748593, 19223045, 19223046, 19223047, 19223048, 19223049, 19223050, 19223051, 19223052, 19223053, 19223054, 4320620131, 19223055, 19223056, 19223057, 19223058, 19223059, 19223060, 19223061, 19223062, 19223063, 19223064, 19223065, 19223066, 19223067, 19223068, 19223069, 19223070, 19223071, 19223072, 19223073, 19223074, 19223075, 19223076, 19223077, 19223078, 19223079, 19223080, 4320606992, 19223081, 19223082, 19223083, 19223084, 19223085, 19223086, 19223087, 2754748560, 
2754748555, 2754748549, 2754748547, 19223088, 2754748541, 19223089, 19223090, 2754748522, 19223091, 19223092, 19223093, 19223094, 19223095, 19223096, 19223097, 19223098, 19223099, 19223100, 19223101, 19223102, 19223103, 19223104, 19223105, 19223106, 19223107, 19223108, 19223109, 19223110, 19223111, 19223112, 19223113, 19223114, 19223115, 19223116, 19223117, 19223118, 19223119, 19223120, 19223121, 19223122, 19223123, 19223124, 19223125, 19223126, 19223127, 19223128, 19223129, 19223130, 19223131, 19223132, 19223133, 19223134, 19223135, 19223136, 19223137, 19223138, 19223139, 19223140, 19223141, 19223142, 158758901, 19223143, 19223144, 19223145, 19223146, 19223147, 19223148, 19223149, 19223150, 19223151, 19223152, 19223153, 19223154, 19223155, 19223156, 19223157, 19223158, 19223159, 19223160, 19223161, 19223162, 19223163, 19223164, 19223165, 19223166, 19223167, 19223168, 19223169, 19223170, 19223171, 2754748475, 19223172, 2754748472, 19223173, 19223174, 19223175, 19223176, 19223177, 19223178, 19223179, 19223180, 19223181, 19223182, 19223183, 19223184, 19223185, 19223186, 19223187, 19223188, 19223189, 19223190, 2754748454, 2754748452, 19223191, 2754748449, 19223192, 19223193, 19223194])

In [17]:
# Look up the first two node ids of the way printed above in `nodes`.
nodes[20326165], nodes[33235915]


Out[17]:
((-85.201651, 46.0517207), (-85.2023348, 46.051174))

Process California data in one go


In [2]:
import osmdigest.pythonify as pythonify

import os

# Directory holding the downloaded OSM extracts.  Defaults to the original
# location, but can be redirected without editing the notebook by setting the
# OSM_DATA_DIR environment variable.
basedir = os.environ.get("OSM_DATA_DIR", os.path.join("/media/disk", "OSM_Data"))
# Name of the compressed OSM XML extract (inside `basedir`) processed below.
filename = "california-latest.osm.xz"

In [ ]:
# Process the California extract in a single call.
# NOTE(review): the second argument (basedir/"california") looks like an output
# path prefix for the pickled files — confirm against osmdigest.
pythonify.pythonify_and_pickle(os.path.join(basedir, filename), os.path.join(basedir, "california"))