In [1]:
import altair as alt
from bs4 import BeautifulSoup
from vega_datasets import data
import gpdvega
import geopandas
import json
import shapely
import re
from matplotlib import pyplot as plt
%matplotlib inline
In [2]:
# Select Altair's 'notebook' renderer so charts display in Jupyter Notebook.
alt.renderers.enable('notebook')
Out[2]:
In [3]:
# Read the annotated XML text and collect every <geo> element.
# The context manager closes the file handle instead of leaking it.
with open('../portrait.xml') as portraitFile:
    portraitRaw = portraitFile.read()
# NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
# installed (and warns about it). A later cell looks up the lowercase tag
# 'placename', which suggests an HTML-style parser that lowercases tag names;
# confirm which parser is in use before pinning one explicitly.
portraitSoup = BeautifulSoup(portraitRaw)
geos = portraitSoup.find_all('geo')
In [4]:
# Text content of each <geo> element.
geotext = [geo.get_text() for geo in geos]
In [5]:
# For each <geo>, go up two levels and read the `n` attribute of the nearest
# preceding <lb> tag as an integer line number.
lineNos = [int(geo.parent.parent.find_previous('lb')['n']) for geo in geos]
In [6]:
# Parse each whitespace-separated coordinate string and reverse the order of
# its values before building the Point (later cells read pt.x as longitude and
# pt.y as latitude, so the source text is presumably "lat lon" — verify).
points = [
    shapely.geometry.point.Point([float(coord) for coord in reversed(raw.split())])
    for raw in geotext
]
In [7]:
def clean(text):
    """Collapse every run of whitespace in `text` into a single space.

    Leading and trailing whitespace is stripped first, so the result never
    starts or ends with a space.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # produces a warning on modern Python. `\s+` already matches newlines, so
    # no separate '\n' replacement is needed.
    return re.sub(r'\s+', ' ', text.strip())
In [8]:
# Place name for each <geo>: the text of the first <placename> found under its
# grandparent element, with whitespace normalised by clean().
names = [clean(geo.parent.parent.find('placename').get_text()) for geo in geos]
In [9]:
# One row per <geo> element: place name, line number, and point geometry.
gdf = geopandas.GeoDataFrame(dict(name=names, lineNo=lineNos, geometry=points))
In [10]:
# Preview the first rows of the places frame.
gdf.head()
Out[10]:
Base-map data source (Natural Earth, 1:50m land polygons): https://www.naturalearthdata.com/downloads/50m-physical-vectors/50m-land/
In [11]:
# Load the Natural Earth 50m land shapefile that serves as the base map.
gdf2 = geopandas.read_file('mapData/ne_50m_land.shp')
In [12]:
# NOTE(review): repeats the gdf preview from cell 10; gdf has not been
# modified in between, so this cell can probably be removed.
gdf.head()
Out[12]:
In [13]:
# Preview the first rows of the land-polygon frame.
gdf2.head()
Out[13]:
In [14]:
# Declare (not reproject) the CRS of the places frame as EPSG:4326: the points
# were built from raw coordinate text and carry no CRS of their own.
# NOTE(review): the {'init': ...} dict form is deprecated in newer
# geopandas/pyproj releases — confirm the installed versions before changing.
gdf.crs = {'init': 'epsg:4326'}
# Disabled experiments that declared EPSG:27700 on both frames instead:
# gdf.crs = {'init': 'epsg:27700'}
# gdf2.crs = {'init': 'epsg:27700'}
In [15]:
# Check the CRS just assigned to the places frame.
gdf.crs
Out[15]:
In [16]:
# EPSG codes for the coordinate reference systems considered in this notebook.
# NOTE(review): 4326 is the code assigned above to the raw lon/lat points, so
# the name `mercator` looks like a misnomer — confirm before relying on it.
mercator = 4326
britain = 27700
americas = 2163
web = 3857
#gdf = gdf.to_crs(epsg=web)
In [17]:
# Compare the CRS of both frames before clipping and reprojecting them.
gdf.crs, gdf2.crs
Out[17]:
In [18]:
def lineToPercent(lineNo):
    """Convert a line number to its position in the narrative time of the novel.

    Line numbers are encoded as ``chapter * 10000 + line within the chapter``.
    The return value is a fraction, not a percentage: 0.0 is the beginning of
    the novel and 1.0 is the end.

    Args:
        lineNo: encoded line number, e.g. 20015 for line 15 of chapter 2.

    Returns:
        float: (lines in all earlier chapters + lines into this chapter)
        divided by the total number of lines in the novel.

    Raises:
        ValueError: if the encoded chapter is not one of the known chapters.
    """
    maxLines = {1: 1848, 2: 1458, 3: 1584, 4: 922, 5: 2794}
    totalLines = sum(maxLines.values())
    # Split the encoding arithmetically. Taking the first character of
    # str(lineNo) silently produced nonsense for numbers without a chapter
    # prefix (e.g. 500 was read as "chapter 5").
    chap, linesIntoChap = divmod(int(lineNo), 10000)
    if chap not in maxLines:
        raise ValueError(
            'line number %r does not encode a known chapter (%d-%d)'
            % (lineNo, min(maxLines), max(maxLines))
        )
    precedingLines = sum(maxLines[i] for i in range(1, chap))
    return (precedingLines + linesIntoChap) / totalLines
In [19]:
# Position of each place mention within the novel, as a 0-1 fraction.
gdf['percentNarrative'] = gdf['lineNo'].map(lineToPercent)
In [20]:
# Preview the frame with the new percentNarrative column.
gdf.head()
Out[20]:
In [21]:
# NOTE(review): identical to the previous cell; gdf is unchanged, so this
# duplicate preview can probably be removed.
gdf.head()
Out[21]:
In [22]:
# Copy the point coordinates into plain numeric columns while the geometry is
# still in its original (pre-reprojection) coordinates; the chart's
# latitude/longitude encodings read these columns.
gdf['longitude'] = gdf['geometry'].x
gdf['latitude'] = gdf['geometry'].y
In [23]:
# Slice so that it's only showing the British Isles
gdf = gdf.cx[-11:-5,50:58]
gdf2 = gdf2.cx[-11:-5,50:58]
# Reproject
gdf = gdf.to_crs(epsg=web)
gdf2 = gdf2.to_crs(epsg=web)
In [24]:
# Preview the places frame after clipping and reprojection.
gdf.head()
Out[24]:
In [25]:
# Preview the land polygons that survived the clip (used to pick a row below).
gdf2.head()
Out[25]:
In [26]:
# Keep only the land polygon whose index label is 136.
# NOTE(review): 136 is a magic number tied to the row order of this particular
# shapefile — presumably the landmass of interest; verify against the preview
# above and consider selecting by geometry instead.
gdf2 = gdf2[gdf2.index == 136]
In [27]:
# Plot base map
base = alt.Chart(gdf2).mark_geoshape(
fill='white',
stroke='black'
).properties(
width=800,
height=800
)
points = alt.Chart(gdf).project().mark_circle().encode(
latitude = 'latitude',
longitude = 'longitude',
color = 'percentNarrative',
tooltip = 'name').properties(width=800, height=800)
In [28]:
# Overlay the place markers on the base map and display the layered chart.
alt.layer(base, points)
Out[28]: