In [ ]:
from lib import read_mkdocs
In [ ]:
from pyprojroot import here
In [ ]:
mkdocs_config = read_mkdocs()
nav = mkdocs_config["nav"]
docroot = here() / "docs"
In [ ]:
from lib import parse_navigation
# The goal here is to flatten the tree structure into a list of 2-tuples,
# where the title is the first element and the filename is the second element.
title_files = parse_navigation(nav, [])
title_files.insert(0, ('Preface', 'preface/preface.md'))
title_files
In [ ]:
from lib import exclude
exclusion = [
"Welcome",
"Get Setup",
"Prerequisites",
"Further Learning",
"Style Guide",
]
title_files = exclude(title_files, titles=exclusion)
In [ ]:
title_files
We now need to convert each of the files into Markua.
In [ ]:
strings = ["df.head()", "another_thing\ndf2.head()"]
def replace_dataframe_with_markdown(s: str):
new_string = ""
for line in s.split("\n"):
if (line.endswith(".head()")
or line.endswith(".describe()")
or line.endswith("correlation_centrality(graphs[0])")
or line.endswith("find_connected_persons(G, 'p2', 'c10')")
):
line = f"print({line}.to_markdown())"
new_string += line + "\n"
return new_string
# replace_dataframe_head_with_markdown(strings[0])
In [ ]:
def replace_render_html_with_raw(s: str):
new_string = ""
for line in s.split("\n"):
if line.startswith("render_html"):
line = line.replace("render_html", "")[1:-1]
new_string += line + "\n"
return new_string
In [ ]:
def replace_admonition(src: str):
if src.startswith("???"):
new_text = ""
for line in src.split("\n"):
if line.startswith("???"):
line = "*Note:*"
line = line.replace(" ", "")
new_text += line + "\n"
return new_text
return src
text = """??? note "Geospatial Viz"
As the creator of `nxviz`,
I would recommend using proper geospatial packages
to build custom geospatial graph viz,
such as [`pysal`](http://pysal.org/).)
That said, `nxviz` can probably do what you need
for a quick-and-dirty view of the data.
"""
print(replace_admonition(text))
In [ ]:
def replace_markdown_table_tabs(body: str):
return body.replace(" |", "|")
In [ ]:
from nbconvert.exporters import MarkdownExporter
from nbformat.notebooknode import NotebookNode
from nbconvert.preprocessors import ExecutePreprocessor
from lib import strip_execution_count
def nb2markdown(nb: NotebookNode, kernel: str):
"""
Compile final notebook into a single PDF while executing it.
:param nb: The compiled notebook object with all notebook cells.
:param kernel: String name of the kernel to output.
"""
# Convert all `.head()` to `.head().to_markdown()`
# before execution
for i, cell in enumerate(nb["cells"]):
src = nb["cells"][i]["source"]
src = (
src
.replace("HTML(anim(G2, msg, n_frames=4).to_html5_video())", "# HTML(anim(G2, msg, n_frames=4).to_html5_video())")
)
src = replace_dataframe_with_markdown(src)
src = replace_render_html_with_raw(src)
src = replace_admonition(src)
nb["cells"][i]["source"] = src
ep = ExecutePreprocessor(timeout=600, kernel_name=kernel)
ep.preprocess(nb)
strip_execution_count(nb)
pdf_exporter = MarkdownExporter()
body, resources = pdf_exporter.from_notebook_node(nb)
return body, resources
In [ ]:
from lib import read_notebook
In [ ]:
sample_chapters = ["Preface", "Learning Goals", "Introduction to Graphs", "The NetworkX API"]
In [ ]:
# Now, convert everything into plain text markdown.
In [ ]:
from pathlib import Path
from pyprojroot import here
build_dir = here() / "manuscript"
build_dir.mkdir(parents=True, exist_ok=True)
images_dir = build_dir / "images"
images_dir.mkdir(parents=True, exist_ok=True)
In [ ]:
def nth_repl_all(string: str, substring: str, replacement: str, nth: int) -> str:
"""Replace nth string with substring."""
find = string.find(substring)
# loop util we find no match
i = 1
while find != -1:
# if i is equal to nth we found nth matches so replace
if i == nth:
string = string[:find] + replacement + string[find + len(substring):]
i = 0
# find + len(sub) + 1 means we start after the last match
find = string.find(substring, find + len(substring) + 1)
i += 1
return string
In [ ]:
def mdlatex2lfmlatex(text):
text = nth_repl_all(text, substring="$$", replacement="{@@}", nth=1)
text = nth_repl_all(text, substring="$", replacement="{@@}", nth=1)
text = nth_repl_all(text, substring="{@@}", replacement="{$$}", nth=1)
text = nth_repl_all(text, substring="{$$}", replacement="{/$$}", nth=2)
return text
In [ ]:
In [ ]:
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
book_txt = ""
files_to_validate = []
for chapter, fpath_str in title_files:
logging.info(f"Processing chapter {chapter}")
fpath = Path(fpath_str)
source_path = docroot / fpath
# Handle notebooks
if source_path.suffix == ".ipynb":
text, resources = nb2markdown(read_notebook(source_path), kernel="nams")
# Handle markdown files
else:
with open(source_path, "r+") as f:
text = f.read()
resources = dict()
resources["outputs"] = dict()
text = f"# {chapter}\n\n" + text
if chapter in sample_chapters:
insert = "{sample: true}\n\n"
text = insert + text
# More processing: Replace all output_* with <relative_dir>_md_<autogen_numbers>
img_prefix = str(fpath.with_suffix(".md")).replace("/", "_").replace(".", "_") + "_"
text = text.replace("output_", "images/" + img_prefix)
# More processing: Leanpub Flavoured Markdown uses {$$} to delineate LaTeX.
# text = text.replace("$$", "{$$}")
text = mdlatex2lfmlatex(text)
# More preprocessing: Clean up tabs for all of the markdown tables
text = replace_markdown_table_tabs(text)
markdown_dir = (build_dir / fpath).with_suffix(".md")
markdown_dir.mkdir(parents=True, exist_ok=True)
# Write the text out
with open(markdown_dir / "index.md", "w+") as f:
f.write(text)
files_to_validate.append(markdown_dir / "index.md")
# Write the resources out
for k, v in resources["outputs"].items():
k = k.replace("output_", img_prefix)
logging.debug(f"image filename = {k}")
with open(images_dir / k, "wb") as f:
f.write(v)
book_txt = book_txt + str(fpath.with_suffix(".md") / "index.md") + "\n"
In [ ]:
# hack to find all images embedded in markdown and copy it to manuscript/images
from shutil import copyfile
for image in docroot.glob('**/images/*.png'):
copyfile(image, images_dir / image.name)
In [ ]:
with open(build_dir / "Book.txt", "w+") as f:
f.write(book_txt)
In [ ]:
def has_html(s: str) -> bool:
tag_openers = ["<li", "<ul", "<ol", "<span", "<p", "<em"]
for tag in tag_openers:
if tag in s:
return True
return False
In [ ]:
# Print a line if it's got problems
for fpath in files_to_validate:
with open(fpath, "r+") as f:
for line in f.readlines():
if line.startswith(" |"):
print(fpath, line)
if line.startswith("???"):
print(fpath, line)
if has_html(line):
print(fpath, line)
if " '| |" in line:
print(fpath, line)