In [8]:
from IPython.display import HTML
import markdown as md
In [9]:
# Filler paragraph used for every placeholder of the sample document below; it
# mixes markdown emphasis (_.._, __..__) with inline html (<strike>, <u>).
l = """LOREM ipsum dolor sit amet, _consectetur_ adipiscing elit. Praesent dignissim orci a leo dapibus semper eget sed
sem. Pellentesque tellus nisl, condimentum nec libero id, __cursus consequat__ lectus. Ut quis nulla laoreet, efficitur
metus sit amet, <strike>viverra dui. Nam tempor ornare urna a consequat</strike>. Nulla dolor velit, sollicitudin sit
amet consectetur sed, interdum nec orci. Nunc suscipit tempus est ut porta. <u>Ut non felis a ligula suscipit
posuere quis sit amet elit</u>."""

# Sample markdown document: headings, plain paragraphs, a bullet list and an
# ordered list. The template has twelve %s slots, all filled with the same
# filler paragraph.
markdown_text = """
# Heading1
## Heading 2
%s %s %s
## Heading 2
%s
- %s
- %s
- %s
## Heading 2
%s
4. %s
4. %s
4. %s
%s
""" % ((l,) * 12)
In [10]:
#HTML(md.markdown(markdown_text))
In [11]:
from markdown import markdown as md_markdown
from xml.etree.ElementTree import fromstring as et_fromstring
from xml.etree.ElementTree import tostring as et_tostring
from reportlab.platypus import BaseDocTemplate as plat_BaseDocTemplate
from reportlab.platypus import Frame as plat_Frame
from reportlab.platypus import Paragraph as plat_Paragraph
from reportlab.platypus import PageTemplate as plat_PageTemplate
from reportlab.lib.styles import getSampleStyleSheet as sty_getSampleStyleSheet
from reportlab.lib.pagesizes import A4 as ps_A4
from reportlab.lib.pagesizes import A5 as ps_A5
from reportlab.lib.pagesizes import landscape as ps_landscape
from reportlab.lib.pagesizes import portrait as ps_portrait
from reportlab.lib.units import inch as un_inch
The ReportFactory
class creates a ReportLab document / report object; the idea is that all style information as well as page layouts are collected in this object, so that when a different factory is passed to the writer object the report looks different.
In [12]:
class ReportFactory():
    """Create a ReportLab report object using BaseDocTemplate.

    The report creation is a two-step process:

    1. instantiate a ReportFactory object (and, if desired, adjust its
       attributes: filename, pagesize, showboundary, styles, bullet)
    2. retrieve the report using the report() method

    Note: as it currently stands the report object is remembered in the
    factory object, so another call to report() returns the _same_ object;
    this means that changing the parameters after report() has been called
    for the first time will not have an impact on that object.
    """

    def __init__(self, filename=None):
        """Set the default report parameters.

        filename - passed as first argument to BaseDocTemplate; defaults to
                   'report_x1.pdf' when None
        """
        # identity test: None is a singleton and `==` can be overridden
        if filename is None:
            filename = 'report_x1.pdf'
        # f = open (filename,'wb') -> reports can take a file handle!
        self.filename = filename
        self.pagesize = ps_portrait(ps_A4)
        self.showboundary = 0
        #PAGE_HEIGHT=defaultPageSize[1]; PAGE_WIDTH=defaultPageSize[0]
        self.styles = sty_getSampleStyleSheet()
        self.bullet = "\u2022"
        # cache for the document created by report(); None until first call
        self._report = None

    @staticmethod
    def static_page(canvas, doc):
        """Template for a report page.

        This template defines how the standard page looks (header, footer,
        background objects); it does _not_ define the flow objects though, as
        those are separately passed to the PageTemplate() function.

        canvas - canvas to draw on
        doc    - document being built; only doc.page is read here
        """
        canvas.saveState()
        canvas.setFont('Times-Roman', 9)
        canvas.drawString(un_inch, 0.75 * un_inch, "Report - Page %d" % doc.page)
        canvas.restoreState()

    def refresh_styles(self):
        """Refresh all styles.

        Derived ReportLab styles need to be refreshed in case the parent style
        has been modified; this does not really work though - it seems that the
        styles are simply flattened....
        """
        # snapshot the names first so the loop does not iterate a live view
        for name in list(self.styles.byName):
            self.styles[name].refresh()

    def report(self):
        """Initialise (once) and return the report object.

        The report is built from the properties set on the factory object at
        the time of the first call. It is generated only _once_ and subsequent
        calls return the same object; this implies that most property changes
        after this function has been called are not taken into account.
        """
        if self._report is None:
            rp = plat_BaseDocTemplate(
                self.filename,
                showBoundary=self.showboundary,
                pagesize=self.pagesize,
            )
            # a single frame spanning the area inside the document margins
            frame_page = plat_Frame(
                rp.leftMargin, rp.bottomMargin, rp.width, rp.height, id='main')
            pagetemplates = [
                plat_PageTemplate(id='Page', frames=[frame_page], onPage=self.static_page),
            ]
            rp.addPageTemplates(pagetemplates)
            self._report = rp
        return self._report
The ReportWriter
object executes the conversion from markdown to pdf. It is currently very simplistic - for example there is no entry hook for starting the conversion at the html level rather than at markdown, and only a few basic tags are implemented.
In [13]:
class ReportWriter():
    """Convert markdown into a report built on a factory's report object.

    The markdown is converted to html, parsed into an element tree, and every
    top-level element is turned into ReportLab paragraphs that are collected
    in a story; the story is then passed to the report's build() method.
    Handled top-level tags: h1-h5, p, ul and ol; any other tag is rendered as
    a highlighted "cannot render" line.
    """

    def __init__(self, report_factory):
        """Remember the factory and fetch its report object.

        report_factory - object providing report(), styles and bullet
                         (e.g. a ReportFactory)
        """
        # html tag -> name of the style used to render it
        self._simple_tags = {
            'h1': 'Heading1',
            'h2': 'Heading2',
            'h3': 'Heading3',
            'h4': 'Heading4',
            'h5': 'Heading5',
            'p': 'BodyText',
        }
        self.rf = report_factory
        self.report = report_factory.report()

    @staticmethod
    def _para_markup(el):
        """Serialise an element as <para> markup for a Paragraph.

        The element is modified in place: its tag becomes 'para' and its tail
        (text following the closing tag, e.g. whitespace between blocks) is
        dropped so that nothing trails the closing </para>.
        """
        el.tag = 'para'
        el.tail = None
        # encoding='unicode' yields str rather than bytes
        return et_tostring(el, encoding='unicode')

    def _render_simple_tag(self, el, story):
        """Append one paragraph for a heading / p element to the story."""
        # look the style up first: _para_markup() overwrites el.tag
        style_name = self._simple_tags[el.tag]
        text = self._para_markup(el)
        story.append(plat_Paragraph(text, self.rf.styles[style_name]))

    def _render_ol(self, ol_el, story):
        """Append one numbered paragraph per child of an <ol> element.

        Numbering starts at the element's 'start' attribute when it holds an
        integer, otherwise at 1; items use the 'Bullet' style with "N." as
        bullet text.
        """
        try:
            first = int(ol_el.get('start', 1))
        except ValueError:
            first = 1
        for number, li_el in enumerate(ol_el, first):
            text = self._para_markup(li_el)
            story.append(plat_Paragraph(
                text, self.rf.styles['Bullet'], bulletText="%d." % number))

    def _render_ul(self, ul_el, story):
        """Append one bulleted paragraph per child of a <ul> element."""
        for li_el in ul_el:
            text = self._para_markup(li_el)
            story.append(plat_Paragraph(
                text, self.rf.styles['Bullet'], bulletText=self.rf.bullet))

    def _render_error(self, el, story):
        """Append a highlighted note that the element's tag is not supported."""
        story.append(plat_Paragraph(
            "<para fg='#ff0000' bg='#ffff00'>cannot render '%s' tag</para>" % el.tag,
            self.rf.styles['Normal']))

    @staticmethod
    def html_from_markdown(mdown, remove_newline=True, wrap=True):
        """Convert markdown to html.

        mdown - the markdown to be converted
        remove_newline - if True, every newline character in the converted
                         html is replaced by a single space (dropping it
                         outright would fuse the last word of one source line
                         with the first word of the next)
        wrap - if True, the whole html is wrapped in an <html> tag
        """
        html = md_markdown(mdown)
        if remove_newline:
            html = html.replace("\n", " ")
        if wrap:
            html = "<html>" + html + "</html>"
        return html

    @staticmethod
    def dom_from_html(html, wrap=False):
        """Convert html into a dom tree.

        html - the html to be converted (parsed with the ElementTree xml
               parser, so it has to be well-formed with a single root)
        wrap - if True, the whole html is wrapped in an <html> tag
        """
        if wrap:
            html = "<html>" + html + "</html>"
        dom = et_fromstring(html)
        return dom

    @staticmethod
    def dom_from_markdown(mdown):
        """Convert markdown into a dom tree rooted at an <html> element.

        mdown - the markdown to be converted
        """
        html = ReportWriter.html_from_markdown(mdown, remove_newline=True, wrap=True)
        dom = ReportWriter.dom_from_html(html, wrap=False)
        return dom

    def create_report(self, mdown):
        """Create the report and build it via the report object.

        mdown - markdown source of the report
        """
        dom = self.dom_from_markdown(mdown)
        story = []
        # only the direct children of the root are rendered
        for el in dom:
            if el.tag in self._simple_tags:
                self._render_simple_tag(el, story)
            elif el.tag == 'ul':
                self._render_ul(el, story)
            elif el.tag == 'ol':
                self._render_ol(el, story)
            else:
                self._render_error(el, story)
        self.report.build(story)
create a standard report (A4, black text etc)
In [14]:
# Default factory settings (A4 portrait, sample stylesheet untouched); the
# sample markdown is repeated ten times to fill several pages.
rfa4 = ReportFactory(filename='report_a4.pdf')
pdfw = ReportWriter(report_factory=rfa4)
pdfw.create_report(mdown=markdown_text * 10)
create a second report with different parameters (A5, changed colors etc.); the `__dict__`
attribute of a style shows all the options that can be modified to change that style
In [15]:
#rfa5.styles['Normal'].__dict__
In [16]:
# Second factory: A5 portrait pages with recoloured body and heading styles.
rfa5 = ReportFactory('report_a5.pdf')
rfa5.pagesize = ps_portrait(ps_A5)
#rfa5.styles['Normal'].textColor = '#664422'
#rfa5.refresh_styles()
# Colours are set on each style directly rather than on the parent 'Normal'
# style (see the disabled lines above).
for _style_name, _text_color in (
    ('BodyText', '#666666'),
    ('Bullet', '#666666'),
    ('Heading1', '#000066'),
    ('Heading2', '#000066'),
    ('Heading3', '#000066'),
):
    rfa5.styles[_style_name].textColor = _text_color
In [17]:
# Render the same sample content with the A5 factory configured above.
pdfw = ReportWriter(report_factory=rfa5)
pdfw.create_report(mdown=markdown_text * 10)