Markdown 2 Reportlab - Mark III

Markdown

Here we create some lorem ipsum markdown text for testing


In [1]:
from pathlib import Path

from IPython.display import HTML
import markdown as md

In [2]:
# One paragraph of filler text with some inline markup (emphasis, strong,
# <strike> and <u>).  It is spelled out line by line so that the trailing
# space in front of every line break stays visible (and cannot be stripped
# by an editor).
l = (
    "LOREM ipsum dolor sit amet, _consectetur_ adipiscing elit. Praesent dignissim orci a leo dapibus semper eget sed \n"
    "sem. Pellentesque tellus nisl, condimentum nec libero id, __cursus consequat__ lectus. Ut quis nulla laoreet, efficitur \n"
    "metus sit amet, <strike>viverra dui. Nam tempor ornare urna a consequat</strike>. Nulla dolor velit, sollicitudin sit \n"
    "amet consectetur sed, interdum nec orci. Nunc suscipit tempus est ut porta. <u>Ut non felis a ligula suscipit \n"
    "posuere quis sit amet elit</u>."
)

# Test document: headings, plain paragraphs, a bullet list and a numbered
# list.  The template has twelve %s slots, each filled with the filler text.
markdown_text = """
# Heading1
## Heading 2

%s %s %s


## Heading 2

%s

- %s
- %s
- %s

## Heading 2

%s

4. %s
4. %s
4. %s

%s
""" % ((l,) * 12)

In [3]:
#HTML(md.markdown(markdown_text))

Images

Here we create a test image: a matplotlib figure that is saved both to disk and to an in-memory buffer.


In [4]:
import matplotlib.pyplot as plt
from io import BytesIO

# draw the test figure through the explicit figure / axes interface
fig, ax = plt.subplots(figsize=(12, 9))
ax.plot([1,2,4,8,16], 'o-', label='First Line')
ax.plot([2,4,6,8,10], 'x--', label='Second Line')
ax.set_ylabel('some numbers')
ax.set_xlabel('some numbers')
ax.set_title('some title')
ax.legend()
plt.show()

# the picture is stored twice: as a file on disk, and in an in-memory
# buffer (imgdata) that is later handed to the report writer
fig.savefig("figure.jpeg", format='jpeg')
imgdata = BytesIO()
fig.savefig(imgdata, format='jpeg')
imgdata.seek(0)  # rewind the data


Out[4]:
0

ReportLab

Import the necessary functions one by one, each under a prefixed alias that indicates the module it comes from.


In [5]:
from markdown import markdown as md_markdown

from xml.etree.ElementTree import fromstring as et_fromstring
from xml.etree.ElementTree import tostring as et_tostring

from PIL import Image

import reportlab.platypus as plat
from reportlab.lib.styles import getSampleStyleSheet as sty_getSampleStyleSheet
from reportlab.lib.pagesizes import A4 as ps_A4
from reportlab.lib.pagesizes import A5 as ps_A5
from reportlab.lib.pagesizes import landscape as ps_landscape
from reportlab.lib.pagesizes import portrait as ps_portrait
from reportlab.lib.units import inch as un_inch
from reportlab.pdfgen.canvas import pdfmetrics as cnv_pdfmetrics

The ReportFactory class creates a ReportLab document / report object; the idea is that all style information as well as page layouts are collected in this object, so that when a different factory is passed to the writer object the report looks different.


In [6]:
class ReportFactory():
    """create a Reportlab report object using BaseDocTemplate

    the report creation is a two-step process

    1. instantiate a ReportFactory object (and, if desired, adjust its
       attributes: filename, pagesize, showboundary, styles, bullet)
    2. retrieve the report using the report() method

    note: as it currently stands the report object is remembered in the
    factory object, so another call to report() returns the _same_ object;
    this means that changing the parameters after report() has been called
    for the first time will not have an impact
    """

    def __init__(self, filename=None):
        """set up the default report properties

        filename - name of the pdf file to be written (None -> 'report_x1.pdf')
        """
        if filename is None:
            filename = 'report_x1.pdf'
        # f = open (filename,'wb') -> reports can take a file handle!
        self.filename = filename
        self.pagesize = ps_portrait(ps_A4)
        self.showboundary = 0
        #PAGE_HEIGHT=defaultPageSize[1]; PAGE_WIDTH=defaultPageSize[0]
        self.styles = sty_getSampleStyleSheet()
        self.bullet = "\u2022"
        # the report object is created lazily by report() and then cached here
        self._report = None

    @staticmethod
    def static_page(canvas, doc):
        """template for report page

        this template defines how the standard page looks (header, footer, background
        objects); it does _not_ define the flow objects though, as those are separately
        passed to the PageTemplate() function

        canvas - the canvas to draw on
        doc - the document being built (only doc.page is read here)
        """
        canvas.saveState()
        canvas.setFont('Times-Roman', 9)
        canvas.drawString(un_inch, 0.75 * un_inch, "Report - Page %d" % doc.page)
        canvas.restoreState()

    def refresh_styles(self):
        """refresh all styles

        derived ReportLab styles need to be refreshed in case the parent style
        has been modified; this does not really work though - it seems that the
        styles are simply flattened....
        """
        # take a snapshot of the names so that the loop does not depend on the
        # style sheet's dictionary staying unchanged while refreshing
        for name in list(self.styles.byName):
            self.styles[name].refresh()

    def report(self):
        """initialise a report object

        this function initialises and returns a report object, based on the properties
        set on the factory object at this point (note: the report object is only generated
        _once_ and subsequent calls return the same object; this implies that most property
        changes after this function has been called are not taken into account)
        """
        if self._report is None:
            rp = plat.BaseDocTemplate(
                self.filename, showBoundary=self.showboundary, pagesize=self.pagesize)
            # a single frame that fills the area inside the document margins
            frame_page = plat.Frame(rp.leftMargin, rp.bottomMargin, rp.width, rp.height, id='main')
            pagetemplates = [
                plat.PageTemplate(id='Page', frames=frame_page, onPage=self.static_page),
            ]
            rp.addPageTemplates(pagetemplates)
            self._report = rp
        return self._report

The ReportWriter object executes the conversion from markdown to pdf. It is currently very simplistic - for example there is no entry hook for starting the conversion at the html level rather than at markdown, and only a few basic tags are implemented.


In [7]:
class ReportWriter():
    """collect a story (list of flowables) and write it into a report

    The story is built up by the append_from_xxx() methods (markdown, html,
    images, ready-made flowables) and written out by finalise(). All style
    information is taken from the report factory passed to the constructor.
    """

    def __init__(self, report_factory):
        """report_factory - the factory providing the report object and the styles"""
        # html tags that map one-to-one onto a paragraph style
        self._simple_tags = {
            'h1'     : 'Heading1',
            'h2'     : 'Heading2',
            'h3'     : 'Heading3',
            'h4'     : 'Heading4',
            'h5'     : 'Heading5',
            'p'      : 'BodyText',
        }
        self.rf = report_factory
        self.report = report_factory.report()
        self.story = []

    def _render_simple_tag(self, el):
        """append a heading / paragraph element as a Paragraph (note: el.tag is modified)"""
        style_name = self._simple_tags[el.tag]
        # the element is renamed to 'para' and serialised again, so that the
        # inline markup it contains is passed on to Paragraph as text
        el.tag = 'para'
        text = et_tostring(el)
        self.story.append(plat.Paragraph(text, self.rf.styles[style_name]))

    def _render_ol(self, el):
        """ordered lists are not implemented yet; an error paragraph is appended instead"""
        return self._render_error(el)

    def _render_ul(self, ul_el):
        """append every child of an unordered list as a bullet paragraph"""
        for li_el in ul_el:
            li_el.tag = 'para'
            text = et_tostring(li_el)
            self.story.append(
                plat.Paragraph(text, self.rf.styles['Bullet'], bulletText=self.rf.bullet))

    def _render_error(self, el):
        """append a highlighted paragraph stating that the tag of el cannot be rendered"""
        self.story.append(plat.Paragraph(
            "<para fg='#ff0000' bg='#ffff00'>cannot render '%s' tag</para>" % el.tag,self.rf.styles['Normal']))

    @staticmethod
    def html_from_markdown(mdown, remove_newline=True, wrap=True):
        """convert markdown to html

        mdown - the markdown to be converted
        remove_newline - if True, all \\n characters are removed after conversion
                (they are removed, not replaced by a space: text lines that do
                not end in whitespace are therefore joined to the next line)
        wrap - if True, the whole html is wrapped in an <html> tag
        """
        html = md_markdown(mdown)
        if remove_newline: html = html.replace("\n", "")
        if wrap: html = "<html>"+html+"</html>"
        return html

    @staticmethod
    def dom_from_html(html, wrap=False):
        """convert html into a dom tree

        html - the html to be converted
        wrap - if True, the whole html is wrapped in an <html> tag
        """
        if wrap: html = "<html>"+html+"</html>"
        dom = et_fromstring(html)
        return (dom)

    @staticmethod
    def dom_from_markdown(mdown):
        """convert markdown into a dom tree

        mdown - the markdown to be converted

        Note: newlines are removed from the generated html, and the html is
        wrapped in a single <html> root tag before it is parsed
        """
        html = ReportWriter.html_from_markdown(mdown, remove_newline=True, wrap=True)
        dom = ReportWriter.dom_from_html(html, wrap=False)
        return (dom)

    def append_from_html(self, html, wrap=True):
        """append to the story from an html text

        html - the html text
        wrap - whether the html must be wrapped into a single tag before rendering it into
                the DOM tree (the tag itself does not matter, but there must be one single
                root tag)

        Note: the html is converted into a DOM element, and then _append_from_dom() is called
        """
        dom = self.dom_from_html(html, wrap)
        return self._append_from_dom(dom)

    def append_from_mdown(self, mdown):
        """append to the story from a markdown text

        mdown - the markdown text

        Note: the markdown is first converted into html and then into a DOM element,
        and then _append_from_dom() is called
        """
        dom = self.dom_from_markdown(mdown)
        return self._append_from_dom(dom)


    def _append_from_dom(self, dom):
        """append to the story from a dom tree

        dom - the root element of the dom tree (note: the dom tree is modified!)

        Only the direct children of the root are looked at; tags that are not
        supported end up as an error paragraph in the story. Returns the story.

        Note: usually one would use append_from_mdown() or append_from_html() that in turn
        call this function here, rather than calling it directly
        """
        for el in dom:
            if el.tag in self._simple_tags:
                self._render_simple_tag(el)
            elif el.tag == 'ul':
                self._render_ul(el)
            elif el.tag == 'ol':
                self._render_ol(el)
            else:
                self._render_error(el)

        return self.story


    def append_from_image(self, imgfile, w=None, h=None):
        """append an image to the story

        imgfile - an image fileobject (including BytesIO)
        w,h - width and height of the image
                both None  -> 6 inch wide, height from the aspect ratio
                only w / h -> the other one is derived from the aspect ratio
                both given -> used as they are (aspect ratio is not preserved)

        note: ideally the image is in jpeg format
        """
        # the image is opened once only to find out its aspect ratio
        imgfile.seek(0)
        img = Image.open(imgfile)
        sx, sy = img.size
        ratio = sy/sx

        if w is None and h is None:
            w1 = 6*un_inch # TODO: this is _not_ the correct page width
            h1 = int(ratio * w1)
        elif h is None:
            w1 = w
            h1 = int(ratio * w)
        elif w is None:
            w1 = int(h/ratio)
            h1 = h
        else:
            # both dimensions given explicitly (previously this case failed
            # with an UnboundLocalError)
            w1, h1 = w, h

        # rewind again, the flowable reads the file object from the start
        imgfile.seek(0)
        plat_im = plat.Image(imgfile, width=w1, height=h1)
        plat_im.hAlign = 'CENTER'
        self.story.append(plat_im)


    def append_from_story(self, story):
        """append to the story from another story (or a single story element)

        story - the story array (list or tuple), containing the story elements,
                or a single story element
        """
        # note: the type must be checked against the type object; comparing
        # it to the string 'list' is never true and would nest the list
        if isinstance(story, (list, tuple)):
            self.story.extend(story)
        else:
            self.story.append(story)


    def finalise(self):
        """create the report, based on the story collected in the object
        """
        self.report.build(self.story)

Tables


In [8]:
class TableFactory():
    """create ReportLab table flowable, including style information

    METHODS

       create - returns a styled table object (that's the main method
                  of this class; the other ones are helpers)

       style - returns a style object
       getFonts - returns a list of all available fonts


    Note: most of the time the only function needed is create()


    PROPERTIES

        bgcolor                      - generic table background color
        txtcolor                     - generic table text color
        bgcolor_headings             - bgcolor for heading cells (None=use bgcolor)
        txtcolor_headings            - txtcolor for heading cells (None=use txtcolor)
        gridline_color               - color for the grid lines (None=no lines)
        hline_heading                - color for horizontal line separating heading from body (None = no line)
        hline_topbot                 - color for horizontal line above and below the table (None = no line)
        vline_heading                - color for vertical line separating heading from body (None = no line)
        vline_leftright              - color for vertical line left and right of the table (None = no line)
        font                         - generic table font name (None=do not set)
        font_headings                - font name for heading cells (None=use font)


    """
    def __init__(self):

        self.bgcolor  = '#ffffff'
        self.txtcolor = '#000000'
        self.bgcolor_headings  = '#ffffff'
        self.txtcolor_headings = None
        self.gridline_color = '#aaaaaa'
        self.hline_heading = '#000000'
        self.hline_topbot = '#000000'
        self.vline_heading = '#000000'
        self.vline_leftright = None

        self.font = None
        self.font_headings = None

    @staticmethod
    def getFonts():
        """returns list of all legal font names
        """
        return cnv_pdfmetrics.getRegisteredFontNames()

    @staticmethod
    def _add_to_headings(style, command, num_heading_rows, num_heading_cols, value):
        """add a cell command for the heading rows and for the heading columns"""
        if num_heading_rows > 0:
            style.add(command, (0,0), (-1,num_heading_rows-1), value)
        if num_heading_cols > 0:
            style.add(command, (0,0), (num_heading_cols-1,-1), value)

    def style(self, num_heading_rows=1, num_heading_cols=0):
        """get the table style object

        creates a new style object based on the properties set in self at the point
        the style object is created

        PARAMETERS

            num_heading_rows - number of table rows on top that are considered headings
            num_heading_cols - number of table cols on the left that are considered headings


        the style object returned can be modified using

            style.add(...)

        alternatively the table object can be modified using (this command adds the commands
        to the style, it does not replace them)

            Table.setStyle(...)

        note: cell coordinates are given as (column, row); commands added later
        take precedence over earlier ones for the cells they cover, which is why
        the heading commands follow the generic ones

        for reference, here the table style commands (cf reference guide, p75++)

        CELL COMMANDS

            FONT                 - takes fontname, optional fontsize and optional leading.
            FONTNAME (or FACE)   - takes fontname.
            FONTSIZE (or SIZE)   - takes fontsize in points; leading may get out of sync.
            LEADING              - takes leading in points.
            TEXTCOLOR            - takes a color name or (R,G,B) tuple.
            ALIGNMENT (or ALIGN) - takes one of LEFT, RIGHT and CENTRE (or CENTER) or DECIMAL.
            LEFTPADDING          - takes an integer, defaults to 6.
            RIGHTPADDING         - takes an integer, defaults to 6.
            BOTTOMPADDING        - takes an integer, defaults to 3.
            TOPPADDING           - takes an integer, defaults to 3.
            BACKGROUND           - takes a color.
            ROWBACKGROUNDS       - takes a list of colors to be used cyclically.
            COLBACKGROUNDS       - takes a list of colors to be used cyclically.
            VALIGN               - takes one of TOP, MIDDLE or the default BOTTOM


        LINE COMMANDS

            BOX / OUTLINE        - takes width, color
            GRID                 - takes width, color
            INNERGRID            - takes width, color
            LINEBELOW            - takes width, color
            LINEABOVE            - takes width, color
            LINEBEFORE           - takes width, color
            LINEAFTER            - takes width, color

        OTHER COMMANDS

            SPAN                 - no additional arguments


        splitlast, splitfirst   special indices to indicate split rows or cols

        """

        style = plat.TableStyle()
        style.add('VALIGN', (0,0), (-1,-1), 'MIDDLE')
        style.add('ALIGN', (0,0), (-1,-1), 'CENTER')
        if num_heading_cols > 0:
            style.add('ALIGN', (0,0), (num_heading_cols-1,-1), 'LEFT')

        if self.bgcolor is not None:
            style.add('BACKGROUND', (0,0), (-1,-1), self.bgcolor)

        if self.txtcolor is not None:
            style.add('TEXTCOLOR', (0,0), (-1,-1), self.txtcolor)

        # heading text color (this property used to be ignored)
        if self.txtcolor_headings is not None:
            self._add_to_headings(
                style, 'TEXTCOLOR', num_heading_rows, num_heading_cols, self.txtcolor_headings)

        if self.font is not None:
            style.add('FONTNAME', (0,0), (-1,-1), self.font)

        if self.font_headings is not None:
            self._add_to_headings(
                style, 'FONTNAME', num_heading_rows, num_heading_cols, self.font_headings)

        if self.gridline_color is not None:
            style.add('GRID', (0,0), (-1,-1), 0.5, self.gridline_color)

        if self.bgcolor_headings is not None:
            self._add_to_headings(
                style, 'BACKGROUND', num_heading_rows, num_heading_cols, self.bgcolor_headings)

        if self.hline_topbot is not None:
            style.add('LINEABOVE', (0,0),  (-1,0),  2, self.hline_topbot)
            style.add('LINEBELOW', (0,-1), (-1,-1), 2, self.hline_topbot)

        if self.hline_heading is not None and num_heading_rows > 0:
            last_row = num_heading_rows - 1
            style.add('LINEBELOW', (0,last_row), (-1,last_row), 2, self.hline_heading)

        if self.vline_heading is not None and num_heading_cols > 0:
            # the line goes after the last heading _column_ and uses the
            # vertical line color (previously the row count and the
            # horizontal line color were used here)
            last_col = num_heading_cols - 1
            style.add('LINEAFTER', (last_col,0), (last_col,-1), 2, self.vline_heading)

        if self.vline_leftright is not None:
            style.add('LINEBEFORE', (0,0),  (0,-1),  2, self.vline_leftright)
            style.add('LINEAFTER', (-1,0), (-1,-1), 2, self.vline_leftright)

        return style


    def create(self, data, w=None, h=None, nhr=1, nhc=0):
        """creates a table object

        data - the table data data[row][col]=value
        w - column width (None, value * inch, or [value * inch])
        h - row height (None, value * inch, or [value * inch])
        nhr - number of table rows on top that are considered headings
        nhc - number of table cols on the left that are considered headings

        the style is generated by style(nhr, nhc) from the properties set on
        the factory at the time of the call
        """
        style = self.style(nhr, nhc)
        table = plat.Table(data, colWidths=w, rowHeights=h, style=style)
        return table

In [13]:
# show the font names that are currently registered with ReportLab's
# pdfmetrics (candidate values for TableFactory.font / font_headings)
TableFactory.getFonts()


Out[13]:
['Helvetica',
 'Helvetica-Bold',
 'Helvetica-Oblique',
 'Symbol',
 'Times-Roman',
 'ZapfDingbats']

In [10]:
# demo table with a heading row _and_ a heading column; body cells are
# labelled c<col><row>
tbl_data = [["", "Col1", "Col2", "Col3"]] + [
    ["Row%d" % row] + ["c%d%d" % (col, row) for col in range(1, 4)]
    for row in range(1, 4)
]

# demo table with a heading row only; here column 0 is part of the body
tbl_data2 = [["Col%d" % col for col in range(4)]] + [
    ["c%d%d" % (col, row) for col in range(4)]
    for row in range(1, 4)
]
# per-column widths (one inch each) and per-row heights (half an inch each)
# for a 4 x 4 table
colwidths = [1*un_inch] * 4
rowheights = [0.5*un_inch] * 4
#tbl_data

The commands passed to a TableStyle come in three main groups: commands that affect the table background, commands that draw lines, and commands that set cell styles.

The first element of each command is its identifier. The second and third elements are the coordinates of the start and end cells of the box of cells that is affected; negative coordinates count backwards from the limit values, as in Python indexing.

  • The coordinates are given as (column, row) which follows the spreadsheet 'A1' model, but not the more natural (for mathematicians) 'RC' ordering.

  • The top left cell is (0, 0) the bottom right is (-1, -1).

  • Depending on the command, additional arguments (for example a line width and a color, or a font name) follow from index 3 onwards.


In [11]:
# one factory is configured and then used for both demo tables
TF = TableFactory()

# colors
TF.bgcolor = '#eeeeff'
TF.bgcolor_headings = '#eeeef0'
TF.txtcolor = '#000033'
TF.txtcolor_headings = '#0000aa'

# outer lines and heading font
TF.hline_topbot = '#000000'
TF.vline_leftright = '#000000'
TF.font_headings = 'Helvetica-Bold'

# first table: heading row and heading column, with left / right border
table = TF.create(data=tbl_data, w=1.5*un_inch, h=0.5*un_inch, nhr=1, nhc=1)

# second table: heading row only, left / right border switched off again
TF.vline_leftright = None
table2 = TF.create(data=tbl_data2, w=1.5*un_inch, h=0.3*un_inch, nhr=1, nhc=0)
#style

Create Report

Create a standard report (A4 portrait, default styles) that contains the two tables, the markdown text and the test image at different sizes.


In [12]:
# remove the output of a previous run, if there is one (done in Python
# rather than with `!rm`, which is not portable and prints an error when
# the file does not exist yet)
report_path = Path('r3port_a4.pdf')
if report_path.exists():
    report_path.unlink()

rfa4 = ReportFactory(str(report_path))
pdfw = ReportWriter(rfa4)

# the two demo tables, each under its own heading
pdfw.append_from_mdown("# Table")
pdfw.append_from_mdown("## Table1")
pdfw.append_from_story(table)
pdfw.append_from_mdown("## Table2")
pdfw.append_from_story(table2)

# the markdown text three times, followed by the image at its default
# size, at a fixed height and at a fixed width respectively
pdfw.append_from_mdown(markdown_text)
pdfw.append_from_image(imgdata)
pdfw.append_from_mdown(markdown_text)
pdfw.append_from_image(imgdata, h=3*un_inch)
pdfw.append_from_mdown(markdown_text)
pdfw.append_from_image(imgdata, w=3*un_inch)

# write the pdf
pdfw.finalise()

In [12]:


In [12]: