In [1]:
from IPython.display import HTML
import markdown as md
In [2]:
# Filler paragraph mixing markdown emphasis (_.._, __..__) with inline html
# tags (<strike>, <u>) so that the conversion of both can be checked.
l = """LOREM ipsum dolor sit amet, _consectetur_ adipiscing elit. Praesent dignissim orci a leo dapibus semper eget sed
sem. Pellentesque tellus nisl, condimentum nec libero id, __cursus consequat__ lectus. Ut quis nulla laoreet, efficitur
metus sit amet, <strike>viverra dui. Nam tempor ornare urna a consequat</strike>. Nulla dolor velit, sollicitudin sit
amet consectetur sed, interdum nec orci. Nunc suscipit tempus est ut porta. <u>Ut non felis a ligula suscipit
posuere quis sit amet elit</u>."""

# Sample document: headings, plain paragraphs, a bullet list and a numbered
# list. The template has twelve %s slots, each filled with the filler paragraph.
markdown_text = """
# Heading1
## Heading 2
%s %s %s
## Heading 2
%s
- %s
- %s
- %s
## Heading 2
%s
4. %s
4. %s
4. %s
%s
""" % ((l,) * 12)
In [3]:
#HTML(md.markdown(markdown_text))
In [4]:
import matplotlib.pyplot as plt
from io import BytesIO

# Build the sample figure through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(12, 9))
ax.plot([1,2,4,8,16], 'o-', label='First Line')
ax.plot([2,4,6,8,10], 'x--', label='Second Line')
ax.set_ylabel('some numbers')
ax.set_xlabel('some numbers')
ax.set_title('some title')
ax.legend()
plt.show()

# Keep one copy on disk and one in memory; the in-memory copy is what gets
# embedded into the pdf report further down.
fig.savefig("figure.jpeg", format='jpeg')
imgdata = BytesIO()
fig.savefig(imgdata, format='jpeg')
imgdata.seek(0) # rewind the data
Out[4]:
In [5]:
from markdown import markdown as md_markdown
from xml.etree.ElementTree import fromstring as et_fromstring
from xml.etree.ElementTree import tostring as et_tostring
from PIL import Image
import reportlab.platypus as plat
from reportlab.lib.styles import getSampleStyleSheet as sty_getSampleStyleSheet
from reportlab.lib.pagesizes import A4 as ps_A4
from reportlab.lib.pagesizes import A5 as ps_A5
from reportlab.lib.pagesizes import landscape as ps_landscape
from reportlab.lib.pagesizes import portrait as ps_portrait
from reportlab.lib.units import inch as un_inch
from reportlab.pdfgen.canvas import pdfmetrics as cnv_pdfmetrics
The ReportFactory
class creates a ReportLab document / report object; the idea is that all style information as well as page layouts are collected in this object, so that when a different factory is passed to the writer object the report looks different.
In [6]:
class ReportFactory():
    """create a Reportlab report object using BaseDocTemplate

    the report creation is a two-step process

    1. instantiate a ReportFactory object
    2. retrieve the report using the report() method

    note: as it currently stands the report object is remembered in the
    factory object, so another call to report() returns the _same_ object;
    this means that changing the parameters after report() has been called
    for the first time will not have an impact
    """

    def __init__(self, filename=None):
        """set up the default report properties

        filename - name of the pdf file to be written (None -> 'report_x1.pdf')

        the properties set here (filename, pagesize, showboundary, styles, bullet)
        can be changed on the object before report() is called for the first time
        """
        if filename is None:
            filename = 'report_x1.pdf'
        # f = open (filename,'wb') -> reports can take a file handle!
        self.filename = filename
        self.pagesize = ps_portrait(ps_A4)
        self.showboundary = 0
        #PAGE_HEIGHT=defaultPageSize[1]; PAGE_WIDTH=defaultPageSize[0]
        self.styles = sty_getSampleStyleSheet()
        self.bullet = "\u2022"
        self._report = None   # created lazily by report()

    @staticmethod
    def static_page(canvas, doc):
        """template for report page

        this template defines how the standard page looks (header, footer, background
        objects); it does _not_ define the flow objects though, as those are separately
        passed to the PageTemplate() function

        canvas - the canvas the page decoration is drawn on
        doc    - the document being built (only doc.page is read, for the footer)
        """
        canvas.saveState()
        canvas.setFont('Times-Roman', 9)
        canvas.drawString(un_inch, 0.75 * un_inch, "Report - Page %d" % doc.page)
        canvas.restoreState()

    def refresh_styles(self):
        """refresh all styles

        derived ReportLab styles need to be refreshed in case the parent style
        has been modified; this does not really work though - it seems that the
        styles are simply flattened....
        """
        # snapshot the names first so the loop does not iterate over a live view
        for name in list(self.styles.byName):
            self.styles[name].refresh()

    def report(self):
        """initialise a report object

        this function initialises and returns a report object, based on the properties
        set on the factory object at this point (note: the report object is only generated
        _once_ and subsequent calls return the same object; this implies that most property
        changes after this function has been called are not taken into account)
        """
        if self._report is None:
            rp = plat.BaseDocTemplate(self.filename, showBoundary=self.showboundary, pagesize=self.pagesize)
            # a single frame that fills the area inside the document margins
            frame_page = plat.Frame(rp.leftMargin, rp.bottomMargin, rp.width, rp.height, id='main')
            pagetemplates = [
                plat.PageTemplate(id='Page', frames=frame_page, onPage=self.static_page),
            ]
            rp.addPageTemplates(pagetemplates)
            self._report = rp
        return self._report
The ReportWriter
object executes the conversion from markdown to pdf. It is currently very simplistic - for example there is no entry hook for starting the conversion at the html level rather than at markdown, and only a few basic tags are implemented.
In [7]:
class ReportWriter():
    """collect a ReportLab story from markdown, html, images and flowables

    the writer is bound to a report factory: the factory provides the report
    object that is built in finalise() and the styles used for rendering
    """

    def __init__(self, report_factory):
        """bind the writer to a report factory and start with an empty story

        report_factory - object providing report(), styles and bullet
        """
        # html tags that map one-to-one onto a paragraph style of the factory
        self._simple_tags = {
            'h1' : 'Heading1',
            'h2' : 'Heading2',
            'h3' : 'Heading3',
            'h4' : 'Heading4',
            'h5' : 'Heading5',
            'p' : 'BodyText',
        }
        self.rf = report_factory
        self.report = report_factory.report()
        self.story = []

    def _render_simple_tag(self, el):
        """render a heading or paragraph element as one styled Paragraph

        note: the element is modified (its tag is renamed to 'para')
        """
        style_name = self._simple_tags[el.tag]
        el.tag = 'para'
        text = et_tostring(el)
        self.story.append(plat.Paragraph(text, self.rf.styles[style_name]))

    def _render_ol(self, el):
        """ordered lists are not implemented yet; an error marker is rendered instead"""
        return self._render_error(el)

    def _render_ul(self, ul_el):
        """render every child of an unordered list as a bulleted Paragraph

        note: the child elements are modified (their tags are renamed to 'para')
        """
        for li_el in ul_el:
            li_el.tag = 'para'
            text = et_tostring(li_el)
            self.story.append(plat.Paragraph(text, self.rf.styles['Bullet'], bulletText=self.rf.bullet))

    def _render_error(self, el):
        """append a highlighted placeholder paragraph naming the unsupported tag"""
        self.story.append(plat.Paragraph(
            "<para fg='#ff0000' bg='#ffff00'>cannot render '%s' tag</para>" % el.tag, self.rf.styles['Normal']))

    @staticmethod
    def html_from_markdown(mdown, remove_newline=True, wrap=True):
        """convert markdown to html

        mdown - the markdown to be converted
        remove_newline - if True, all newline characters are removed after conversion
        wrap - if True, the whole html is wrapped in an <html> tag
        """
        html = md_markdown(mdown)
        if remove_newline:
            html = html.replace("\n", "")
        if wrap:
            html = "<html>" + html + "</html>"
        return html

    @staticmethod
    def dom_from_html(html, wrap=False):
        """convert html into a dom tree

        html - the html to be converted
        wrap - if True, the whole html is wrapped in an <html> tag
        """
        if wrap:
            html = "<html>" + html + "</html>"
        dom = et_fromstring(html)
        return dom

    @staticmethod
    def dom_from_markdown(mdown):
        """convert markdown into a dom tree

        mdown - the markdown to be converted

        the generated html has its newlines removed and is wrapped in a single
        <html> root tag before it is parsed
        """
        html = ReportWriter.html_from_markdown(mdown, remove_newline=True, wrap=True)
        dom = ReportWriter.dom_from_html(html, wrap=False)
        return dom

    def append_from_html(self, html, wrap=True):
        """append to the story from an html text

        html - the html text
        wrap - whether the html must be wrapped into a single tag before rendering it into
               the DOM tree (the tag itself does not matter, but there must be one single
               root tag)

        Note: the html is converted into a DOM element, and then _append_from_dom() is called
        """
        dom = self.dom_from_html(html, wrap)
        return self._append_from_dom(dom)

    def append_from_mdown(self, mdown):
        """append to the story from a markdown text

        mdown - the markdown text

        Note: the markdown is first converted into html and then into a DOM element,
        and then _append_from_dom() is called
        """
        dom = self.dom_from_markdown(mdown)
        return self._append_from_dom(dom)

    def _append_from_dom(self, dom):
        """append to the story from a dom tree

        dom - the root element of the dom tree (note: the dom tree is modified!)

        returns the story; only the direct children of the root are rendered, and
        tags without a renderer produce an error marker paragraph

        Note: usually one would use append_from_mdown() or append_from_html() that in turn
        call this function here, rather than calling it directly
        """
        for el in dom:
            if el.tag in self._simple_tags:
                self._render_simple_tag(el)
            elif el.tag == 'ul':
                self._render_ul(el)
            elif el.tag == 'ol':
                self._render_ol(el)
            else:
                self._render_error(el)
        return self.story

    def append_from_image(self, imgfile, w=None, h=None):
        """append an image to the story

        imgfile - an image fileobject (including BytesIO)
        w,h - width and height of the image; both None -> page width; if only one
              is given the other one is derived from the aspect ratio of the image;
              if both are given they are used as they are

        note: ideally the image is in jpeg format
        """
        imgfile.seek(0)
        img = Image.open(imgfile)
        sx, sy = img.size
        ratio = sy/sx
        if w is None and h is None:
            w1 = 6*un_inch # TODO: this is _not_ the correct page width
            h1 = int(ratio * w1)
        elif h is None:
            w1 = w
            h1 = int(ratio * w)
        elif w is None:
            w1 = int(h/ratio)
            h1 = h
        else:
            # both given explicitly (previously w1/h1 were left undefined here)
            w1, h1 = w, h
        imgfile.seek(0)   # rewind again, the size probe above has consumed the file
        plat_im = plat.Image(imgfile, width=w1, height=h1)
        plat_im.hAlign = 'CENTER'
        self.story.append(plat_im)

    def append_from_story(self, story):
        """append to the story from another story (or a single story element)

        story - the story array (list or tuple), containing the story elements,
                or a single story element
        """
        # note: the former check `type(story) == 'list'` compared a type with a
        # string and was therefore never true, so lists ended up nested
        if isinstance(story, (list, tuple)):
            self.story.extend(story)
        else:
            self.story.append(story)

    def finalise(self):
        """create the report, based on the story collected in the object
        """
        self.report.build(self.story)
In [8]:
class TableFactory():
    """create ReportLab table flowable, including style information

    METHODS
        create - returns a styled table object (that's the main method
                    of this class; the other ones are helpers)
        style - returns a style object
        getFonts - returns a list of all available fonts

    Note: most of the time the only function needed is create()

    PROPERTIES
        bgcolor - generic table background color
        txtcolor - generic table text color
        bgcolor_headings - bgcolor for heading cells (None=use bgcolor)
        txtcolor_headings - txtcolor for heading cells (None=use txtcolor)
        gridline_color - color for the grid lines (None=no lines)
        hline_heading - color for horizontal line separating heading from body (None = no line)
        hline_topbot - color for horizontal line above and below the table (None = no line)
        vline_heading - color for vertical line separating heading from body (None = no line)
        vline_leftright - color for vertical line left and right of the table (None = no line)
        font - generic table font name (None=keep default)
        font_headings - font name for heading cells (None=use font)
    """

    def __init__(self):
        self.bgcolor = '#ffffff'
        self.txtcolor = '#000000'
        self.bgcolor_headings = '#ffffff'
        self.txtcolor_headings = None
        self.gridline_color = '#aaaaaa'
        self.hline_heading = '#000000'
        self.hline_topbot = '#000000'
        self.vline_heading = '#000000'
        self.vline_leftright = None
        self.font = None
        self.font_headings = None

    @staticmethod
    def getFonts():
        """returns list of all legal font names
        """
        return cnv_pdfmetrics.getRegisteredFontNames()

    def style(self, num_heading_rows=1, num_heading_cols=0):
        """get the table style object

        creates a new style object based on the properties set in self at the point
        the style object is created

        PARAMETERS
            num_heading_rows - number of table rows on top that are considered headings
            num_heading_cols - number of table cols on the left that are considered headings

        the style object returned can be modified using

            style.add(...)

        alternatively the table object can be modified using (this command adds the commands
        to the style, it does not replace them)

            Table.setStyle(...)

        note: cell coordinates in the commands are (column, row); heading specific
        commands are added after the generic ones of the same kind

        for reference, here the table style commands (cf reference guide, p75++)

        CELL COMMANDS
            FONT - takes fontname, optional fontsize and optional leading.
            FONTNAME (or FACE) - takes fontname.
            FONTSIZE (or SIZE) - takes fontsize in points; leading may get out of sync.
            LEADING - takes leading in points.
            TEXTCOLOR - takes a color name or (R,G,B) tuple.
            ALIGNMENT (or ALIGN) - takes one of LEFT, RIGHT and CENTRE (or CENTER) or DECIMAL.
            LEFTPADDING - takes an integer, defaults to 6.
            RIGHTPADDING - takes an integer, defaults to 6.
            BOTTOMPADDING - takes an integer, defaults to 3.
            TOPPADDING - takes an integer, defaults to 3.
            BACKGROUND - takes a color.
            ROWBACKGROUNDS - takes a list of colors to be used cyclically.
            COLBACKGROUNDS - takes a list of colors to be used cyclically.
            VALIGN - takes one of TOP, MIDDLE or the default BOTTOM

        LINE COMMANDS
            BOX / OUTLINE - takes width, color
            GRID - takes width, color
            INNERGRID - takes width, color
            LINEBELOW - takes width, color
            LINEABOVE - takes width, color
            LINEBEFORE - takes width, color
            LINEAFTER - takes width, color

        OTHER COMMANDS
            SPAN - no additional arguments
            splitlast, splitfirst special indices to indicate split rows or cols
        """
        has_heading_rows = num_heading_rows > 0
        has_heading_cols = num_heading_cols > 0
        last_heading_row = num_heading_rows - 1
        last_heading_col = num_heading_cols - 1

        style = plat.TableStyle()

        # alignment
        style.add('VALIGN', (0,0), (-1,-1), 'MIDDLE')
        style.add('ALIGN', (0,0), (-1,-1), 'CENTER')
        if has_heading_cols:
            style.add('ALIGN', (0,0), (last_heading_col,-1), 'LEFT')

        # generic colors and font
        if self.bgcolor is not None:
            style.add('BACKGROUND', (0,0), (-1,-1), self.bgcolor)
        if self.txtcolor is not None:
            style.add('TEXTCOLOR', (0,0), (-1,-1), self.txtcolor)
        if self.txtcolor_headings is not None:
            # this property used to be ignored although it is documented
            if has_heading_rows:
                style.add('TEXTCOLOR', (0,0), (-1,last_heading_row), self.txtcolor_headings)
            if has_heading_cols:
                style.add('TEXTCOLOR', (0,0), (last_heading_col,-1), self.txtcolor_headings)
        if self.font is not None:
            style.add('FONTNAME', (0,0), (-1,-1), self.font)
        if self.font_headings is not None:
            if has_heading_rows:
                style.add('FONTNAME', (0,0), (-1,last_heading_row), self.font_headings)
            if has_heading_cols:
                style.add('FONTNAME', (0,0), (last_heading_col,-1), self.font_headings)

        # grid and heading backgrounds
        if self.gridline_color is not None:
            style.add('GRID',(0,0),(-1,-1),0.5,self.gridline_color)
        if self.bgcolor_headings is not None:
            if has_heading_rows:
                style.add('BACKGROUND', (0,0), (-1,last_heading_row), self.bgcolor_headings)
            if has_heading_cols:
                style.add('BACKGROUND', (0,0), (last_heading_col,-1), self.bgcolor_headings)

        # separator and border lines
        if self.hline_topbot is not None:
            style.add('LINEABOVE', (0,0), (-1,0), 2, self.hline_topbot)
            style.add('LINEBELOW', (0,-1), (-1,-1), 2, self.hline_topbot)
        if self.hline_heading is not None and has_heading_rows:
            style.add('LINEBELOW', (0, last_heading_row), (-1, last_heading_row), 2, self.hline_heading)
        if self.vline_heading is not None and has_heading_cols:
            # the line goes after the last heading _column_, in the vertical line
            # color (this used to take the row count and the horizontal line color)
            style.add('LINEAFTER', (last_heading_col,0), (last_heading_col,-1), 2, self.vline_heading)
        if self.vline_leftright is not None:
            style.add('LINEBEFORE', (0,0), (0,-1), 2, self.vline_leftright)
            style.add('LINEAFTER', (-1,0), (-1,-1), 2, self.vline_leftright)
        return style

    def create(self, data, w=None, h=None, nhr=1, nhc=0):
        """creates a table object

        data - the table data data[row][col]=value
        w - column width (None, value * inch, or [value * inch])
        h - row height (None, value * inch, or [value * inch])
        nhr - number of table rows on top that are considered headings
        nhc - number of table cols on the left that are considered headings
        """
        style = self.style(nhr, nhc)
        table = plat.Table(data, colWidths=w, rowHeights=h, style=style)
        return table
In [13]:
# Show the font names registered with pdfmetrics, i.e. the values returned by
# TableFactory.getFonts().
TableFactory.getFonts()
Out[13]:
In [10]:
# Sample table whose first row and first column hold the labels
# (the top left corner cell is empty).
tbl_data = [
    ["", "Col1", "Col2", "Col3"],
    ["Row1", "c11", "c21","c31"],
    ["Row2", "c12", "c22","c32"],
    ["Row3", "c13", "c23","c33"],
]

# Sample table where only the first row holds labels.
tbl_data2 = [
    ["Col0", "Col1", "Col2", "Col3"],
    ["c01", "c11", "c21","c31"],
    ["c02", "c12", "c22","c32"],
    ["c03", "c13", "c23","c33"],
]

# One entry per column / per row of the 4x4 sample tables.
colwidths = [1*un_inch] * 4
rowheights = [0.5*un_inch] * 4
#tbl_data
The commands passed to TableStyles come in three main groups which affect the table background, draw lines, or set cell styles.
The first element of each command is its identifier, the second and third arguments determine the cell coordinates of the box of cells which are affected with negative coordinates counting backwards from the limit values as in Python indexing.
The coordinates are given as (column, row) which follows the spreadsheet 'A1' model, but not the more natural (for mathematicians) 'RC' ordering.
The top left cell is (0, 0) the bottom right is (-1, -1).
Depending on the command, various extra arguments follow, starting at index 3.
In [11]:
TF = TableFactory()

# colors: body cells first, then heading cells
TF.txtcolor = '#000033'
TF.bgcolor = '#eeeeff'
TF.txtcolor_headings = '#0000aa'
TF.bgcolor_headings = '#eeeef0'

# heading font and border lines
TF.font_headings = 'Helvetica-Bold'
TF.hline_topbot = '#000000'
TF.vline_leftright = '#000000'

# first table: one heading row and one heading column, with left/right border
table = TF.create(tbl_data, w=1.5*un_inch, h=0.5*un_inch, nhr=1, nhc=1)

# second table: same factory, but no left/right border and no heading column
TF.vline_leftright = None
table2 = TF.create(tbl_data2, w=1.5*un_inch, h=0.3*un_inch, nhr=1, nhc=0)
#style
create a standard report (A4, black text etc)
In [12]:
import os

# Remove the output of a previous run. This is done in Python rather than
# with `!rm`, which prints an error when the file does not exist yet (as on
# a fresh checkout) and is not available on every platform.
report_path = 'r3port_a4.pdf'
if os.path.exists(report_path):
    os.remove(report_path)

rfa4 = ReportFactory(report_path)
pdfw = ReportWriter(rfa4)

# the two sample tables, each below its own heading
pdfw.append_from_mdown("# Table")
pdfw.append_from_mdown("## Table1")
pdfw.append_from_story(table)
pdfw.append_from_mdown("## Table2")
pdfw.append_from_story(table2)

# the sample text three times, each followed by the figure: first at the
# default size, then with a fixed height, then with a fixed width
for image_size in ({}, {'h': 3*un_inch}, {'w': 3*un_inch}):
    pdfw.append_from_mdown(markdown_text)
    pdfw.append_from_image(imgdata, **image_size)

pdfw.finalise()
In [12]:
In [12]: