In [1]:
import re
import numpy as np
from IPython.display import display as D
%rehashx

In [2]:
# Switch to the lecture directory so the relative file names and globs used by
# the following cells resolve there.
# NOTE(review): this path is specific to the author's home directory; adjust it
# before running the notebook elsewhere.
%cd ~/courses/phys518_critical_phenomena/lecture/


/home/justin/courses/phys518_critical_phenomena/lecture

In [3]:
# Captures the one- or two-digit lecture number embedded in file names such as
# "518_lecture_7.pdf" or "518_lecture_12.pdf".
pdfnum_re = re.compile(
    r"518_lecture_([0-9]{1,2})\.pdf"
)

In [4]:
# Capture the shell listing of the per-lecture PDFs as a list of file names.
# The shell expands the glob in lexicographic order, so lectures 10-14 are
# listed before lectures 1-9 (visible in the displayed output).
files = !ls 518_lecture_*.pdf
D(files)


['518_lecture_10.pdf',
 '518_lecture_11.pdf',
 '518_lecture_12.pdf',
 '518_lecture_13.pdf',
 '518_lecture_14.pdf',
 '518_lecture_1.pdf',
 '518_lecture_2.pdf',
 '518_lecture_3.pdf',
 '518_lecture_4.pdf',
 '518_lecture_5.pdf',
 '518_lecture_6.pdf',
 '518_lecture_7.pdf',
 '518_lecture_8.pdf',
 '518_lecture_9.pdf']

In [5]:
newfiles = []
idxnames = []
for f in files:
    filenum = int(pdfnum_re.findall(f)[0])
    pdfname = r"Lecture\ "+format(filenum, "02d")+".pdf" 
    idxname = r"Lecture "+format(filenum, "d")
    newfiles.append(pdfname)
    idxnames.append(idxname)
    !cp $f $pdfname

Merge the PDFs into one


In [6]:
# Names of the intermediate merged PDF (no bookmarks yet) and of the final output.
tempfile = "PHYS_518_lectures.tmp.pdf"
outfile = "PHYS_518_lectures.pdf"
# Concatenate all zero-padded "Lecture NN.pdf" copies into the intermediate file.
# The shell expands the glob in sorted order, which the zero padding makes equal
# to lecture order (see the "Effective call" line in the pdfjam output).
!pdfjoin Lecture*.pdf --outfile $tempfile


          ----
  pdfjam: This is pdfjam version 2.08.
  pdfjam: Reading any site-wide or user-specific defaults...
          (none found)
  pdfjam: Effective call for this run of pdfjam:
          /usr/bin/pdfjam --fitpaper 'true' --rotateoversize 'true' --suffix joined --outfile PHYS_518_lectures.tmp.pdf -- Lecture\ 01.pdf - Lecture\ 02.pdf - Lecture\ 03.pdf - Lecture\ 04.pdf - Lecture\ 05.pdf - Lecture\ 06.pdf - Lecture\ 07.pdf - Lecture\ 08.pdf - Lecture\ 09.pdf - Lecture\ 10.pdf - Lecture\ 11.pdf - Lecture\ 12.pdf - Lecture\ 13.pdf - Lecture\ 14.pdf - 
  pdfjam: Calling pdflatex...
  pdfjam: Finished.  Output was to 'PHYS_518_lectures.tmp.pdf'.

Sort the files and the corresponding index entries


In [7]:
# Order both lists by the zero-padded file name so that each bookmark title
# stays aligned with its PDF and both follow lecture order.
sortind = np.argsort(newfiles)
paired = [(newfiles[i], idxnames[i]) for i in sortind]
newfiles = [pdf_name for pdf_name, title in paired]
idxnames = [title for pdf_name, title in paired]
D(idxnames)


['Lecture 1',
 'Lecture 2',
 'Lecture 3',
 'Lecture 4',
 'Lecture 5',
 'Lecture 6',
 'Lecture 7',
 'Lecture 8',
 'Lecture 9',
 'Lecture 10',
 'Lecture 11',
 'Lecture 12',
 'Lecture 13',
 'Lecture 14']

In [8]:
r = re.compile(r"Pages:\s*([0-9]+)")
idxentries = []
pagenum = 1
for name in idxnames:
    idxentry = r"[/Page " + format(pagenum,"d") \
        + r" /View [/XYZ null null null] /Title (" \
        + name \
        + r") /OUT pdfmark"
    idxentries.append(idxentry)
    pages_s = !pdfinfo $f | grep Pages
    pages = int(r.findall(pages_s[0])[0])
    pagenum += pages
idx = "\n".join(idxentries)

In [9]:
# Write the pdfmark bookmark entries to "index.info", the file handed to
# Ghostscript in the next step. `open` is used instead of the Python-2-only
# `file` builtin so the cell runs on both Python 2 and Python 3.
with open("index.info", "w") as idxinfo:
    idxinfo.write(idx)

Add the index to the PDF. Instructions for creating an index in a PDF file were obtained here:

http://linproject.blogspot.com/2012/06/adding-index-to-your-pdf-file.html


In [10]:
# Run Ghostscript's pdfwrite device over the pdfmark file (index.info) followed
# by the merged PDF, writing the result to $outfile.
# -q, -dBATCH and -dNOPAUSE make the run quiet and non-interactive.
!gs -sDEVICE=pdfwrite -q -dBATCH -dNOPAUSE \
    -sOutputFile=$outfile index.info -f $tempfile

In [11]:
# Remove the intermediate merged PDF; -f suppresses the error if it is already gone.
!rm -f $tempfile