In [1]:
import PyPDF2
In [2]:
# Open the sample PDF for reading and create a new, empty writer object.
src_pdf = PyPDF2.PdfFileReader('data/src/pdf/sample1.pdf')
dst_pdf = PyPDF2.PdfFileWriter()
In [3]:
# Copy the source document's root object from the reader into the writer.
dst_pdf.cloneReaderDocumentRoot(src_pdf)
In [4]:
# Save the writer's current contents; the target is opened in binary
# write mode and closed automatically when the block exits.
with open('data/temp/sample1_no_meta.pdf', 'wb') as out_file:
    dst_pdf.write(out_file)
In [5]:
# Re-open the file that was just written and show its document info.
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [6]:
# Set the writer's /Producer metadata entry to an empty string.
dst_pdf.addMetadata({'/Producer': ''})
In [7]:
# Write the document again after the preceding addMetadata call, then
# read the file back (once it is closed) to display its document info.
with open('data/temp/sample1_no_meta.pdf', 'wb') as out_file:
    dst_pdf.write(out_file)
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [8]:
def remove_all_metadata(src_path, dst_path, producer=''):
    """Write a copy of a PDF whose only explicitly set metadata is /Producer.

    The source document's root is cloned into a new writer, and the only
    metadata passed to the writer is ``{'/Producer': producer}``; none of
    the source's document info is read or forwarded.

    Args:
        src_path: Path of the PDF to read.
        dst_path: Path the resulting PDF is written to (opened with 'wb').
        producer: Value stored under ``/Producer``. Defaults to ``''``.
    """
    reader = PyPDF2.PdfFileReader(src_path)
    writer = PyPDF2.PdfFileWriter()

    writer.cloneReaderDocumentRoot(reader)
    writer.addMetadata({'/Producer': producer})

    with open(dst_path, 'wb') as out_file:
        writer.write(out_file)
In [9]:
# Run the helper with the default producer ('') and show the document info
# stored in the file it produced.
remove_all_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf')
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [10]:
# Start over with a fresh reader/writer pair for the selective-removal example.
src_pdf = PyPDF2.PdfFileReader('data/src/pdf/sample1.pdf')
dst_pdf = PyPDF2.PdfFileWriter()
# Clone the source's document root, as the first example and the helper
# functions do. Without this the writer holds none of the source document,
# so the file written from it later would carry metadata but no pages.
dst_pdf.cloneReaderDocumentRoot(src_pdf)
In [11]:
# Copy the source's document info into a plain dict, looking each entry up
# by name on the documentInfo object.
d = {name: src_pdf.documentInfo[name] for name in src_pdf.documentInfo}
In [12]:
# Show the metadata copied from the source document.
print(d)
In [13]:
# Remove the two entries that should not be carried over. As before, a
# missing key raises KeyError.
del d['/Creator'], d['/Producer']
print(d)
In [14]:
# Hand the remaining metadata entries to the writer.
dst_pdf.addMetadata(d)
In [15]:
# Save the writer's contents together with the reduced metadata.
with open('data/temp/sample1_remove_meta.pdf', 'wb') as out_file:
    dst_pdf.write(out_file)
In [16]:
# Re-open the written file and show which document info entries it contains.
print(PyPDF2.PdfFileReader('data/temp/sample1_remove_meta.pdf').documentInfo)
In [17]:
def remove_metadata(src_path, dst_path, *args, producer=''):
    """Copy a PDF to ``dst_path`` while dropping the named metadata entries.

    Args:
        src_path: Path of the PDF to read.
        dst_path: Path the resulting PDF is written to (opened with 'wb').
        *args: Document-info keys to drop, e.g. ``'/Creator'``.
        producer: Value used for ``/Producer`` when the retained metadata
            has no such key, either because the source lacks it or because
            ``'/Producer'`` was listed in ``args``. Defaults to ``''``.
    """
    src_pdf = PyPDF2.PdfFileReader(src_path)
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.cloneReaderDocumentRoot(src_pdf)

    # Read documentInfo once. PyPDF2 documents it as None for a PDF that
    # has no document information, which previously failed on `.keys()`.
    info = src_pdf.documentInfo
    if info is None:
        info = {}

    # Values are still fetched with info[key], exactly as before.
    d = {key: info[key] for key in info if key not in args}

    # Always pass a /Producer entry; `producer` only applies when no
    # /Producer value was retained from the source.
    d.setdefault('/Producer', producer)
    dst_pdf.addMetadata(d)

    with open(dst_path, 'wb') as f:
        dst_pdf.write(f)
In [18]:
# Drop the creator and both date entries. '/Producer' is not listed, so a
# /Producer value present in the source is kept.
remove_metadata(
    'data/src/pdf/sample1.pdf',
    'data/temp/sample1_no_meta.pdf',
    '/Creator', '/ModDate', '/CreationDate',
)
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [19]:
# Also drop '/Producer'; the function then falls back to its default
# producer value, the empty string.
remove_metadata(
    'data/src/pdf/sample1.pdf',
    'data/temp/sample1_no_meta.pdf',
    '/Creator', '/ModDate', '/CreationDate', '/Producer',
)
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [20]:
# Drop '/Producer' as well and supply an explicit replacement value.
remove_metadata(
    'data/src/pdf/sample1.pdf',
    'data/temp/sample1_no_meta.pdf',
    '/Creator', '/ModDate', '/CreationDate', '/Producer',
    producer='XXX',
)
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [21]:
def select_metadata(src_path, dst_path, *args, producer=''):
    """Copy a PDF to ``dst_path`` keeping only the named metadata entries.

    Args:
        src_path: Path of the PDF to read.
        dst_path: Path the resulting PDF is written to (opened with 'wb').
        *args: Document-info keys to keep, e.g. ``'/Title'``. Keys that the
            source does not contain are ignored.
        producer: Value used for ``/Producer`` when the retained metadata
            has no such key, either because ``'/Producer'`` was not
            selected or because the source lacks it. Defaults to ``''``.
    """
    src_pdf = PyPDF2.PdfFileReader(src_path)
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.cloneReaderDocumentRoot(src_pdf)

    # Read documentInfo once. PyPDF2 documents it as None for a PDF that
    # has no document information, which previously failed on `.keys()`.
    info = src_pdf.documentInfo
    if info is None:
        info = {}

    # Values are still fetched with info[key], exactly as before.
    d = {key: info[key] for key in info if key in args}

    # Always pass a /Producer entry; `producer` only applies when no
    # /Producer value was retained from the source.
    d.setdefault('/Producer', producer)
    dst_pdf.addMetadata(d)

    with open(dst_path, 'wb') as f:
        dst_pdf.write(f)
In [22]:
# Keep only the title and modification date. '/Producer' is not selected,
# so the function's default producer value ('') is used.
select_metadata(
    'data/src/pdf/sample1.pdf',
    'data/temp/sample1_no_meta.pdf',
    '/Title', '/ModDate',
)
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)
In [23]:
# Keep the title and the producer. A /Producer value present in the source
# is carried over instead of the default.
select_metadata(
    'data/src/pdf/sample1.pdf',
    'data/temp/sample1_no_meta.pdf',
    '/Title', '/Producer',
)
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)