In [1]:
import PyPDF2

In [2]:
# Open the source PDF for reading and create a new writer that will
# receive the copy.
src_pdf = PyPDF2.PdfFileReader('data/src/pdf/sample1.pdf')
dst_pdf = PyPDF2.PdfFileWriter()

In [3]:
# Clone the source document's root into the writer. The source's document
# info is not carried along: the file written below reports only '/Producer'.
dst_pdf.cloneReaderDocumentRoot(src_pdf)

In [4]:
# Write the writer's contents to the output path (binary mode).
with open('data/temp/sample1_no_meta.pdf', 'wb') as f:
    dst_pdf.write(f)

In [5]:
# Read the written file back and show its document info.
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': 'PyPDF2'}

In [6]:
# Replace the '/Producer' value (reported as 'PyPDF2' above) with an empty
# string.
dst_pdf.addMetadata({'/Producer': ''})

In [7]:
# Write the file again, now with the blank '/Producer', overwriting the
# previous output.
with open('data/temp/sample1_no_meta.pdf', 'wb') as f:
    dst_pdf.write(f)

# Read it back to confirm the producer is now empty.
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': ''}

In [8]:
def remove_all_metadata(src_path, dst_path, producer=''):
    """Write a copy of a PDF whose document info holds only ``'/Producer'``.

    Args:
        src_path: Path of the PDF to read.
        dst_path: Path the copy is written to.
        producer: Value stored under ``'/Producer'`` in the copy
            (empty string by default).
    """
    reader = PyPDF2.PdfFileReader(src_path)
    writer = PyPDF2.PdfFileWriter()
    writer.cloneReaderDocumentRoot(reader)

    # The only metadata entry set on the copy is the producer.
    producer_only = {'/Producer': producer}
    writer.addMetadata(producer_only)

    with open(dst_path, 'wb') as out_file:
        writer.write(out_file)

In [9]:
# Same steps as the cells above, now through the helper; producer is left at
# its default ''.
remove_all_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf')
# Read the result back to check its document info.
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': ''}

In [10]:
# Start over with a fresh reader/writer pair for the partial-removal example.
src_pdf = PyPDF2.PdfFileReader('data/src/pdf/sample1.pdf')
dst_pdf = PyPDF2.PdfFileWriter()
# Clone the source document into the new writer. Nothing else in this
# example adds pages, so without this step the file written below would
# carry the metadata but none of the source's content.
dst_pdf.cloneReaderDocumentRoot(src_pdf)

In [11]:
# Copy the source's document info into a regular dict so that individual
# entries can be removed before writing.
d = {key: src_pdf.documentInfo[key] for key in src_pdf.documentInfo.keys()}

In [12]:
# Show the metadata entries read from the source PDF.
print(d)


{'/Title': 'sample1', '/Producer': 'macOS バージョン10.14.2(ビルド18C54) Quartz PDFContext', '/Creator': 'Keynote', '/CreationDate': "D:20190114072947Z00'00'", '/ModDate': "D:20190114072947Z00'00'"}

In [13]:
# Drop the entries that should not be carried over. Passing a default makes
# the cell safe to re-run (and safe for sources lacking a key): an absent key
# is a no-op instead of a KeyError.
d.pop('/Creator', None)
d.pop('/Producer', None)

print(d)


{'/Title': 'sample1', '/CreationDate': "D:20190114072947Z00'00'", '/ModDate': "D:20190114072947Z00'00'"}

In [14]:
# Store the remaining entries as the writer's metadata.
dst_pdf.addMetadata(d)

In [15]:
# Write the result to a separate output file (binary mode).
with open('data/temp/sample1_remove_meta.pdf', 'wb') as f:
    dst_pdf.write(f)

In [16]:
# Read the file back. '/Producer' was removed from d, and the written file
# reports 'PyPDF2' for it (see the output below).
print(PyPDF2.PdfFileReader('data/temp/sample1_remove_meta.pdf').documentInfo)


{'/Producer': 'PyPDF2', '/Title': 'sample1', '/CreationDate': "D:20190114072947Z00'00'", '/ModDate': "D:20190114072947Z00'00'"}

In [17]:
def remove_metadata(src_path, dst_path, *args, producer=''):
    """Copy a PDF, dropping the document-info keys named in ``args``.

    Args:
        src_path: Path of the PDF to read.
        dst_path: Path the copy is written to.
        *args: Document-info keys to drop, e.g. ``'/Creator'``.
        producer: Value stored under ``'/Producer'`` when that key is not
            carried over from the source (it was listed in ``args`` or the
            source has none). Not used when the source's ``'/Producer'``
            is kept.
    """
    src_pdf = PyPDF2.PdfFileReader(src_path)
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.cloneReaderDocumentRoot(src_pdf)

    # documentInfo is None for a PDF without a document information
    # dictionary; treat that as "no entries" instead of failing on .keys().
    # Reading it once also avoids re-evaluating the property per key.
    src_info = src_pdf.documentInfo or {}
    d = {key: src_info[key] for key in src_info.keys() if key not in args}

    # Only fills in '/Producer' when it was not kept from the source.
    d.setdefault('/Producer', producer)

    dst_pdf.addMetadata(d)
    with open(dst_path, 'wb') as f:
        dst_pdf.write(f)

In [18]:
# Drop three keys but keep '/Producer': the source's producer value is
# carried over, so the producer argument is not used here.
remove_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf',
                '/Creator', '/ModDate', '/CreationDate')
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': 'macOS バージョン10.14.2(ビルド18C54) Quartz PDFContext', '/Title': 'sample1'}

In [19]:
# Also drop '/Producer': it is then set to the default producer=''.
remove_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf',
                '/Creator', '/ModDate', '/CreationDate', '/Producer')
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': '', '/Title': 'sample1'}

In [20]:
# Drop '/Producer' and supply an explicit replacement value for it.
remove_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf',
                '/Creator', '/ModDate', '/CreationDate', '/Producer', producer='XXX')
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': 'XXX', '/Title': 'sample1'}

In [21]:
def select_metadata(src_path, dst_path, *args, producer=''):
    """Copy a PDF, keeping only the document-info keys named in ``args``.

    Args:
        src_path: Path of the PDF to read.
        dst_path: Path the copy is written to.
        *args: Document-info keys to keep, e.g. ``'/Title'``.
        producer: Value stored under ``'/Producer'`` when that key is not
            carried over from the source (it was not listed in ``args`` or
            the source has none). Not used when the source's
            ``'/Producer'`` is kept.
    """
    src_pdf = PyPDF2.PdfFileReader(src_path)
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.cloneReaderDocumentRoot(src_pdf)

    # documentInfo is None for a PDF without a document information
    # dictionary; treat that as "no entries" instead of failing on .keys().
    # Reading it once also avoids re-evaluating the property per key.
    src_info = src_pdf.documentInfo or {}
    d = {key: src_info[key] for key in src_info.keys() if key in args}

    # Only fills in '/Producer' when it was not kept from the source.
    d.setdefault('/Producer', producer)

    dst_pdf.addMetadata(d)
    with open(dst_path, 'wb') as f:
        dst_pdf.write(f)

In [22]:
# Keep only '/Title' and '/ModDate'. '/Producer' is not selected, so it is
# set to the default producer=''.
select_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf',
                '/Title', '/ModDate')
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': '', '/Title': 'sample1', '/ModDate': "D:20190114072947Z00'00'"}

In [23]:
# Selecting '/Producer' keeps the source's producer value instead of the
# default.
select_metadata('data/src/pdf/sample1.pdf', 'data/temp/sample1_no_meta.pdf',
                '/Title', '/Producer')
print(PyPDF2.PdfFileReader('data/temp/sample1_no_meta.pdf').documentInfo)


{'/Producer': 'macOS バージョン10.14.2(ビルド18C54) Quartz PDFContext', '/Title': 'sample1'}