EOF Token Not Found - How to Fix in PyPDF and PyPDF2?

I am trying to combine multiple PDF files into a single PDF file using Python. I tried both PyPDF and PyPDF2 - for some files they both gave the same error:

PdfReadError: EOF not found

Here is my code (page_files is a list of PDF file paths to merge):

# use pypdf to combine pdf pages
# page_files: list of input PDF paths; pdf_full_path: path of the merged output.
output = PdfFileWriter()
input_streams = []  # kept open until the final write, then closed below
try:
    for pf in page_files:
        # open() works on both Python 2 and 3; the file() builtin is Python 2 only
        filestream = open(pf, "rb")
        input_streams.append(filestream)
        pdf = PdfFileReader(filestream)
        for num in range(pdf.getNumPages()):
            output.addPage(pdf.getPage(num))

    # write final file; the with-block closes it even if write() raises
    with open(pdf_full_path, "wb") as outputStream:
        output.write(outputStream)
finally:
    # release every input handle, whether or not the merge succeeded
    for filestream in input_streams:
        filestream.close()

I read several StackOverflow threads on this topic, but none of them contain a solution that works. If you have successfully combined PDF files using Python, I would like to hear how you did it. Thanks!

+9
source share
1 answer

For anyone else looking to merge a “list” of PDFs:

Use glob to collect the list of files to merge. <- ^^

Reference: the Python glob module

from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
import os
import glob

class MergeAllPDF:
    """Merge every PDF matched by a glob pattern into one output PDF.

    Call ``create(filepath, outpath, outfilename)``: it collects the files
    matching ``filepath`` and immediately writes the merged document.
    """

    def __init__(self):
        # Paths queued for the next merge() call; reset whenever merge() runs
        # with a non-empty queue.
        self.mergelist = []

    def create(self, filepath, outpath, outfilename):
        """Collect the PDFs matching *filepath* and merge them.

        Args:
            filepath: glob pattern selecting the input PDFs.
            outpath: prefix prepended verbatim to the output file name, so a
                directory must include its trailing path separator.
            outfilename: output file name without the ``.pdf`` extension.
        """
        self.outfilname = outfilename
        self.filepath = filepath
        self.outpath = outpath
        # glob returns matches in arbitrary, filesystem-dependent order;
        # sort so the page order of the merged PDF is deterministic.
        matches = sorted(glob.glob(self.filepath))
        self.myrange = len(matches)
        self.mergelist.extend(matches)
        # Every match has been moved into mergelist, so nothing is pending.
        self.pdfs = []
        self.merge()

    def merge(self):
        """Write all queued PDFs into ``outpath + outfilname + ".pdf"``.

        Prints a hint and returns without writing when nothing is queued.
        Input handles and the merger are closed, and the queue is emptied,
        even if appending or writing raises.
        """
        if not self.mergelist:
            print("mergelist is empty please check your input path")
            return

        self.merger = PdfFileMerger()
        handles = []  # input files stay open until the output is written
        try:
            for pdf in self.mergelist:
                handle = open(pdf, 'rb')
                handles.append(handle)
                self.merger.append(handle)
            self.merger.write(self.outpath + "%s.pdf" % (self.outfilname))
        finally:
            self.merger.close()
            for handle in handles:
                handle.close()
            # Drop the queue so a failed run cannot leak into the next one.
            self.mergelist = []

# Usage example.
# Adjust both paths below to match your machine:

# glob pattern matching the single-page PDFs to merge
inpath = r"C:\Users\Fabian\Desktop\mergeallpdfs\scan\*.pdf"
# prefix for the merged PDF; it is joined to the file name by plain
# concatenation, so keep the trailing separator
outpath = r"C:\Users\Fabian\Desktop\mergeallpdfs\output\\"

pdf_job = MergeAllPDF()
pdf_job.create(inpath, outpath, "mergedpdf")
0

All Articles