Fix for a bug introduced in the last commit.

erikkastelec · Sep 11, 2020 · 6c243e0 · 6c243e0
1 parent 08ef6c0
commit 6c243e0
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 6 deletions.
diff --git a/PDFScraper/__init__.py b/PDFScraper/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.1.7"
+__version__ = "1.1.8"
 
 import logging
 

diff --git a/PDFScraper/core.py b/PDFScraper/core.py
@@ -278,19 +278,21 @@ def convert_to_pdf(document: Document, tessdata_location: str, config_options=""
             logger.error(e)
             sys.exit(1)
     pdf_writer = PdfFileWriter()
+    pdf_files = []
     for filename in pdf_pages:
         pdf_file = open(filename, 'rb')
+        pdf_files.append(pdf_file)
         pdf_reader = PdfFileReader(pdf_file)
         for i in range(pdf_reader.numPages):
             page = pdf_reader.getPage(i)
             pdf_writer.addPage(page)
-        pdf_file.close()
     with open(tempfile.gettempdir() + "/PDFScraper" + "/" + document.filename + ".pdf", 'w+b') as out:
         pdf_writer.write(out)
         out.close()
         document.ocr_path = tempfile.gettempdir() + "/PDFScraper" + "/" + document.filename + ".pdf"
     # cleanup temporary files
-    for filename in pdf_pages:
+    for file, filename in zip(pdf_files, pdf_pages):
+        file.close()
         os.remove(filename)
 
 
@@ -497,4 +499,3 @@ def find_words_tables(tables, search_mode, search_words, match_score):
         if found:
             result.append(table)
     return result
-
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # PDFScraper
 [![PyPI version](https://badge.fury.io/py/PDFScraper.svg)](https://badge.fury.io/py/PDFScraper)
 
-CLI program for searching text and tables inside of PDF documents and displaying results in HTML. It combines [Pdfminer.six](https://github.com/pdfminer/pdfminer.six), [Camelot](https://github.com/camelot-dev/camelot) and [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) in a single program, which is simple to use.
+CLI program and library for extracting elements from PDF documents, with search functionality that outputs a summary in HTML format. It combines [Pdfminer.six](https://github.com/pdfminer/pdfminer.six), [Camelot](https://github.com/camelot-dev/camelot) and [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) in a single program that is simple to use.
 
 # How to use
 ### Install using pip

diff --git a/setup.py b/setup.py
@@ -51,7 +51,7 @@
         "yattag==1.14.0",
     ],
     name="PDFScraper",
-    version="1.1.7",
+    version="1.1.8",
     author="Erik Kastelec",
     author_email="[email protected]",
     description="PDF text and table search",