Test
This commit is contained in:
parent
1d90930e29
commit
ef993f8693
1 changed file with 9 additions and 2 deletions
|
|
@@ -12,8 +12,12 @@ from .config import ARTICLES_CACHE_DIR
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
pdf_options = PdfFormatOption(pipeline_options=PdfPipelineOptions(generate_picture_images=True))
|
||||
pipeline_options = PdfPipelineOptions()
|
||||
pipeline_options.ocr_options.use_gpu = False
|
||||
pipeline_options.generate_picture_images = False
|
||||
pdf_options = PdfFormatOption(pipeline_options=pipeline_options)
|
||||
doc_converter = DocumentConverter(format_options={InputFormat.PDF: pdf_options})
|
||||
doc_converter = DocumentConverter()
|
||||
|
||||
def fetch_and_convert_article(article):
|
||||
"""
|
||||
|
|
@@ -51,10 +55,13 @@ def fetch_and_convert_article(article):
|
|||
f.write(converted_text)
|
||||
logger.info("Conversion successful for article '%s'. Cached output.", article["id"])
|
||||
return converted_text
|
||||
except SystemExit as se:
|
||||
logger.exception("Docling conversion exited with error code %s for article '%s'. Skipping conversion.", se.code, article["id"])
|
||||
return ""
|
||||
except Exception as e:
|
||||
logger.exception("Conversion failed for article '%s': %s", article["id"], e)
|
||||
return ""
|
||||
finally:
|
||||
if os.path.exists(tmp_pdf_path):
|
||||
os.unlink(tmp_pdf_path)
|
||||
logger.debug("Temporary PDF file %s removed.", tmp_pdf_path)
|
||||
logger.debug("Temporary PDF file %s removed.", tmp_pdf_path)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue