Test
This commit is contained in:
parent
1d90930e29
commit
ef993f8693
1 changed file with 9 additions and 2 deletions
|
|
@@ -12,8 +12,12 @@ from .config import ARTICLES_CACHE_DIR
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
pdf_options = PdfFormatOption(pipeline_options=PdfPipelineOptions(generate_picture_images=True))
|
pipeline_options = PdfPipelineOptions()
|
||||||
|
pipeline_options.ocr_options.use_gpu = False
|
||||||
|
pipeline_options.generate_picture_images = False
|
||||||
|
pdf_options = PdfFormatOption(pipeline_options=pipeline_options)
|
||||||
doc_converter = DocumentConverter(format_options={InputFormat.PDF: pdf_options})
|
doc_converter = DocumentConverter(format_options={InputFormat.PDF: pdf_options})
|
||||||
|
doc_converter = DocumentConverter()
|
||||||
|
|
||||||
def fetch_and_convert_article(article):
|
def fetch_and_convert_article(article):
|
||||||
"""
|
"""
|
||||||
|
|
@@ -51,10 +55,13 @@ def fetch_and_convert_article(article):
|
||||||
f.write(converted_text)
|
f.write(converted_text)
|
||||||
logger.info("Conversion successful for article '%s'. Cached output.", article["id"])
|
logger.info("Conversion successful for article '%s'. Cached output.", article["id"])
|
||||||
return converted_text
|
return converted_text
|
||||||
|
except SystemExit as se:
|
||||||
|
logger.exception("Docling conversion exited with error code %s for article '%s'. Skipping conversion.", se.code, article["id"])
|
||||||
|
return ""
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Conversion failed for article '%s': %s", article["id"], e)
|
logger.exception("Conversion failed for article '%s': %s", article["id"], e)
|
||||||
return ""
|
return ""
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(tmp_pdf_path):
|
if os.path.exists(tmp_pdf_path):
|
||||||
os.unlink(tmp_pdf_path)
|
os.unlink(tmp_pdf_path)
|
||||||
logger.debug("Temporary PDF file %s removed.", tmp_pdf_path)
|
logger.debug("Temporary PDF file %s removed.", tmp_pdf_path)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue