From 353c69294ad49c9c1c82a0ba0f2634d41ff4141a Mon Sep 17 00:00:00 2001 From: Regis David Souza Mesquita Date: Sun, 2 Mar 2025 03:22:35 +0000 Subject: [PATCH 1/6] Break project into multiple files --- .gitignore | 3 + Makefile | 13 + README.md | 128 ++++----- tests/test_vibe.py | 103 +++++++ vibe.py | 628 ------------------------------------------- vibe/config.py | 22 ++ vibe/converter.py | 60 +++++ vibe/fetcher.py | 71 +++++ vibe/filter.py | 69 +++++ vibe/main.py | 49 ++++ vibe/orchestrator.py | 80 ++++++ vibe/rerank.py | 54 ++++ vibe/server.py | 38 +++ vibe/summarizer.py | 38 +++ vibe/tts.py | 33 +++ 15 files changed, 687 insertions(+), 702 deletions(-) create mode 100644 Makefile create mode 100644 tests/test_vibe.py delete mode 100644 vibe.py create mode 100644 vibe/config.py create mode 100644 vibe/converter.py create mode 100644 vibe/fetcher.py create mode 100644 vibe/filter.py create mode 100644 vibe/main.py create mode 100644 vibe/orchestrator.py create mode 100644 vibe/rerank.py create mode 100644 vibe/server.py create mode 100644 vibe/summarizer.py create mode 100644 vibe/tts.py diff --git a/.gitignore b/.gitignore index 45a0bbc..eb6fe53 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ cache/ +__init__.py +*.pyc .DS_Store +.aider* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b005e48 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +.PHONY: test run serve clean + +test: + python -m unittest discover -s tests + +run: + python -m vibe.main --generate --prompt "Your interests here" --max-articles 5 --output summary.mp3 + +serve: + python -m vibe.main --serve + +clean: + rm -rf cache diff --git a/README.md b/README.md index ef82674..cda0e70 100644 --- a/README.md +++ b/README.md @@ -1,103 +1,83 @@ # vibe: Article Summarization & TTS Pipeline -vibe is a Python-based pipeline that automatically fetches the latest Computer Science research articles from arXiv, filters them for relevance using a language model (LLM), converts article PDFs to Markdown with Docling, generates narrative summaries, and synthesizes the summaries into an MP3 audio file using a text-to-speech (TTS) system. This tool is ideal for users who prefer listening to curated research summaries on the go or integrating the process into a larger system via an API. +vibe is a Python-based pipeline that automatically fetches the latest Computer Science research articles from arXiv, filters them for relevance using a language model (LLM), converts article PDFs to Markdown with Docling, generates narrative summaries, and synthesizes the summaries into an MP3 audio file using a text-to-speech (TTS) system. -## Features +This repository has been refactored into a modular structure for improved maintainability. -- **Fetch Articles:** Retrieves the latest Computer Science articles from arXiv. -- **Cache Mechanism:** Caches article metadata and converted content to speed up subsequent requests. -- **Relevance Filtering:** Uses an LLM to filter articles based on user-provided interests. -- **PDF Conversion:** Converts PDF articles to Markdown format using Docling. -- **Summarization:** Generates a fluid, narrative-style summary for each relevant article with the help of an LLM. -- **Text-to-Speech:** Converts the final narrative summary into an MP3 file using KPipeline. -- **Flask API:** Exposes the functionality via a RESTful endpoint for dynamic requests. -- **CLI and Server Modes:** Run the pipeline as a one-off CLI command or as a continuously running Flask server. 
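+Because the refactor exposes each stage as an importable module, the pipeline can also be driven directly from Python. A minimal sketch (it assumes an LLM endpoint is reachable at the configured `LLM_URL`):
+
+```python
+from vibe.orchestrator import process_articles
+from vibe.tts import text_to_speech
+
+# Fetch, filter, rerank, and summarize the latest arXiv CS listing, then narrate it.
+summary = process_articles("I care about reinforcement learning", max_articles=3)
+text_to_speech(summary, "summary.mp3")
+```
+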
+## Project Structure
 
-## Why Use vibe?
-
-- **Stay Updated:** Automatically curate and summarize the latest research articles so you can keep up with advancements in your field.
-- **Hands-Free Listening:** Enjoy audio summaries during your commute or while multitasking.
-- **Automated Workflow:** Seamlessly integrate multiple processing steps—from fetching and filtering to summarization and TTS.
-- **Flexible Deployment:** Use the CLI mode for quick summaries or deploy the Flask API for integration with other systems.
+- **vibe/** - Main package containing all modules:
+  - `config.py` - Configuration, constants, and cache setup.
+  - `fetcher.py` - Module to fetch articles from arXiv.
+  - `filter.py` - Module for relevance filtering using an LLM.
+  - `rerank.py` - Module to rerank articles.
+  - `converter.py` - Module to convert PDFs to Markdown.
+  - `summarizer.py` - Module to generate article summaries.
+  - `tts.py` - Module for text-to-speech conversion.
+  - `orchestrator.py` - Orchestrates the complete pipeline.
+  - `server.py` - Flask server exposing a REST API.
+  - `main.py` - CLI entry point.
+- **tests/** - Contains unit tests.
+- **requirements.txt** - Python package requirements.
+- **Makefile** - Makefile to run common tasks.
 
 ## Installation
 
 1. **Prerequisites:**
-   Ensure you have Python 3.x installed on your system.
+   Python 3.x installed on your system.
 
-2. **Clone the Repository:**
-   Clone this repository to your local machine.
+2. **Clone the repository:**
+   ```bash
+   git clone 
+   cd 
+   ```
 
-3. **Install Dependencies:**
-   Navigate to the project directory and install the required packages:
-   ```
-   pip install -r requirements.txt
-   ```
+3. **Install dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
 
-## Usage
+## Running the Application
 
 ### CLI Mode
 
-Run the pipeline once to generate an MP3 summary file. For example:
-```
-python vibe.py --generate --prompt "I live in a mid-sized European city, working in the tech industry on AI-driven automation solutions. I prefer content focused on deep learning and reinforcement learning applications, and I want to filter out less relevant topics. Only include articles that are rated 9 or 10 on a relevance scale from 0 to 10." --max-articles 10 --output summary_cli.mp3
-```
-This command fetches the latest articles from arXiv, filters and ranks them based on your specified interests, generates narrative summaries, and converts the final summary into an MP3 file named `summary_cli.mp3`.
+To generate a summary MP3 from the command line:
+```bash
+python -m vibe.main --generate --prompt "Your interests and context here" --max-articles 5 --output summary.mp3
+```
 
 ### Server Mode
 
-Alternatively, you can run vibe as a Flask server:
-```
-python vibe.py --serve
-```
-Once the server is running, you can process requests by sending a POST request to the `/process` endpoint. For example:
-```
+To run the Flask server:
+```bash
+python -m vibe.main --serve
+```
+Then send a POST request to `http://127.0.0.1:5000/process` with a JSON payload:
+```bash
 curl -X POST http://127.0.0.1:5000/process \
   -H "Content-Type: application/json" \
   -d '{"user_info": "Your interests here", "max_articles": 5, "new_only": false}'
-```
-The server processes the articles, generates an MP3 summary, and returns the file as a downloadable response.
+```
+The server runs the full pipeline and returns the generated MP3 as a downloadable file.
 
-## Environment Variables
-
-The following environment variables can be set to customize the behavior of vibe:
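+The same request can be issued from Python. A minimal sketch using `requests` (the payload mirrors the curl example above; the long timeout is deliberate, since the pipeline can run for several minutes):
+
+```python
+import requests
+
+resp = requests.post(
+    "http://127.0.0.1:5000/process",
+    json={"user_info": "Your interests here", "max_articles": 5, "new_only": False},
+    timeout=3600,  # PDF conversion, LLM calls, and TTS all take a while
+)
+resp.raise_for_status()
+with open("summary.mp3", "wb") as f:
+    f.write(resp.content)  # the endpoint responds with the MP3 bytes
+```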
+## Running Tests
 
-- `ARXIV_URL`: The URL used to fetch the latest arXiv articles. Defaults to `https://arxiv.org/list/cs/new`.
-- `LLM_URL`: The URL for the language model endpoint. Defaults to `http://127.0.0.1:4000/v1/chat/completions` (this is a litellm instance).
-- `MODEL_NAME`: The model name to be used by the LLM. Defaults to `mistral-small-latest`.
+The project includes basic tests that verify the modules work as expected. To run them, execute:
 
-Note that using the `mistral-small` model through their cloud service typically costs a few cents per run and completes the summarization process in around 4 minutes. It is also possible to run vibe with local LLMs (such as qwen 2.5 14b or mistral-small), although these local runs may take up to an hour.
+```bash
+make test
+```
 
-## Project Structure
+or
 
-- **vibe.py:** Main application file containing modules for:
-  - Fetching and caching arXiv articles.
-  - Filtering articles for relevance.
-  - Converting PDFs to Markdown using Docling.
-  - Summarizing articles via an LLM.
-  - Converting text summaries to speech (MP3) using KPipeline.
-  - Exposing a Flask API for processing requests.
-- **requirements.txt:** Contains the list of Python packages required by the project.
-- **CACHE_DIR:** Directory created at runtime for caching articles and processed files.
+```bash
+python -m unittest discover -s tests
+```
 
+## Makefile Commands
 
-## Dependencies
+- `make test` - Run the unit tests.
+- `make run` - Run the application in CLI mode (edit the prompt inside the Makefile as needed).
+- `make serve` - Run the Flask server.
+- `make clean` - Clean up temporary files (removes the cache directory).
 
-The project relies on several key libraries:
-- Flask
-- requests
-- beautifulsoup4
-- soundfile
-- docling
-- kokoro
+## Environment Variables
 
-## Contributing
+The following environment variables can be set to customize the behavior:
 
-Contributions are welcome! Feel free to fork this repository and submit pull requests with improvements or bug fixes.
+- `ARXIV_URL` - URL used to fetch the latest arXiv articles (default: `https://arxiv.org/list/cs/new`).
+- `LLM_URL` - URL of the LLM endpoint, e.g. a litellm instance (default: `http://127.0.0.1:4000/v1/chat/completions`).
+- `MODEL_NAME` - Model name passed to the LLM endpoint (default: `mistral-small-latest`).
+
+Using `mistral-small` through the cloud service typically costs a few cents per run and finishes in around 4 minutes; local models (such as Qwen 2.5 14B) also work but can take up to an hour.
 
 ## License
 
-This project is licensed under the MIT License.
-
-## Acknowledgments
-
-Thanks to the developers of [Docling](https://github.com/docling) and [Kokoro](https://github.com/kokoro) as well as the maintainers of BeautifulSoup and Flask for providing great tools that made this project possible.
\ No newline at end of file
+This project is licensed under the MIT License.
\ No newline at end of file
diff --git a/tests/test_vibe.py b/tests/test_vibe.py
new file mode 100644
index 0000000..63425ec
--- /dev/null
+++ b/tests/test_vibe.py
@@ -0,0 +1,103 @@
+import unittest
+from unittest.mock import patch, MagicMock
+
+# Import modules from the vibe package
+from vibe.fetcher import fetch_arxiv_list
+from vibe.filter import batch_relevance_filter
+from vibe.rerank import rerank_articles
+from vibe.converter import fetch_and_convert_article
+from vibe.summarizer import generate_article_summary
+from vibe.orchestrator import process_articles
+
+class TestVibeModules(unittest.TestCase):
+
+    @patch("vibe.fetcher.requests.get")
+    def test_fetch_arxiv_list(self, mock_get):
+        # Set up a fake arXiv listing page for the parser
+        fake_html = """
+        <html>
+        <body>
+        <dl>
+        <dt><a href="/abs/1234.5678" title="Abstract">arXiv:1234.5678</a>
+        <a href="/pdf/1234.5678" title="Download PDF">pdf</a></dt>
+        <dd>
+        <div class="list-title">Title: Test Article</div>
+        <p class="mathjax">This is a test abstract.</p>
+        </dd>
+        </dl>
+        </body>
+        </html>
+ + + """ + mock_get.return_value.status_code = 200 + mock_get.return_value.text = fake_html + articles = fetch_arxiv_list(force_refresh=True, arxiv_url="http://fakeurl") + self.assertEqual(len(articles), 1) + self.assertEqual(articles[0]["id"], "arXiv:1234.5678") + + @patch("vibe.filter.requests.post") + def test_batch_relevance_filter(self, mock_post): + # Simulate LLM response + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = {"choices": [{"message": {"content": '{"arXiv:1234.5678": "yes"}'}}]} + mock_post.return_value = fake_response + + articles = [{"id": "arXiv:1234.5678", "title": "Test", "abstract": "Test abstract"}] + relevant_ids = batch_relevance_filter(articles, "dummy user") + self.assertIn("arXiv:1234.5678", relevant_ids) + + @patch("vibe.rerank.requests.post") + def test_rerank_articles(self, mock_post): + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = {"choices": [{"message": {"content": '{"ranking": ["arXiv:1234.5678"]}'}}]} + mock_post.return_value = fake_response + + articles = [{"id": "arXiv:1234.5678", "title": "Test", "abstract": "Test abstract"}] + ranked = rerank_articles(articles, "dummy user") + self.assertEqual(ranked[0]["id"], "arXiv:1234.5678") + + @patch("vibe.converter.requests.get") + def test_fetch_and_convert_article(self, mock_get): + # This test will simulate a failure to download a PDF + article = {"id": "arXiv:1234.5678", "pdf_url": "http://fakepdf", "title": "Test", "abstract": "Test abstract"} + mock_get.return_value.status_code = 404 + content = fetch_and_convert_article(article) + self.assertEqual(content, "") + + @patch("vibe.summarizer.requests.post") + def test_generate_article_summary(self, mock_post): + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = {"choices": [{"message": {"content": "Summary text"}}]} + mock_post.return_value = fake_response + summary = generate_article_summary({"id": "arXiv:1234.5678", "title": "Test"}, "content", "dummy user") + self.assertEqual(summary, "Summary text") + + @patch("vibe.orchestrator.fetch_arxiv_list") + @patch("vibe.orchestrator.batch_relevance_filter") + @patch("vibe.orchestrator.rerank_articles") + @patch("vibe.orchestrator.fetch_and_convert_article") + @patch("vibe.orchestrator.generate_article_summary") + def test_process_articles(self, mock_summary, mock_convert, mock_rerank, mock_filter, mock_fetch): + # Setup mocks for orchestrator pipeline + mock_fetch.return_value = [{ + "id": "arXiv:1234.5678", + "title": "Test Article", + "abstract": "Test abstract", + "pdf_url": "http://fakepdf" + }] + mock_filter.return_value = {"arXiv:1234.5678"} + mock_rerank.return_value = [{ + "id": "arXiv:1234.5678", + "title": "Test Article", + "abstract": "Test abstract", + "pdf_url": "http://fakepdf" + }] + mock_convert.return_value = "Converted content" + mock_summary.return_value = "Final summary" + + summary = process_articles("dummy user", max_articles=1) + self.assertIn("Final summary", summary) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/vibe.py b/vibe.py deleted file mode 100644 index d6870dd..0000000 --- a/vibe.py +++ /dev/null @@ -1,628 +0,0 @@ -#!/usr/bin/env python3 -import os -import json -import requests -import subprocess -from datetime import datetime -import tempfile -import logging -import concurrent.futures -import re -from bs4 import BeautifulSoup - -# --- Docling Imports --- -from 
docling.document_converter import DocumentConverter, PdfFormatOption -from docling.datamodel.pipeline_options import PdfPipelineOptions -from docling.datamodel.base_models import InputFormat -from docling_core.types.doc import ImageRefMode - -# --- Kokoro & TTS Imports --- -from kokoro import KPipeline -import soundfile as sf - -# --- Flask Imports --- -from flask import Flask, send_file, request, jsonify - -# --- Logging Configuration --- -logging.basicConfig( - level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - -# --- Cache Setup --- -CACHE_DIR = "cache" -ARXIV_CACHE_FILE = os.path.join(CACHE_DIR, "arxiv_list.json") -ARTICLES_CACHE_DIR = os.path.join(CACHE_DIR, "articles") -if not os.path.exists(CACHE_DIR): - os.makedirs(CACHE_DIR) - logger.debug("Created cache directory: %s", CACHE_DIR) -if not os.path.exists(ARTICLES_CACHE_DIR): - os.makedirs(ARTICLES_CACHE_DIR) - logger.debug("Created articles cache directory: %s", ARTICLES_CACHE_DIR) - -# --- Instantiate Docling Converter --- -logger.debug("Instantiating Docling converter with PDF options.") -pdf_options = PdfFormatOption( - pipeline_options=PdfPipelineOptions(generate_picture_images=True) -) -doc_converter = DocumentConverter(format_options={InputFormat.PDF: pdf_options}) - -DEFAULT_ARXIV_URL = os.environ.get("ARXIV_URL", "https://arxiv.org/list/cs/new") -DEFAULT_LLM_URL = os.environ.get("LLM_URL", "http://127.0.0.1:4000/v1/chat/completions") -DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest") - - -# --- Module: Fetcher --- -def fetch_arxiv_list(force_refresh=False, arxiv_url=DEFAULT_ARXIV_URL): - """ - Fetches the latest CS articles from arXiv. If a cache exists, reads from it - unless force_refresh is True. Otherwise, parses the arXiv page, extracts - article metadata, and caches it. - """ - logger.debug("Checking for cached arXIV list at %s", ARXIV_CACHE_FILE) - if not force_refresh and os.path.exists(ARXIV_CACHE_FILE): - logger.info("Cache found for arXiv list. Loading from cache.") - with open(ARXIV_CACHE_FILE, "r", encoding="utf-8") as f: - articles = json.load(f) - logger.debug("Loaded %d articles from cache.", len(articles)) - return articles - - url = arxiv_url - logger.info("Fetching arXiv page from %s", url) - response = requests.get(url) - if response.status_code != 200: - logger.error( - "Failed to fetch arXiv page. 
Status code: %d", response.status_code - ) - raise Exception("Failed to fetch arXiv page.") - - logger.debug("Parsing arXiv HTML content.") - soup = BeautifulSoup(response.text, "html.parser") - articles = [] - dl = soup.find("dl") - if not dl: - logger.error("No article list found on arXiv page.") - raise Exception("No article list found on arXiv page.") - - dts = dl.find_all("dt") - dds = dl.find_all("dd") - logger.debug("Found %d dt tags and %d dd tags.", len(dts), len(dds)) - for dt, dd in zip(dts, dds): - id_link = dt.find("a", title="Abstract") - if not id_link: - logger.debug("Skipping an article with no abstract link.") - continue - article_id = id_link.text.strip() - pdf_link = dt.find("a", title="Download PDF") - pdf_url = "https://arxiv.org" + pdf_link["href"] if pdf_link else None - - title_div = dd.find("div", class_="list-title") - title = ( - title_div.text.replace("Title:", "").strip() if title_div else "No title" - ) - - abstract_div = dd.find("p", class_="mathjax") - abstract = abstract_div.text.strip() if abstract_div else "No abstract" - - articles.append( - { - "id": article_id, - "title": title, - "abstract": abstract, - "pdf_url": pdf_url, - } - ) - logger.debug("Parsed article: %s", article_id) - - with open(ARXIV_CACHE_FILE, "w", encoding="utf-8") as f: - json.dump(articles, f) - logger.info("Cached %d articles to %s", len(articles), ARXIV_CACHE_FILE) - return articles - - -# --- Module: Batched Relevance Filter (Parallelized) --- -def batch_relevance_filter( - articles, - user_info, - batch_size=50, - llm_url=DEFAULT_LLM_URL, - model_name=DEFAULT_MODEL_NAME, -): - """ - Sends articles to the LLM in batches to check their relevance. - Expects a JSON response mapping article IDs to "yes" or "no". - This version parallelizes the batched requests. - """ - relevant_article_ids = set() - url = llm_url - logger.info("Starting batched relevance check for %d articles.", len(articles)) - - def process_batch(batch): - local_relevant_ids = set() - prompt_lines = [f"User info: {user_info}\n"] - prompt_lines.append( - "For each of the following articles, determine if it is relevant to the user. Respond in JSON format the keys are the article IDs and the values are 'yes' or 'no', do not add any preamble or any other form of text, your response will be parsed by a json parser immediatelly. remember you have to start your answer with valid json , you cannot add any text, the first char of your answer must be a { , no text." 
- ) - for article in batch: - prompt_lines.append( - f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n" - ) - prompt = "\n".join(prompt_lines) - payload = { - "model": model_name, - "messages": [{"role": "user", "content": prompt}], - } - try: - response = requests.post(url, json=payload) - if response.status_code != 200: - logger.error( - "LLM batched relevance check failed for batch starting with article '%s' with status code: %d", - batch[0]["id"], - response.status_code, - ) - return local_relevant_ids - data = response.json() - text_response = data["choices"][0]["message"]["content"].strip() - try: - match = re.search(r"\{.*\}", text_response, re.DOTALL) - if not match: - raise ValueError("No valid JSON object found in response") - json_str = match.group(0) - logger.debug("Batch response: %s", json_str[:200]) - result = json.loads(json_str) - for article_id, verdict in result.items(): - if isinstance(verdict, str) and verdict.lower().strip() == "yes": - local_relevant_ids.add(article_id) - except Exception as e: - logger.exception("Failed to parse JSON from LLM response: %s", e) - return local_relevant_ids - except Exception as e: - logger.exception("Error during batched relevance check: %s", e) - return local_relevant_ids - - batches = [ - articles[i : i + batch_size] for i in range(0, len(articles), batch_size) - ] - with concurrent.futures.ThreadPoolExecutor() as executor: - futures = [executor.submit(process_batch, batch) for batch in batches] - for future in concurrent.futures.as_completed(futures): - relevant_article_ids.update(future.result()) - - logger.info( - "Batched relevance check complete. %d articles marked as relevant.", - len(relevant_article_ids), - ) - return relevant_article_ids - - -# --- Module: Rerank Articles (Improved JSON extraction) --- -def rerank_articles( - articles, user_info, llm_url=DEFAULT_LLM_URL, model_name=DEFAULT_MODEL_NAME -): - """ - Calls the LLM to reorder the articles by importance. Returns the reordered list. - Expects a JSON response with a 'ranking' key pointing to a list of article IDs, ordered from most relevant to least relevant. - """ - if not articles: - return [] - - url = llm_url - logger.info("Starting rerank for %d articles.", len(articles)) - - prompt_lines = [ - f"User info: {user_info}\n", - 'Please rank the following articles from most relevant to least relevant. Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... 
] }.', - ] - for article in articles: - prompt_lines.append( - f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n" - ) - prompt = "\n".join(prompt_lines) - payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}]} - - try: - response = requests.post(url, json=payload) - if response.status_code != 200: - logger.error( - "LLM reranking request failed with status code: %d", - response.status_code, - ) - return articles # fallback: return original order - - data = response.json() - text_response = data["choices"][0]["message"]["content"].strip() - - match = re.search(r"\{.*\}", text_response, re.DOTALL) - if not match: - logger.error("No valid JSON found in rerank response.") - return articles - json_str = match.group(0) - rerank_result = json.loads(json_str) - ranking_list = rerank_result.get("ranking", []) - - # Create a map for quick lookup - article_map = {a["id"]: a for a in articles} - reordered = [] - for art_id in ranking_list: - if art_id in article_map: - reordered.append(article_map[art_id]) - # Add any articles not mentioned in the ranking_list, to preserve them at the end - remaining = [a for a in articles if a["id"] not in ranking_list] - reordered.extend(remaining) - - return reordered - - except Exception as e: - logger.exception("Error during rerank: %s", e) - return articles - - -# --- Module: Document Converter --- -def fetch_and_convert_article(article): - """ - Checks for a cached conversion of the article. - If absent, downloads the PDF, converts it using Docling, - caches the Markdown text, and returns it. - """ - safe_id = article["id"].replace(":", "_") - cache_file = os.path.join(ARTICLES_CACHE_DIR, f"{safe_id}.txt") - logger.debug("Checking for cached conversion of article '%s'.", article["id"]) - if os.path.exists(cache_file): - logger.info("Found cached conversion for article '%s'.", article["id"]) - with open(cache_file, "r", encoding="utf-8") as f: - return f.read() - - if not article["pdf_url"]: - logger.error("No PDF URL for article '%s'. Skipping conversion.", article["id"]) - return "" - logger.info( - "Downloading PDF for article '%s' from %s", article["id"], article["pdf_url"] - ) - response = requests.get(article["pdf_url"]) - if response.status_code != 200: - logger.error("Failed to download PDF for article '%s'.", article["id"]) - return "" - - with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf: - tmp_pdf.write(response.content) - tmp_pdf_path = tmp_pdf.name - logger.debug("PDF saved temporarily at %s", tmp_pdf_path) - - try: - logger.info("Converting PDF for article '%s' using Docling.", article["id"]) - conv_result = doc_converter.convert(source=tmp_pdf_path) - converted_text = conv_result.document.export_to_markdown() - with open(cache_file, "w", encoding="utf-8") as f: - f.write(converted_text) - logger.info( - "Conversion successful for article '%s'. Cached output.", article["id"] - ) - return converted_text - except Exception as e: - logger.exception("Conversion failed for article '%s': %s", article["id"], e) - return "" - finally: - if os.path.exists(tmp_pdf_path): - os.unlink(tmp_pdf_path) - logger.debug("Temporary PDF file %s removed.", tmp_pdf_path) - - -# --- Module: Summarizer (Parallelizable) --- -def generate_article_summary( - article, content, user_info, llm_url=DEFAULT_LLM_URL, model_name=DEFAULT_MODEL_NAME -): - """ - Generates a fluid, narrative summary for the article using the LLM. 
- The summary starts with a connecting phrase like 'And now, {article title}'. - """ - url = llm_url - prompt = ( - f"User info: {user_info}\n\n" - f"Please summarize the following article titled '{article['title']}' in a fluid narrative prose style without lists or visual cues. " - f"Begin the summary with a connecting segment like 'And now, Article: {article['title']}'.\n\n" - f"Article Content:\n{content}" - ) - payload = { - "model": model_name, - "messages": [{"role": "user", "content": prompt}], - } - logger.info("Generating summary for article '%s'.", article["id"]) - try: - response = requests.post(url, json=payload) - if response.status_code != 200: - logger.error( - "LLM summarization failed for article '%s'. Status code: %d", - article["id"], - response.status_code, - ) - return "" - data = response.json() - summary = data["choices"][0]["message"]["content"].strip() - logger.debug("Summary for article '%s': %s", article["id"], summary[:100]) - return summary - except Exception as e: - logger.exception("Error summarizing article '%s': %s", article["id"], e) - return "" - - -# --- Module: TTS Converter --- -def text_to_speech(text, output_mp3): - """ - Converts the provided text to speech using KPipeline. - A temporary WAV file is generated and then converted to MP3 using ffmpeg. - """ - logger.info("Starting text-to-speech conversion.") - pipeline = KPipeline(lang_code="a") - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav: - temp_wav_path = tmp_wav.name - logger.debug("Temporary WAV file created at %s", temp_wav_path) - - try: - generator = pipeline(text, voice="af_bella", speed=1, split_pattern=r"\n+") - with sf.SoundFile(temp_wav_path, "w", 24000, channels=1) as f: - for chunk_index, (_, _, audio) in enumerate(generator): - logger.debug("Writing audio chunk %d to WAV file.", chunk_index) - f.write(audio) - logger.info("WAV file generated. Converting to MP3 with ffmpeg.") - subprocess.run(["ffmpeg", "-y", "-i", temp_wav_path, output_mp3], check=True) - logger.info("MP3 file created at %s", output_mp3) - finally: - if os.path.exists(temp_wav_path): - os.unlink(temp_wav_path) - logger.debug("Temporary WAV file %s removed.", temp_wav_path) - - -# --- Orchestrator: Process Articles (Parallelizing summarization) --- -def process_articles( - user_info, - arxiv_url=DEFAULT_ARXIV_URL, - llm_url=DEFAULT_LLM_URL, - model_name=DEFAULT_MODEL_NAME, - max_articles=5, - new_only=False, -): - """ - Executes the full pipeline: - 1. Fetch arXiv articles (cached if available, unless new_only=True). - 2. If new_only, filter out articles that have already been cached as .txt files. - 3. Batch-check relevance via LLM (parallelized). - 4. Re-rank articles by importance using the LLM. - 5. Select the top `max_articles`. - 6. For each selected article, download and convert the PDF to Markdown (sequential). - 7. Generate a narrative summary for each article (parallelized if not cached). - 8. Combine all summaries into a final narrative. 
- """ - logger.info("Starting article processing pipeline.") - # Step 1: fetch articles with potential force_refresh - articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url) - logger.info("Total articles fetched: %d", len(articles)) - - # Step 2: if new_only is True, filter out articles older than the most recent cached article - if new_only: - cached_articles = [ - f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt") - ] - if cached_articles: - - def parse_id(id_str): - if id_str.lower().startswith("ar"): - id_str = id_str[6:] - parts = id_str.split(".") - return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1])) - - most_recent = max(cached_articles, key=parse_id) - articles = [ - article - for article in articles - if parse_id(article["id"]) > parse_id(most_recent) - ] - logger.info( - "After filtering by most recent article id %s, %d articles remain.", - most_recent, - len(articles), - ) - else: - logger.info( - "No cached articles found, proceeding with all fetched articles." - ) - - # Step 3: batch relevance check (parallelized) - relevant_ids = batch_relevance_filter( - articles, user_info, llm_url=llm_url, model_name=model_name - ) - relevant_articles = [ - article for article in articles if article["id"] in relevant_ids - ] - logger.info( - "Found %d relevant articles out of %d.", len(relevant_articles), len(articles) - ) - - # Step 4: rerank - reranked_articles = rerank_articles( - relevant_articles, user_info, llm_url=llm_url, model_name=model_name - ) - - # Step 5: select top max_articles - final_candidates = reranked_articles[:max_articles] - - # Step 6: convert PDFs sequentially - articles_with_content = [] - for article in final_candidates: - content = fetch_and_convert_article(article) - if content: - articles_with_content.append((article, content)) - else: - logger.warning("No content obtained for article '%s'.", article["id"]) - - # Step 7: generate summaries in parallel - summaries = [] - with concurrent.futures.ThreadPoolExecutor() as executor: - future_to_article = { - executor.submit( - generate_article_summary, - article, - content, - user_info, - llm_url, - model_name, - ): article - for article, content in articles_with_content - } - for future in concurrent.futures.as_completed(future_to_article): - article = future_to_article[future] - try: - summary = future.result() - if summary: - summaries.append(summary) - else: - logger.warning( - "No summary generated for article '%s'.", article["id"] - ) - except Exception as e: - logger.exception( - "Error generating summary for article '%s': %s", article["id"], e - ) - - # Step 8: combine summaries - final_summary = "\n\n".join(summaries) + " " - final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe.py" - - logger.info( - "Final summary generated with length %d characters.", len(final_summary) - ) - return final_summary - - -# --- Flask Application --- -app = Flask(__name__) - - -@app.route("/process", methods=["POST"]) -def process_endpoint(): - """ - Expects JSON with a 'user_info' field. - Optionally accepts 'max_articles' (default 5) and 'new_only' (boolean). - Runs the complete pipeline and returns the final MP3 file. 
- """ - data = request.get_json() - user_info = data.get("user_info", "") - if not user_info: - logger.error("user_info not provided in request.") - return jsonify({"error": "user_info not provided"}), 400 - - max_articles = data.get("max_articles", 5) - new_only = data.get("new_only", False) - - logger.info( - "Processing request with user_info: %s, max_articles: %s, new_only: %s", - user_info, - max_articles, - new_only, - ) - final_summary = process_articles( - user_info, - arxiv_url=DEFAULT_ARXIV_URL, - llm_url=DEFAULT_LLM_URL, - model_name=DEFAULT_MODEL_NAME, - max_articles=max_articles, - new_only=new_only, - ) - if not final_summary.strip(): - logger.error("No summaries generated.") - return jsonify({"error": "No summaries generated."}), 500 - - output_mp3 = os.path.join(CACHE_DIR, "final_output.mp3") - try: - text_to_speech(final_summary, output_mp3) - except Exception as e: - logger.exception("TTS conversion failed: %s", e) - return jsonify({"error": f"TTS conversion failed: {e}"}), 500 - - logger.info("Process complete. Returning MP3 file.") - return send_file(output_mp3, as_attachment=True) - - -# --- Main --- -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="vibe: Article Summarization & TTS Pipeline" - ) - parser.add_argument("--serve", action="store_true", help="Run as a Flask server.") - parser.add_argument( - "--generate", - action="store_true", - help="Run the pipeline once, generate a summary MP3, then exit.", - ) - parser.add_argument( - "--prompt", - type=str, - default="", - help="User info (interests, context) for LLM filtering & summaries.", - ) - parser.add_argument( - "--max-articles", - type=int, - default=5, - help="Maximum articles to process in the pipeline.", - ) - parser.add_argument( - "--new-only", - action="store_true", - help="If set, only process articles newer than cached.", - ) - parser.add_argument( - "--arxiv-url", - type=str, - default=DEFAULT_ARXIV_URL, - help="URL for fetching arXiv articles.", - ) - parser.add_argument( - "--llm-url", type=str, default=DEFAULT_LLM_URL, help="URL of the LLM endpoint." 
- ) - parser.add_argument( - "--model-name", - type=str, - default=DEFAULT_MODEL_NAME, - help="Name of model to pass to the LLM endpoint.", - ) - parser.add_argument( - "--output", - type=str, - default="final_output.mp3", - help="Output path for the generated MP3 file.", - ) - - args = parser.parse_args() - - if args.serve: - logger.info("Starting Flask application in verbose mode.") - app.run(debug=True) - elif args.generate: - # Run the pipeline directly and produce an MP3 file - logger.info("Running pipeline in CLI mode.") - user_info = args.prompt - final_summary = process_articles( - user_info=user_info, - arxiv_url=args.arxiv_url, - llm_url=args.llm_url, - model_name=args.model_name, - max_articles=args.max_articles, - new_only=args.new_only, - ) - if not final_summary.strip(): - logger.error("No summaries generated.") - exit(1) - - output_mp3 = args.output - try: - text_to_speech(final_summary, output_mp3) - logger.info(f"Generated MP3 at: {output_mp3}") - except Exception as e: - logger.exception("TTS conversion failed: %s", e) - exit(1) - else: - # Default to Flask server if neither flag is set - logger.info("No --serve or --generate specified; running Flask by default.") - app.run(debug=True) diff --git a/vibe/config.py b/vibe/config.py new file mode 100644 index 0000000..51dc1ac --- /dev/null +++ b/vibe/config.py @@ -0,0 +1,22 @@ +import os +import logging + +logging.basicConfig( + level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +CACHE_DIR = "cache" +if not os.path.exists(CACHE_DIR): + os.makedirs(CACHE_DIR) + logger.debug("Created cache directory: %s", CACHE_DIR) + +ARXIV_CACHE_FILE = os.path.join(CACHE_DIR, "arxiv_list.json") +ARTICLES_CACHE_DIR = os.path.join(CACHE_DIR, "articles") +if not os.path.exists(ARTICLES_CACHE_DIR): + os.makedirs(ARTICLES_CACHE_DIR) + logger.debug("Created articles cache directory: %s", ARTICLES_CACHE_DIR) + +DEFAULT_ARXIV_URL = os.environ.get("ARXIV_URL", "https://arxiv.org/list/cs/new") +DEFAULT_LLM_URL = os.environ.get("LLM_URL", "http://127.0.0.1:4000/v1/chat/completions") +DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest") \ No newline at end of file diff --git a/vibe/converter.py b/vibe/converter.py new file mode 100644 index 0000000..856b5c3 --- /dev/null +++ b/vibe/converter.py @@ -0,0 +1,60 @@ +import os +import json +import tempfile +import requests +import logging +import subprocess +from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.datamodel.base_models import InputFormat + +from .config import ARTICLES_CACHE_DIR + +logger = logging.getLogger(__name__) + +pdf_options = PdfFormatOption(pipeline_options=PdfPipelineOptions(generate_picture_images=True)) +doc_converter = DocumentConverter(format_options={InputFormat.PDF: pdf_options}) + +def fetch_and_convert_article(article): + """ + Checks for a cached conversion of the article. + If absent, downloads the PDF, converts it using Docling, + caches the Markdown text, and returns it. 
+ """ + safe_id = article["id"].replace(":", "_") + cache_file = os.path.join(ARTICLES_CACHE_DIR, f"{safe_id}.txt") + logger.debug("Checking for cached conversion of article '%s'.", article["id"]) + if os.path.exists(cache_file): + logger.info("Found cached conversion for article '%s'.", article["id"]) + with open(cache_file, "r", encoding="utf-8") as f: + return f.read() + + if not article["pdf_url"]: + logger.error("No PDF URL for article '%s'. Skipping conversion.", article["id"]) + return "" + logger.info("Downloading PDF for article '%s' from %s", article["id"], article["pdf_url"]) + response = requests.get(article["pdf_url"]) + if response.status_code != 200: + logger.error("Failed to download PDF for article '%s'.", article["id"]) + return "" + + with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf: + tmp_pdf.write(response.content) + tmp_pdf_path = tmp_pdf.name + logger.debug("PDF saved temporarily at %s", tmp_pdf_path) + + try: + logger.info("Converting PDF for article '%s' using Docling.", article["id"]) + conv_result = doc_converter.convert(source=tmp_pdf_path) + converted_text = conv_result.document.export_to_markdown() + with open(cache_file, "w", encoding="utf-8") as f: + f.write(converted_text) + logger.info("Conversion successful for article '%s'. Cached output.", article["id"]) + return converted_text + except Exception as e: + logger.exception("Conversion failed for article '%s': %s", article["id"], e) + return "" + finally: + if os.path.exists(tmp_pdf_path): + os.unlink(tmp_pdf_path) + logger.debug("Temporary PDF file %s removed.", tmp_pdf_path) \ No newline at end of file diff --git a/vibe/fetcher.py b/vibe/fetcher.py new file mode 100644 index 0000000..0474da3 --- /dev/null +++ b/vibe/fetcher.py @@ -0,0 +1,71 @@ +import os +import json +import requests +from bs4 import BeautifulSoup +import logging +from .config import ARXIV_CACHE_FILE + +logger = logging.getLogger(__name__) + +def fetch_arxiv_list(force_refresh=False, arxiv_url=None): + """ + Fetches the latest CS articles from arXiv. If a cache exists, reads from it + unless force_refresh is True. Otherwise, parses the arXiv page, extracts + article metadata, and caches it. + """ + if arxiv_url is None: + from .config import DEFAULT_ARXIV_URL + arxiv_url = DEFAULT_ARXIV_URL + + logger.debug("Checking for cached arXIV list at %s", ARXIV_CACHE_FILE) + if not force_refresh and os.path.exists(ARXIV_CACHE_FILE): + logger.info("Cache found for arXiv list. Loading from cache.") + with open(ARXIV_CACHE_FILE, "r", encoding="utf-8") as f: + articles = json.load(f) + logger.debug("Loaded %d articles from cache.", len(articles)) + return articles + + logger.info("Fetching arXiv page from %s", arxiv_url) + response = requests.get(arxiv_url) + if response.status_code != 200: + logger.error("Failed to fetch arXiv page. 
Status code: %d", response.status_code) + raise Exception("Failed to fetch arXiv page.") + + logger.debug("Parsing arXiv HTML content.") + soup = BeautifulSoup(response.text, "html.parser") + articles = [] + dl = soup.find("dl") + if not dl: + logger.error("No article list found on arXiv page.") + raise Exception("No article list found on arXiv page.") + + dts = dl.find_all("dt") + dds = dl.find_all("dd") + logger.debug("Found %d dt tags and %d dd tags.", len(dts), len(dds)) + for dt, dd in zip(dts, dds): + id_link = dt.find("a", title="Abstract") + if not id_link: + logger.debug("Skipping an article with no abstract link.") + continue + article_id = id_link.text.strip() + pdf_link = dt.find("a", title="Download PDF") + pdf_url = "https://arxiv.org" + pdf_link["href"] if pdf_link else None + + title_div = dd.find("div", class_="list-title") + title = title_div.text.replace("Title:", "").strip() if title_div else "No title" + + abstract_div = dd.find("p", class_="mathjax") + abstract = abstract_div.text.strip() if abstract_div else "No abstract" + + articles.append({ + "id": article_id, + "title": title, + "abstract": abstract, + "pdf_url": pdf_url, + }) + logger.debug("Parsed article: %s", article_id) + + with open(ARXIV_CACHE_FILE, "w", encoding="utf-8") as f: + json.dump(articles, f) + logger.info("Cached %d articles to %s", len(articles), ARXIV_CACHE_FILE) + return articles \ No newline at end of file diff --git a/vibe/filter.py b/vibe/filter.py new file mode 100644 index 0000000..59fe46f --- /dev/null +++ b/vibe/filter.py @@ -0,0 +1,69 @@ +import json +import re +import requests +import logging +import concurrent.futures + +logger = logging.getLogger(__name__) + +def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None): + """ + Sends articles to the LLM in batches to check their relevance. + Expects a JSON response mapping article IDs to "yes" or "no". + This version parallelizes the batched requests. + """ + if llm_url is None or model_name is None: + from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME + llm_url = llm_url or DEFAULT_LLM_URL + model_name = model_name or DEFAULT_MODEL_NAME + + relevant_article_ids = set() + logger.info("Starting batched relevance check for %d articles.", len(articles)) + + def process_batch(batch): + local_relevant_ids = set() + prompt_lines = [f"User info: {user_info}\n"] + prompt_lines.append( + "For each of the following articles, determine if it is relevant to the user. Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. Do not add any extra text; the response must start with a '{'." 
+ ) + for article in batch: + prompt_lines.append( + f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n" + ) + prompt = "\n".join(prompt_lines) + payload = { + "model": model_name, + "messages": [{"role": "user", "content": prompt}], + } + try: + response = requests.post(llm_url, json=payload) + if response.status_code != 200: + logger.error("LLM batched relevance check failed for batch starting with article '%s' with status code: %d", batch[0]["id"], response.status_code) + return local_relevant_ids + data = response.json() + text_response = data["choices"][0]["message"]["content"].strip() + try: + match = re.search(r"\{.*\}", text_response, re.DOTALL) + if not match: + raise ValueError("No valid JSON object found in response") + json_str = match.group(0) + logger.debug("Batch response: %s", json_str[:200]) + result = json.loads(json_str) + for article_id, verdict in result.items(): + if isinstance(verdict, str) and verdict.lower().strip() == "yes": + local_relevant_ids.add(article_id) + except Exception as e: + logger.exception("Failed to parse JSON from LLM response: %s", e) + return local_relevant_ids + except Exception as e: + logger.exception("Error during batched relevance check: %s", e) + return local_relevant_ids + + batches = [articles[i: i + batch_size] for i in range(0, len(articles), batch_size)] + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [executor.submit(process_batch, batch) for batch in batches] + for future in concurrent.futures.as_completed(futures): + relevant_article_ids.update(future.result()) + + logger.info("Batched relevance check complete. %d articles marked as relevant.", len(relevant_article_ids)) + return relevant_article_ids \ No newline at end of file diff --git a/vibe/main.py b/vibe/main.py new file mode 100644 index 0000000..ee70764 --- /dev/null +++ b/vibe/main.py @@ -0,0 +1,49 @@ +import argparse +import logging +from vibe.orchestrator import process_articles +from vibe.tts import text_to_speech +from vibe.config import DEFAULT_ARXIV_URL, DEFAULT_LLM_URL, DEFAULT_MODEL_NAME + +logging.basicConfig( + level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +def main(): + parser = argparse.ArgumentParser(description="vibe: Article Summarization & TTS Pipeline") + parser.add_argument("--serve", action="store_true", help="Run as a Flask server.") + parser.add_argument("--generate", action="store_true", help="Run the pipeline once and generate a summary MP3, then exit.") + parser.add_argument("--prompt", type=str, default="", help="User info for LLM filtering & summaries.") + parser.add_argument("--max-articles", type=int, default=5, help="Maximum articles to process in the pipeline.") + parser.add_argument("--new-only", action="store_true", help="Only process articles newer than cached.") + parser.add_argument("--arxiv-url", type=str, default=DEFAULT_ARXIV_URL, help="URL for fetching arXiv articles.") + parser.add_argument("--llm-url", type=str, default=DEFAULT_LLM_URL, help="URL of the LLM endpoint.") + parser.add_argument("--model-name", type=str, default=DEFAULT_MODEL_NAME, help="Name of model to pass to the LLM endpoint.") + parser.add_argument("--output", type=str, default="final_output.mp3", help="Output path for the generated MP3 file.") + + args = parser.parse_args() + + if args.serve: + from vibe.server import app + logger.info("Starting Flask server.") + app.run(debug=True) + elif args.generate: + logger.info("Running 
pipeline in CLI mode.") + user_info = args.prompt + final_summary = process_articles(user_info, arxiv_url=args.arxiv_url, llm_url=args.llm_url, model_name=args.model_name, max_articles=args.max_articles, new_only=args.new_only) + if not final_summary.strip(): + logger.error("No summaries generated.") + exit(1) + try: + text_to_speech(final_summary, args.output) + logger.info(f"Generated MP3 at: {args.output}") + except Exception as e: + logger.exception("TTS conversion failed: %s", e) + exit(1) + else: + logger.info("No mode specified; defaulting to Flask server.") + from vibe.server import app + app.run(debug=True) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/vibe/orchestrator.py b/vibe/orchestrator.py new file mode 100644 index 0000000..bd174fe --- /dev/null +++ b/vibe/orchestrator.py @@ -0,0 +1,80 @@ +import os +import logging +import concurrent.futures +from datetime import datetime + +from .config import ARTICLES_CACHE_DIR +from .fetcher import fetch_arxiv_list +from .filter import batch_relevance_filter +from .rerank import rerank_articles +from .converter import fetch_and_convert_article +from .summarizer import generate_article_summary +from .tts import text_to_speech + +logger = logging.getLogger(__name__) + +def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False): + """ + Executes the full pipeline: + 1. Fetch arXiv articles. + 2. Optionally filter out articles older than cached ones if new_only is True. + 3. Batch-check relevance via LLM. + 4. Rerank articles. + 5. Select top max_articles. + 6. Convert PDFs to Markdown. + 7. Generate narrative summaries. + 8. Combine summaries into a final narrative. + """ + articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url) + logger.info("Total articles fetched: %d", len(articles)) + + if new_only: + cached_articles = [f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt")] + if cached_articles: + def parse_id(id_str): + if id_str.lower().startswith("ar"): + id_str = id_str[6:] + parts = id_str.split(".") + return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1])) + most_recent = max(cached_articles, key=parse_id) + articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)] + logger.info("After filtering by most recent article id %s, %d articles remain.", most_recent, len(articles)) + else: + logger.info("No cached articles found, proceeding with all fetched articles.") + + relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name) + relevant_articles = [article for article in articles if article["id"] in relevant_ids] + logger.info("Found %d relevant articles out of %d.", len(relevant_articles), len(articles)) + + reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name) + final_candidates = reranked_articles[:max_articles] + + articles_with_content = [] + for article in final_candidates: + content = fetch_and_convert_article(article) + if content: + articles_with_content.append((article, content)) + else: + logger.warning("No content obtained for article '%s'.", article["id"]) + + summaries = [] + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_article = { + executor.submit(generate_article_summary, article, content, user_info, llm_url, model_name): article + for article, content in articles_with_content + } + for future in 
concurrent.futures.as_completed(future_to_article): + article = future_to_article[future] + try: + summary = future.result() + if summary: + summaries.append(summary) + else: + logger.warning("No summary generated for article '%s'.", article["id"]) + except Exception as e: + logger.exception("Error generating summary for article '%s': %s", article["id"], e) + + final_summary = "\n\n".join(summaries) + final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe." + logger.info("Final summary generated with length %d characters.", len(final_summary)) + return final_summary \ No newline at end of file diff --git a/vibe/rerank.py b/vibe/rerank.py new file mode 100644 index 0000000..aa398e8 --- /dev/null +++ b/vibe/rerank.py @@ -0,0 +1,54 @@ +import json +import re +import requests +import logging + +logger = logging.getLogger(__name__) + +def rerank_articles(articles, user_info, llm_url=None, model_name=None): + """ + Calls the LLM to reorder the articles by importance. Returns the reordered list. + Expects a JSON response with a 'ranking' key pointing to a list of article IDs. + """ + if not articles: + return [] + + if llm_url is None or model_name is None: + from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME + llm_url = llm_url or DEFAULT_LLM_URL + model_name = model_name or DEFAULT_MODEL_NAME + + logger.info("Starting rerank for %d articles.", len(articles)) + prompt_lines = [ + f"User info: {user_info}\n", + 'Please rank the following articles from most relevant to least relevant. Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.', + ] + for article in articles: + prompt_lines.append( + f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n" + ) + prompt = "\n".join(prompt_lines) + payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}]} + + try: + response = requests.post(llm_url, json=payload) + if response.status_code != 200: + logger.error("LLM reranking request failed with status code: %d", response.status_code) + return articles + data = response.json() + text_response = data["choices"][0]["message"]["content"].strip() + match = re.search(r"\{.*\}", text_response, re.DOTALL) + if not match: + logger.error("No valid JSON found in rerank response.") + return articles + json_str = match.group(0) + rerank_result = json.loads(json_str) + ranking_list = rerank_result.get("ranking", []) + article_map = {a["id"]: a for a in articles} + reordered = [article_map[art_id] for art_id in ranking_list if art_id in article_map] + remaining = [a for a in articles if a["id"] not in ranking_list] + reordered.extend(remaining) + return reordered + except Exception as e: + logger.exception("Error during rerank: %s", e) + return articles \ No newline at end of file diff --git a/vibe/server.py b/vibe/server.py new file mode 100644 index 0000000..b7886c1 --- /dev/null +++ b/vibe/server.py @@ -0,0 +1,38 @@ +from flask import Flask, send_file, request, jsonify +import logging +from .orchestrator import process_articles +from .config import CACHE_DIR + +logger = logging.getLogger(__name__) +app = Flask(__name__) + +@app.route("/process", methods=["POST"]) +def process_endpoint(): + data = request.get_json() + user_info = data.get("user_info", "") + if not user_info: + logger.error("user_info not provided in request.") + return jsonify({"error": "user_info not provided"}), 400 + + max_articles = data.get("max_articles", 5) + new_only = 
data.get("new_only", False) + + logger.info("Processing request with user_info: %s, max_articles: %s, new_only: %s", user_info, max_articles, new_only) + final_summary = process_articles(user_info, max_articles=max_articles, new_only=new_only) + if not final_summary.strip(): + logger.error("No summaries generated.") + return jsonify({"error": "No summaries generated."}), 500 + + output_mp3 = f"{CACHE_DIR}/final_output.mp3" + try: + from .tts import text_to_speech + text_to_speech(final_summary, output_mp3) + except Exception as e: + logger.exception("TTS conversion failed: %s", e) + return jsonify({"error": f"TTS conversion failed: {e}"}), 500 + + logger.info("Process complete. Returning MP3 file.") + return send_file(output_mp3, as_attachment=True) + +if __name__ == "__main__": + app.run(debug=True) \ No newline at end of file diff --git a/vibe/summarizer.py b/vibe/summarizer.py new file mode 100644 index 0000000..60e3075 --- /dev/null +++ b/vibe/summarizer.py @@ -0,0 +1,38 @@ +import requests +import logging +from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME + +logger = logging.getLogger(__name__) + +def generate_article_summary(article, content, user_info, llm_url=None, model_name=None): + """ + Generates a fluid, narrative summary for the article using the LLM. + The summary starts with a connecting phrase. + """ + if llm_url is None or model_name is None: + llm_url = DEFAULT_LLM_URL + model_name = DEFAULT_MODEL_NAME + + prompt = ( + f"User info: {user_info}\n\n" + f"Please summarize the following article titled '{article['title']}' in a fluid narrative prose style without lists or visual cues. " + f"Begin the summary with a connecting segment like 'And now, Article: {article['title']}'.\n\n" + f"Article Content:\n{content}" + ) + payload = { + "model": model_name, + "messages": [{"role": "user", "content": prompt}], + } + logger.info("Generating summary for article '%s'.", article["id"]) + try: + response = requests.post(llm_url, json=payload) + if response.status_code != 200: + logger.error("LLM summarization failed for article '%s'. Status code: %d", article["id"], response.status_code) + return "" + data = response.json() + summary = data["choices"][0]["message"]["content"].strip() + logger.debug("Summary for article '%s': %s", article["id"], summary[:100]) + return summary + except Exception as e: + logger.exception("Error summarizing article '%s': %s", article["id"], e) + return "" \ No newline at end of file diff --git a/vibe/tts.py b/vibe/tts.py new file mode 100644 index 0000000..57e3142 --- /dev/null +++ b/vibe/tts.py @@ -0,0 +1,33 @@ +import os +import subprocess +import tempfile +import logging +import soundfile as sf +from kokoro import KPipeline + +logger = logging.getLogger(__name__) + +def text_to_speech(text, output_mp3): + """ + Converts the provided text to speech using KPipeline. + Generates a temporary WAV file and converts it to MP3 using ffmpeg. + """ + logger.info("Starting text-to-speech conversion.") + pipeline = KPipeline(lang_code="a") + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav: + temp_wav_path = tmp_wav.name + logger.debug("Temporary WAV file created at %s", temp_wav_path) + + try: + generator = pipeline(text, voice="af_bella", speed=1, split_pattern=r"\n+") + with sf.SoundFile(temp_wav_path, "w", 24000, channels=1) as f: + for chunk_index, (_, _, audio) in enumerate(generator): + logger.debug("Writing audio chunk %d to WAV file.", chunk_index) + f.write(audio) + logger.info("WAV file generated. 
Converting to MP3 with ffmpeg.")
+        subprocess.run(["ffmpeg", "-y", "-i", temp_wav_path, output_mp3], check=True)
+        logger.info("MP3 file created at %s", output_mp3)
+    finally:
+        if os.path.exists(temp_wav_path):
+            os.unlink(temp_wav_path)
+            logger.debug("Temporary WAV file %s removed.", temp_wav_path)
\ No newline at end of file

From 55215a0edbc0bffb59084f130fa566c629527470 Mon Sep 17 00:00:00 2001
From: Regis David Souza Mesquita
Date: Sun, 2 Mar 2025 03:28:36 +0000
Subject: [PATCH 2/6] Adds frontend

---
 Dockerfile           | 27 ++++++++++++++++++
 templates/index.html | 67 ++++++++++++++++++++++++++++++++++++++++++++
 vibe/server.py       |  6 +++-
 3 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile
 create mode 100644 templates/index.html

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4ac088b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+
+# Install system dependencies including ffmpeg
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set environment variables for litellm API key and model (users can override these)
+ENV LITELLM_API_KEY=""
+ENV MODEL_NAME="mistral-small-latest"
+
+# Set working directory
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Expose port 5000 for the Flask server
+EXPOSE 5000
+
+# Command to run the Flask server
+CMD ["python", "vibe/main.py", "--serve"]
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..d948cd0
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,67 @@
[The 67-line template body lost its markup during extraction. Recoverable content: a standard HTML page titled "vibe: Article Summarization & TTS Pipeline" with a "Welcome to vibe" heading, the prompt "Enter your interests below to generate a summary MP3 of the latest Computer Science research articles.", a form with a text area for the user's interests and a submit button, and a script that submits the request and downloads the resulting MP3.]
\ No newline at end of file
diff --git a/vibe/server.py b/vibe/server.py
index b7886c1..d7233e0 100644
--- a/vibe/server.py
+++ b/vibe/server.py
@@ -1,4 +1,4 @@
-from flask import Flask, send_file, request, jsonify
+from flask import Flask, send_file, request, jsonify, render_template
 import logging
 from .orchestrator import process_articles
 from .config import CACHE_DIR
@@ -34,5 +34,9 @@ def process_endpoint():
     logger.info("Process complete. Returning MP3 file.")
     return send_file(output_mp3, as_attachment=True)
 
+@app.route("/")
+def index():
+    return render_template("index.html")
+
 if __name__ == "__main__":
     app.run(debug=True)
\ No newline at end of file

From 5bd4cf40a408d447beb96fb93f133ef3331c6aa6 Mon Sep 17 00:00:00 2001
From: Regis David Souza Mesquita
Date: Sun, 2 Mar 2025 03:53:46 +0000
Subject: [PATCH 3/6] Fixes

---
 requirements.txt     |  3 ++-
 templates/index.html | 26 +++++++++++++++++++++-----
 vibe/config.py       |  3 ++-
 vibe/orchestrator.py | 40 ++++++++++++++++++++++++++++++++++------
 vibe/server.py       | 28 +++++++++++++++++++++-------
 5 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index fc6e61a..03fc57c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ requests
 beautifulsoup4
 soundfile
 docling
-kokoro
\ No newline at end of file
+kokoro
+Flask-SocketIO
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
index d948cd0..6a97e48 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -8,8 +8,11 @@
 .container { max-width: 600px; margin: auto; }
 input[type="text"], textarea { width: 100%; padding: 10px; margin: 8px 0; }
 button { padding: 10px 20px; font-size: 16px; }
+#status { border: 1px solid #ccc; padding: 10px; margin-top: 20px; max-height: 200px; overflow-y: auto; }
 .hidden { display: none; }
@@ -22,15 +25,29 @@
diff --git a/vibe/config.py b/vibe/config.py index 51dc1ac..0f13110 100644 --- a/vibe/config.py +++ b/vibe/config.py @@ -6,7 +6,8 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) -CACHE_DIR = "cache" +BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +CACHE_DIR = os.path.join(BASE_DIR, "cache") if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR) logger.debug("Created cache directory: %s", CACHE_DIR) diff --git a/vibe/orchestrator.py b/vibe/orchestrator.py index bd174fe..9272cc4 100644 --- a/vibe/orchestrator.py +++ b/vibe/orchestrator.py @@ -13,7 +13,7 @@ from .tts import text_to_speech logger = logging.getLogger(__name__) -def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False): +def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None): """ Executes the full pipeline: 1. Fetch arXiv articles. @@ -25,10 +25,15 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m 7. Generate narrative summaries. 8. Combine summaries into a final narrative. """ + if trace_callback: + trace_callback("Starting pipeline: fetching arXiv articles...") articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url) - logger.info("Total articles fetched: %d", len(articles)) + if trace_callback: + trace_callback(f"Fetched {len(articles)} articles from arXiv.") if new_only: + if trace_callback: + trace_callback("Filtering articles for new content based on cache...") cached_articles = [f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt")] if cached_articles: def parse_id(id_str): @@ -38,25 +43,40 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1])) most_recent = max(cached_articles, key=parse_id) articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)] - logger.info("After filtering by most recent article id %s, %d articles remain.", most_recent, len(articles)) + if trace_callback: + trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.") else: - logger.info("No cached articles found, proceeding with all fetched articles.") + if trace_callback: + trace_callback("No cached articles found; processing all fetched articles.") + if trace_callback: + trace_callback("Performing relevance filtering via LLM...") relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name) relevant_articles = [article for article in articles if article["id"] in relevant_ids] - logger.info("Found %d relevant articles out of %d.", len(relevant_articles), len(articles)) + if trace_callback: + trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.") + if trace_callback: + trace_callback("Reranking articles based on relevance...") reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name) final_candidates = reranked_articles[:max_articles] + if trace_callback: + trace_callback("Converting article PDFs to Markdown...") articles_with_content = [] for article in final_candidates: content = fetch_and_convert_article(article) if content: articles_with_content.append((article, content)) + if trace_callback: + trace_callback(f"Converted article {article['id']} to Markdown.") else: logger.warning("No content obtained for article 
'%s'.", article["id"]) + if trace_callback: + trace_callback(f"Failed to convert article {article['id']}.") + if trace_callback: + trace_callback("Generating narrative summaries for articles...") summaries = [] with concurrent.futures.ThreadPoolExecutor() as executor: future_to_article = { @@ -69,12 +89,20 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m summary = future.result() if summary: summaries.append(summary) + if trace_callback: + trace_callback(f"Generated summary for article {article['id']}.") else: logger.warning("No summary generated for article '%s'.", article["id"]) + if trace_callback: + trace_callback(f"Summary generation failed for article {article['id']}.") except Exception as e: logger.exception("Error generating summary for article '%s': %s", article["id"], e) + if trace_callback: + trace_callback(f"Error generating summary for article {article['id']}.") final_summary = "\n\n".join(summaries) final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe." + if trace_callback: + trace_callback("Final summary generated. Starting TTS conversion.") logger.info("Final summary generated with length %d characters.", len(final_summary)) - return final_summary \ No newline at end of file + return final_summary diff --git a/vibe/server.py b/vibe/server.py index d7233e0..04f54b2 100644 --- a/vibe/server.py +++ b/vibe/server.py @@ -1,10 +1,13 @@ from flask import Flask, send_file, request, jsonify, render_template import logging -from .orchestrator import process_articles -from .config import CACHE_DIR +from vibe.orchestrator import process_articles +from vibe.config import CACHE_DIR + +from flask_socketio import SocketIO, emit logger = logging.getLogger(__name__) -app = Flask(__name__) +app = Flask(__name__, template_folder="../templates") +socketio = SocketIO(app) @app.route("/process", methods=["POST"]) def process_endpoint(): @@ -18,15 +21,22 @@ def process_endpoint(): new_only = data.get("new_only", False) logger.info("Processing request with user_info: %s, max_articles: %s, new_only: %s", user_info, max_articles, new_only) - final_summary = process_articles(user_info, max_articles=max_articles, new_only=new_only) + # Define trace_callback to emit trace messages via WebSockets + def trace_callback(message): + socketio.emit("trace", {"message": message}) + final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback) if not final_summary.strip(): logger.error("No summaries generated.") return jsonify({"error": "No summaries generated."}), 500 - output_mp3 = f"{CACHE_DIR}/final_output.mp3" + import uuid, os + mp3_filename = f"final_{uuid.uuid4().hex}.mp3" + output_mp3 = os.path.join(CACHE_DIR, mp3_filename) + try: - from .tts import text_to_speech + from vibe.tts import text_to_speech text_to_speech(final_summary, output_mp3) + trace_callback("Text-to-Speech conversion complete. MP3 file generated.") except Exception as e: logger.exception("TTS conversion failed: %s", e) return jsonify({"error": f"TTS conversion failed: {e}"}), 500 @@ -38,5 +48,9 @@ def process_endpoint(): def index(): return render_template("index.html") +@socketio.on("connect") +def handle_connect(): + emit("trace", {"message": "Connected to server. 
Ready to process your request."})
+
 if __name__ == "__main__":
-    app.run(debug=True)
\ No newline at end of file
+    socketio.run(app, debug=True)
\ No newline at end of file

From 281fd03aa8797863be0b39809af3f9684aef20d6 Mon Sep 17 00:00:00 2001
From: Regis David Souza Mesquita
Date: Sun, 2 Mar 2025 04:02:26 +0000
Subject: [PATCH 4/6] More readable readme

---
 README.md | 143 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 88 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index cda0e70..dce9f95 100644
--- a/README.md
+++ b/README.md
@@ -1,83 +1,116 @@
-# vibe: Article Summarization & TTS Pipeline
+# vibe: Your Personal AI Research Summarizer 🎧
 
-vibe is a Python-based pipeline that automatically fetches the latest Computer Science research articles from arXiv, filters them for relevance using a language model (LLM), converts article PDFs to Markdown with Docling, generates narrative summaries, and synthesizes the summaries into an MP3 audio file using a text-to-speech (TTS) system.
+**vibe** is a smart assistant that fetches the latest Computer Science research articles from arXiv, identifies the most relevant ones based on your interests, summarizes them into engaging narratives, and even reads them aloud by generating an MP3 audio summary. Perfect for staying informed effortlessly!
 
-This repository has been refactored into a modular structure for improved maintainability.
+---
 
-## Project Structure
+## 🎯 What Can vibe Do for You?
 
-- **vibe/** - Main package containing all modules:
-  - `config.py` - Configuration, constants, and cache setup.
-  - `fetcher.py` - Module to fetch articles from arXiv.
-  - `filter.py` - Module for relevance filtering using an LLM.
-  - `rerank.py` - Module to rerank articles.
-  - `converter.py` - Module to convert PDFs to Markdown.
-  - `summarizer.py` - Module to generate article summaries.
-  - `tts.py` - Module for text-to-speech conversion.
-  - `orchestrator.py` - Orchestrates the complete pipeline.
-  - `server.py` - Flask server exposing a REST API.
-  - `main.py` - CLI entry point.
-- **tests/** - Contains unit tests.
-- **requirements.txt** - Python package requirements.
-- **Makefile** - Makefile to run common tasks.
+- Automatically fetch the newest CS research from arXiv.
+- Filter and rank articles tailored to your specific interests.
+- Summarize articles into a smooth, narrative-friendly format.
+- Generate an MP3 audio summary to listen on-the-go.
+- Provide real-time progress updates while generating your summaries.
 
-## Installation
+---
 
-1. **Prerequisites:**
-   - Python 3.x
-   - Install dependencies:
-     ```bash
-     pip install -r requirements.txt
-     ```
+## 🚀 Quick Start Guide
 
-2. **Clone the repository:**
-   ```bash
-   git clone
-   cd
+### ✅ Step 1: Clone vibe to Your Machine
 
-Running the Application
+```bash
+git clone
+cd vibe
+```
 
-CLI Mode
+### ✅ Step 2: Installation
 
-To generate a summary MP3 using the CLI:
+Make sure you have Python (3.x) installed, then run:
 
-python vibe/main.py --generate --prompt "Your interests and context here" --max-articles 5 --output summary.mp3
+```bash
+pip install -r requirements.txt
+```
 
-Server Mode
+### 🛠 Running vibe
 
-To run the Flask server:
+You can use vibe in two ways: via a command-line interface (CLI) or through a friendly web interface with real-time updates. 
+#### 1️⃣ CLI Mode
+
+To quickly generate an audio summary directly from your terminal:
+
+```bash
+python -m vibe.main --generate --prompt "Your interests here" --max-articles 5 --output summary.mp3
+```
+
+Your audio summary will be saved as `summary.mp3`. Just play and enjoy!
+
+#### 2️⃣ Server Mode (Recommended 🎉)
+
+We’ve built a simple, intuitive web landing page that lets you interact easily with vibe:
+
+First, launch the Flask server by running:
+
+```bash
-python vibe/main.py --serve
+python -m vibe.main --serve
+```
 
-Then, you can make a POST request to http://127.0.0.1:5000/process with a JSON payload:
+Then open your web browser and go to:
 
-curl -X POST http://127.0.0.1:5000/process \
-  -H "Content-Type: application/json" \
-  -d '{"user_info": "Your interests here", "max_articles": 5, "new_only": false}'
+```
+http://127.0.0.1:5000
+```
 
-Running Tests
+#### ✨ How It Works:
 
-The project includes basic tests to verify that modules are working as expected. To run the tests, execute:
+- Enter your interests directly on the landing page.
+- Click “Submit” and relax while vibe fetches and summarizes the best articles for you.
+- Watch live status updates appear on-screen, letting you know exactly what’s happening behind the scenes.
+- Once complete, an audio summary (`summary.mp3`) will automatically download. It’s that easy!
+
+---
+
+## 🧪 Running Tests
+
+Ensure vibe stays reliable with the built-in test suite. Just run:
+
+```bash
 make test
+```
 
-or
+Or manually:
 
+```bash
 python -m unittest discover -s tests
+```
 
-Makefile Commands
-  • make test - Run the unit tests.
-  • make run - Run the application in CLI mode (you can modify the command inside the Makefile).
-  • make serve - Run the Flask server.
-  • make clean - Clean up temporary files (e.g., remove the cache directory).
+---
 
-Environment Variables
+## ⚙️ Makefile Commands
 
-The following environment variables can be set to customize the behavior:
-  • ARXIV_URL
-  • LLM_URL
-  • MODEL_NAME
+We’ve made common tasks simpler:
+
+- `make test` – Runs unit tests.
+- `make run` – Runs vibe in CLI mode (you can customize this command inside the Makefile).
+- `make serve` – Starts the Flask server with the web interface.
+- `make clean` – Cleans temporary files (cache, temporary directories).
+
+---
 
-License
+## 🌎 Environment Variables
+
+Customize vibe with these optional environment variables:
+
+- `ARXIV_URL` – URL for fetching articles from arXiv.
+- `LLM_URL` – URL of your preferred language model endpoint.
+- `MODEL_NAME` – Name of the language model to use.
+
+---
 
-This project is licensed under the MIT License.
\ No newline at end of file
+## 📜 License
+
+vibe is open source under the MIT License. Use it, modify it, enjoy it!
+
+---
+
+✨ Enjoy exploring the latest research effortlessly with vibe! 
✨ \ No newline at end of file From 1d90930e293483a8176c1a125ce6e1659e7d3304 Mon Sep 17 00:00:00 2001 From: Regis David Souza Mesquita Date: Sun, 2 Mar 2025 12:47:03 +0000 Subject: [PATCH 5/6] More features --- Dockerfile | 16 ++++++-------- README.md | 33 ++++++++++++++++++++++++++++ requirements.txt | 4 +++- vibe/config.py | 4 ++-- vibe/filter.py | 52 +++++++++++++++++--------------------------- vibe/llm.py | 38 ++++++++++++++++++++++++++++++++ vibe/llm_config.toml | 8 +++++++ vibe/main.py | 24 +++++++++++++------- vibe/orchestrator.py | 25 ++++++++++++++------- vibe/rerank.py | 23 ++++++-------------- vibe/server.py | 11 ++++++++-- vibe/summarizer.py | 24 +++++--------------- 12 files changed, 165 insertions(+), 97 deletions(-) create mode 100644 vibe/llm.py create mode 100644 vibe/llm_config.toml diff --git a/Dockerfile b/Dockerfile index 4ac088b..f6f83da 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,27 +1,25 @@ # Use an official Python runtime as a parent image -FROM python:3.9-slim +FROM python:3.12-slim # Install system dependencies including ffmpeg RUN apt-get update && apt-get install -y --no-install-recommends \ ffmpeg \ + espeak-ng \ build-essential \ && rm -rf /var/lib/apt/lists/* -# Set environment variables for litellm API key and model (users can override these) -ENV LITELLM_API_KEY="" -ENV MODEL_NAME="mistral-small-latest" - # Set working directory WORKDIR /app +COPY ./requirements.txt ./requirements.txt + +RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu + # Copy the current directory contents into the container at /app COPY . . -# Install Python dependencies -RUN pip install --no-cache-dir -r requirements.txt - # Expose port 5000 for the Flask server EXPOSE 5000 # Command to run the Flask server -CMD ["python", "vibe/main.py", "--serve"] \ No newline at end of file +CMD ["python", "-m", "vibe.main", "--serve"] diff --git a/README.md b/README.md index dce9f95..efcbc21 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,39 @@ http://127.0.0.1:5000 --- +## 🗄 API Documentation + +### Available Endpoints + +#### 1. `/process` (POST) + +**Description:** Generates a summary MP3 from provided user interests. + +**Request Body:** + +```json +{ + "user_info": "Your interests here", + "max_articles": 5, // Number of articles to process + "new_only": true // Fetch only new articles not in the cache +} +``` + +**Response:** + +- **Success:** Returns a 200 status code with a generated MP3 file. +- **Error:** Returns a 500 status code with an error message. + +**Example:** + +```bash +curl -X POST http://localhost:5000/process \ + -H 'Content-Type: application/json' \ + -d '{"user_info": "AI, Machine Learning", "max_articles": 5, "new_only": true}' +``` + +--- + ## 🧪 Running Tests Ensure vibe stays reliable with the built-in test suite. 
Just run: diff --git a/requirements.txt b/requirements.txt index 03fc57c..3253d55 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ beautifulsoup4 soundfile docling kokoro -Flask-SocketIO \ No newline at end of file +Flask-SocketIO +tomli +litellm diff --git a/vibe/config.py b/vibe/config.py index 0f13110..e546039 100644 --- a/vibe/config.py +++ b/vibe/config.py @@ -19,5 +19,5 @@ if not os.path.exists(ARTICLES_CACHE_DIR): logger.debug("Created articles cache directory: %s", ARTICLES_CACHE_DIR) DEFAULT_ARXIV_URL = os.environ.get("ARXIV_URL", "https://arxiv.org/list/cs/new") -DEFAULT_LLM_URL = os.environ.get("LLM_URL", "http://127.0.0.1:4000/v1/chat/completions") -DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest") \ No newline at end of file +DEFAULT_LLM_URL = os.environ.get("LLM_URL", "https://api.mistral.ai/v1/chat/completions") +DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest") diff --git a/vibe/filter.py b/vibe/filter.py index 59fe46f..839a3f6 100644 --- a/vibe/filter.py +++ b/vibe/filter.py @@ -1,22 +1,18 @@ import json import re -import requests import logging import concurrent.futures +from .llm import chat_llm + logger = logging.getLogger(__name__) -def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None): +def batch_relevance_filter(articles, user_info, batch_size=50, llm_level="medium"): """ Sends articles to the LLM in batches to check their relevance. Expects a JSON response mapping article IDs to "yes" or "no". - This version parallelizes the batched requests. + This version parallelizes the batched requests using chat_llm. """ - if llm_url is None or model_name is None: - from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME - llm_url = llm_url or DEFAULT_LLM_URL - model_name = model_name or DEFAULT_MODEL_NAME - relevant_article_ids = set() logger.info("Starting batched relevance check for %d articles.", len(articles)) @@ -24,40 +20,32 @@ def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, mod local_relevant_ids = set() prompt_lines = [f"User info: {user_info}\n"] prompt_lines.append( - "For each of the following articles, determine if it is relevant to the user. Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. Do not add any extra text; the response must start with a '{'." + "For each of the following articles, determine if it is relevant to the user. " + "Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. " + "Do not add extra text; the response must start with '{'." 
)
         for article in batch:
             prompt_lines.append(
                 f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
             )
         prompt = "\n".join(prompt_lines)
-        payload = {
-            "model": model_name,
-            "messages": [{"role": "user", "content": prompt}],
-        }
+
         try:
-            response = requests.post(llm_url, json=payload)
-            if response.status_code != 200:
-                logger.error("LLM batched relevance check failed for batch starting with article '%s' with status code: %d", batch[0]["id"], response.status_code)
+            response_text = chat_llm(prompt, level=llm_level)
+            match = re.search(r"\{.*\}", response_text, re.DOTALL)
+            if not match:
+                logger.error("No valid JSON object found in LLM response for relevance filter.")
                 return local_relevant_ids
-            data = response.json()
-            text_response = data["choices"][0]["message"]["content"].strip()
-            try:
-                match = re.search(r"\{.*\}", text_response, re.DOTALL)
-                if not match:
-                    raise ValueError("No valid JSON object found in response")
-                json_str = match.group(0)
-                logger.debug("Batch response: %s", json_str[:200])
-                result = json.loads(json_str)
-                for article_id, verdict in result.items():
-                    if isinstance(verdict, str) and verdict.lower().strip() == "yes":
-                        local_relevant_ids.add(article_id)
-            except Exception as e:
-                logger.exception("Failed to parse JSON from LLM response: %s", e)
-                return local_relevant_ids
+            json_str = match.group(0)
+            logger.debug("Batch response: %s", json_str[:200])
+            result = json.loads(json_str)
+            for article_id, verdict in result.items():
+                if isinstance(verdict, str) and verdict.lower().strip() == "yes":
+                    local_relevant_ids.add(article_id)
         except Exception as e:
             logger.exception("Error during batched relevance check: %s", e)
-            return local_relevant_ids
+
+        return local_relevant_ids
 
     batches = [articles[i: i + batch_size] for i in range(0, len(articles), batch_size)]
     with concurrent.futures.ThreadPoolExecutor() as executor:
diff --git a/vibe/llm.py b/vibe/llm.py
new file mode 100644
index 0000000..ec55c56
--- /dev/null
+++ b/vibe/llm.py
@@ -0,0 +1,38 @@
+import os
+import logging
+import litellm
+import tomli
+
+logger = logging.getLogger(__name__)
+CONFIG_PATH = os.path.join(os.path.dirname(__file__), "llm_config.toml")
+
+try:
+    with open(CONFIG_PATH, "rb") as f:
+        _CONFIG = tomli.load(f)
+except FileNotFoundError:
+    logger.warning("LLM config file llm_config.toml not found. Using default settings.")
+    _CONFIG = {"llms": {}}
+
+
+def chat_llm(prompt: str, level: str = "medium") -> str:
+    """
+    Sends 'prompt' to the LLM defined by the 'level' block in llm_config.toml.
+    Returns the LLM's text output. 
+ """ + llm_settings = _CONFIG["llms"].get(level, {}) + api_key = llm_settings.get("api_key", os.environ.get("MISTRAL_API_KEY")) + api_base = llm_settings.get("api_base", "https://api.mistral.ai") + model = llm_settings.get("model", "mistral/mistral-small-latest") + + try: + # Using the litellm library to call the chat endpoint + response = litellm.completion( + model=model, + messages=[{"role": "user", "content": prompt}], + api_base=api_base, + api_key=api_key, + ) + return response["choices"][0]["message"]["content"].strip() + except Exception as e: + logger.exception("Error calling LLM: %s", e) + return "" diff --git a/vibe/llm_config.toml b/vibe/llm_config.toml new file mode 100644 index 0000000..6761266 --- /dev/null +++ b/vibe/llm_config.toml @@ -0,0 +1,8 @@ +[llms.low] +model = "mistral/mistral-small-latest" + +[llms.medium] +model = "mistral/mistral-small-latest" + +[llms.high] +model = "mistral/mistral-small-latest" diff --git a/vibe/main.py b/vibe/main.py index ee70764..2210a36 100644 --- a/vibe/main.py +++ b/vibe/main.py @@ -2,7 +2,7 @@ import argparse import logging from vibe.orchestrator import process_articles from vibe.tts import text_to_speech -from vibe.config import DEFAULT_ARXIV_URL, DEFAULT_LLM_URL, DEFAULT_MODEL_NAME +from vibe.config import DEFAULT_ARXIV_URL logging.basicConfig( level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s" @@ -17,20 +17,28 @@ def main(): parser.add_argument("--max-articles", type=int, default=5, help="Maximum articles to process in the pipeline.") parser.add_argument("--new-only", action="store_true", help="Only process articles newer than cached.") parser.add_argument("--arxiv-url", type=str, default=DEFAULT_ARXIV_URL, help="URL for fetching arXiv articles.") - parser.add_argument("--llm-url", type=str, default=DEFAULT_LLM_URL, help="URL of the LLM endpoint.") - parser.add_argument("--model-name", type=str, default=DEFAULT_MODEL_NAME, help="Name of model to pass to the LLM endpoint.") parser.add_argument("--output", type=str, default="final_output.mp3", help="Output path for the generated MP3 file.") - + + # New: LLM Level + parser.add_argument("--llm-level", type=str, default="medium", choices=["low","medium","high"], + help="Desired LLM quality level: low, medium, or high. 
Defaults to medium.")
+
     args = parser.parse_args()
 
     if args.serve:
         from vibe.server import app
         logger.info("Starting Flask server.")
-        app.run(debug=True)
+        app.run(host='0.0.0.0', port=5000, debug=True)
     elif args.generate:
         logger.info("Running pipeline in CLI mode.")
         user_info = args.prompt
-        final_summary = process_articles(user_info, arxiv_url=args.arxiv_url, llm_url=args.llm_url, model_name=args.model_name, max_articles=args.max_articles, new_only=args.new_only)
+        final_summary = process_articles(
+            user_info,
+            arxiv_url=args.arxiv_url,
+            max_articles=args.max_articles,
+            new_only=args.new_only,
+            llm_level=args.llm_level
+        )
         if not final_summary.strip():
             logger.error("No summaries generated.")
             exit(1)
@@ -43,7 +51,7 @@ def main():
     else:
         logger.info("No mode specified; defaulting to Flask server.")
         from vibe.server import app
-        app.run(debug=True)
+        app.run(host='0.0.0.0', port=5000, debug=True)
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/vibe/orchestrator.py b/vibe/orchestrator.py
index 9272cc4..c513a1c 100644
--- a/vibe/orchestrator.py
+++ b/vibe/orchestrator.py
@@ -9,11 +9,17 @@ from .filter import batch_relevance_filter
 from .rerank import rerank_articles
 from .converter import fetch_and_convert_article
 from .summarizer import generate_article_summary
-from .tts import text_to_speech
 
 logger = logging.getLogger(__name__)
 
-def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
+def process_articles(
+    user_info,
+    arxiv_url=None,
+    max_articles=5,
+    new_only=False,
+    trace_callback=None,
+    llm_level="medium"
+):
     """
     Executes the full pipeline:
     1. Fetch arXiv articles.
@@ -42,7 +48,10 @@ def process_articles(user_info, arxiv_url=None, max_articles=5, new_only=False, 
             parts = id_str.split(".")
             return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
         most_recent = max(cached_articles, key=parse_id)
-        articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
+        articles = [
+            article for article in articles
+            if parse_id(article["id"]) > parse_id(most_recent)
+        ]
         if trace_callback:
             trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
     else:
@@ -51,14 +60,14 @@
 
     if trace_callback:
         trace_callback("Performing relevance filtering via LLM...")
-    relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
+    relevant_ids = batch_relevance_filter(articles, user_info, llm_level=llm_level)
     relevant_articles = [article for article in articles if article["id"] in relevant_ids]
     if trace_callback:
         trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")
 
     if trace_callback:
         trace_callback("Reranking articles based on relevance...")
-    reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
+    reranked_articles = rerank_articles(relevant_articles, user_info, llm_level=llm_level)
     final_candidates = reranked_articles[:max_articles]
 
     if trace_callback:
@@ -80,7 +89,7 @@
     summaries = []
     with concurrent.futures.ThreadPoolExecutor() as executor:
         future_to_article = {
-            executor.submit(generate_article_summary, article, content, user_info, llm_url, model_name): article
+            executor.submit(generate_article_summary, 
article, content, user_info, llm_level): article for article, content in articles_with_content } for future in concurrent.futures.as_completed(future_to_article): @@ -103,6 +112,6 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m final_summary = "\n\n".join(summaries) final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe." if trace_callback: - trace_callback("Final summary generated. Starting TTS conversion.") + trace_callback("Final summary generated.") logger.info("Final summary generated with length %d characters.", len(final_summary)) - return final_summary + return final_summary \ No newline at end of file diff --git a/vibe/rerank.py b/vibe/rerank.py index aa398e8..5fc1f80 100644 --- a/vibe/rerank.py +++ b/vibe/rerank.py @@ -1,11 +1,12 @@ import json import re -import requests import logging +from .llm import chat_llm + logger = logging.getLogger(__name__) -def rerank_articles(articles, user_info, llm_url=None, model_name=None): +def rerank_articles(articles, user_info, llm_level="medium"): """ Calls the LLM to reorder the articles by importance. Returns the reordered list. Expects a JSON response with a 'ranking' key pointing to a list of article IDs. @@ -13,31 +14,21 @@ def rerank_articles(articles, user_info, llm_url=None, model_name=None): if not articles: return [] - if llm_url is None or model_name is None: - from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME - llm_url = llm_url or DEFAULT_LLM_URL - model_name = model_name or DEFAULT_MODEL_NAME - logger.info("Starting rerank for %d articles.", len(articles)) prompt_lines = [ f"User info: {user_info}\n", - 'Please rank the following articles from most relevant to least relevant. Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.', + ('Please rank the following articles from most relevant to least relevant. ' + 'Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... 
] }.') ] for article in articles: prompt_lines.append( f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n" ) prompt = "\n".join(prompt_lines) - payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}]} try: - response = requests.post(llm_url, json=payload) - if response.status_code != 200: - logger.error("LLM reranking request failed with status code: %d", response.status_code) - return articles - data = response.json() - text_response = data["choices"][0]["message"]["content"].strip() - match = re.search(r"\{.*\}", text_response, re.DOTALL) + response_text = chat_llm(prompt, level=llm_level) + match = re.search(r"\{.*\}", response_text, re.DOTALL) if not match: logger.error("No valid JSON found in rerank response.") return articles diff --git a/vibe/server.py b/vibe/server.py index 04f54b2..ec7eaa0 100644 --- a/vibe/server.py +++ b/vibe/server.py @@ -2,7 +2,6 @@ from flask import Flask, send_file, request, jsonify, render_template import logging from vibe.orchestrator import process_articles from vibe.config import CACHE_DIR - from flask_socketio import SocketIO, emit logger = logging.getLogger(__name__) @@ -24,7 +23,15 @@ def process_endpoint(): # Define trace_callback to emit trace messages via WebSockets def trace_callback(message): socketio.emit("trace", {"message": message}) - final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback) + + final_summary = process_articles( + user_info, + arxiv_url=None, + max_articles=max_articles, + new_only=new_only, + trace_callback=trace_callback, + llm_level="medium" # hard-coded here; could be user-configurable + ) if not final_summary.strip(): logger.error("No summaries generated.") return jsonify({"error": "No summaries generated."}), 500 diff --git a/vibe/summarizer.py b/vibe/summarizer.py index 60e3075..b582380 100644 --- a/vibe/summarizer.py +++ b/vibe/summarizer.py @@ -1,38 +1,24 @@ -import requests import logging -from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME +from .llm import chat_llm logger = logging.getLogger(__name__) -def generate_article_summary(article, content, user_info, llm_url=None, model_name=None): +def generate_article_summary(article, content, user_info, llm_level="medium"): """ Generates a fluid, narrative summary for the article using the LLM. The summary starts with a connecting phrase. """ - if llm_url is None or model_name is None: - llm_url = DEFAULT_LLM_URL - model_name = DEFAULT_MODEL_NAME - prompt = ( f"User info: {user_info}\n\n" f"Please summarize the following article titled '{article['title']}' in a fluid narrative prose style without lists or visual cues. " f"Begin the summary with a connecting segment like 'And now, Article: {article['title']}'.\n\n" f"Article Content:\n{content}" ) - payload = { - "model": model_name, - "messages": [{"role": "user", "content": prompt}], - } + logger.info("Generating summary for article '%s'.", article["id"]) try: - response = requests.post(llm_url, json=payload) - if response.status_code != 200: - logger.error("LLM summarization failed for article '%s'. 
Status code: %d", article["id"], response.status_code)
-            return ""
-        data = response.json()
-        summary = data["choices"][0]["message"]["content"].strip()
-        logger.debug("Summary for article '%s': %s", article["id"], summary[:100])
-        return summary
+        response_text = chat_llm(prompt, level=llm_level)
+        return response_text
     except Exception as e:
         logger.exception("Error summarizing article '%s': %s", article["id"], e)
         return ""
\ No newline at end of file

From ef993f8693bd4f6803278342f616e8575f417877 Mon Sep 17 00:00:00 2001
From: Regis David Souza Mesquita
Date: Sun, 2 Mar 2025 15:47:53 +0000
Subject: [PATCH 6/6] Test

---
 vibe/converter.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/vibe/converter.py b/vibe/converter.py
index 856b5c3..0d87de3 100644
--- a/vibe/converter.py
+++ b/vibe/converter.py
@@ -12,8 +12,11 @@ from .config import ARTICLES_CACHE_DIR
 
 logger = logging.getLogger(__name__)
 
-pdf_options = PdfFormatOption(pipeline_options=PdfPipelineOptions(generate_picture_images=True))
+pipeline_options = PdfPipelineOptions()
+pipeline_options.ocr_options.use_gpu = False
+pipeline_options.generate_picture_images = False
+pdf_options = PdfFormatOption(pipeline_options=pipeline_options)
 doc_converter = DocumentConverter(format_options={InputFormat.PDF: pdf_options})
 
 def fetch_and_convert_article(article):
     """
@@ -51,10 +54,13 @@ def fetch_and_convert_article(article):
             f.write(converted_text)
         logger.info("Conversion successful for article '%s'. Cached output.", article["id"])
         return converted_text
+    except SystemExit as se:
+        logger.exception("Docling conversion exited with error code %s for article '%s'. Skipping conversion.", se.code, article["id"])
+        return ""
     except Exception as e:
         logger.exception("Conversion failed for article '%s': %s", article["id"], e)
         return ""
     finally:
         if os.path.exists(tmp_pdf_path):
             os.unlink(tmp_pdf_path)
-            logger.debug("Temporary PDF file %s removed.", tmp_pdf_path)
\ No newline at end of file
+            logger.debug("Temporary PDF file %s removed.", tmp_pdf_path)
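For context on the converter change above, here is a minimal sketch (not part of the patch) of exercising the same CPU-friendly docling configuration on its own, based on docling's documented conversion API; `paper.pdf` is a hypothetical local file standing in for a downloaded arXiv PDF:

```python
# A minimal sketch, not part of the patch: run the CPU-friendly docling
# configuration above against a local PDF. "paper.pdf" is a hypothetical file.
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

pipeline_options = PdfPipelineOptions()
pipeline_options.generate_picture_images = False  # skip picture extraction, as in the patch

converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
)
result = converter.convert("paper.pdf")
print(result.document.export_to_markdown()[:300])  # preview the Markdown output
```

Disabling picture-image generation and GPU OCR keeps conversion light on memory, which matches this series' move to a slim, CPU-only container image.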