More features

Regis David Souza Mesquita 2025-03-02 12:47:03 +00:00
parent 281fd03aa8
commit 1d90930e29
12 changed files with 165 additions and 97 deletions

Dockerfile
View file

@@ -1,27 +1,25 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim
FROM python:3.12-slim
# Install system dependencies including ffmpeg
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
espeak-ng \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Set environment variables for litellm API key and model (users can override these)
ENV LITELLM_API_KEY=""
ENV MODEL_NAME="mistral-small-latest"
# Set working directory
WORKDIR /app
COPY ./requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
# Copy the current directory contents into the container at /app
COPY . .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Expose port 5000 for the Flask server
EXPOSE 5000
# Command to run the Flask server
CMD ["python", "vibe/main.py", "--serve"]
CMD ["python", "-m", "vibe.main", "--serve"]

README.md
View file

@@ -70,6 +70,39 @@ http://127.0.0.1:5000
---
## 🗄 API Documentation
### Available Endpoints
#### 1. `/process` (POST)
**Description:** Generates a summary MP3 from provided user interests.
**Request Body:**
```json
{
"user_info": "Your interests here",
"max_articles": 5, // Number of articles to process
"new_only": true // Fetch only new articles not in the cache
}
```
**Response:**
- **Success:** Returns a 200 status code with a generated MP3 file.
- **Error:** Returns a 500 status code with an error message.
**Example:**
```bash
curl -X POST http://localhost:5000/process \
-H 'Content-Type: application/json' \
-d '{"user_info": "AI, Machine Learning", "max_articles": 5, "new_only": true}'
```
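For clients written in Python, the same request can be made with `requests`. This is a minimal sketch, assuming the endpoint returns the MP3 bytes directly in the response body as the description above suggests; the `digest.mp3` filename and the 600-second timeout are arbitrary choices:

```python
# Minimal Python equivalent of the curl example; assumes the endpoint streams
# the MP3 bytes back in the response body on success.
import requests

payload = {
    "user_info": "AI, Machine Learning",
    "max_articles": 5,
    "new_only": True,
}
resp = requests.post("http://localhost:5000/process", json=payload, timeout=600)
resp.raise_for_status()  # a 500 error response raises here

with open("digest.mp3", "wb") as f:
    f.write(resp.content)
print(f"Saved {len(resp.content)} bytes to digest.mp3")
```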
---
## 🧪 Running Tests
Ensure vibe stays reliable with the built-in test suite. Just run:

requirements.txt
View file

@@ -4,4 +4,6 @@ beautifulsoup4
soundfile
docling
kokoro
Flask-SocketIO
Flask-SocketIO
tomli
litellm

vibe/config.py
View file

@@ -19,5 +19,5 @@ if not os.path.exists(ARTICLES_CACHE_DIR):
logger.debug("Created articles cache directory: %s", ARTICLES_CACHE_DIR)
DEFAULT_ARXIV_URL = os.environ.get("ARXIV_URL", "https://arxiv.org/list/cs/new")
DEFAULT_LLM_URL = os.environ.get("LLM_URL", "http://127.0.0.1:4000/v1/chat/completions")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest")
DEFAULT_LLM_URL = os.environ.get("LLM_URL", "https://api.mistral.ai/v1/chat/completions")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest")

vibe/filter.py
View file

@@ -1,22 +1,18 @@
import json
import re
import requests
import logging
import concurrent.futures
from .llm import chat_llm
logger = logging.getLogger(__name__)
def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None):
def batch_relevance_filter(articles, user_info, batch_size=50, llm_level="medium"):
"""
Sends articles to the LLM in batches to check their relevance.
Expects a JSON response mapping article IDs to "yes" or "no".
This version parallelizes the batched requests.
This version parallelizes the batched requests using chat_llm.
"""
if llm_url is None or model_name is None:
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
llm_url = llm_url or DEFAULT_LLM_URL
model_name = model_name or DEFAULT_MODEL_NAME
relevant_article_ids = set()
logger.info("Starting batched relevance check for %d articles.", len(articles))
@@ -24,40 +20,32 @@ def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None):
local_relevant_ids = set()
prompt_lines = [f"User info: {user_info}\n"]
prompt_lines.append(
"For each of the following articles, determine if it is relevant to the user. Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. Do not add any extra text; the response must start with a '{'."
"For each of the following articles, determine if it is relevant to the user. "
"Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. "
"Do not add extra text; the response must start with '{'."
)
for article in batch:
prompt_lines.append(
f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
)
prompt = "\n".join(prompt_lines)
payload = {
"model": model_name,
"messages": [{"role": "user", "content": prompt}],
}
try:
response = requests.post(llm_url, json=payload)
if response.status_code != 200:
logger.error("LLM batched relevance check failed for batch starting with article '%s' with status code: %d", batch[0]["id"], response.status_code)
response_text = chat_llm(prompt, level=llm_level)
match = re.search(r"\{.*\}", response_text, re.DOTALL)
if not match:
logger.error("No valid JSON object found in LLM response for relevance filter.")
return local_relevant_ids
data = response.json()
text_response = data["choices"][0]["message"]["content"].strip()
try:
match = re.search(r"\{.*\}", text_response, re.DOTALL)
if not match:
raise ValueError("No valid JSON object found in response")
json_str = match.group(0)
logger.debug("Batch response: %s", json_str[:200])
result = json.loads(json_str)
for article_id, verdict in result.items():
if isinstance(verdict, str) and verdict.lower().strip() == "yes":
local_relevant_ids.add(article_id)
except Exception as e:
logger.exception("Failed to parse JSON from LLM response: %s", e)
return local_relevant_ids
json_str = match.group(0)
logger.debug("Batch response: %s", json_str[:200])
result = json.loads(json_str)
for article_id, verdict in result.items():
if isinstance(verdict, str) and verdict.lower().strip() == "yes":
local_relevant_ids.add(article_id)
except Exception as e:
logger.exception("Error during batched relevance check: %s", e)
return local_relevant_ids
return local_relevant_ids
batches = [articles[i: i + batch_size] for i in range(0, len(articles), batch_size)]
with concurrent.futures.ThreadPoolExecutor() as executor:
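The tail of this hunk is cut off by the diff view, but given the batching shown above the fan-out presumably follows the usual ThreadPoolExecutor pattern. A minimal sketch of that pattern, with `check_batch` standing in for the per-batch worker defined in the hunk:

```python
# Fan-out/fan-in sketch: each batch is checked in its own thread and the
# per-batch ID sets are unioned at the end. `check_batch` stands in for the
# worker above that builds the prompt, calls chat_llm, and parses the
# {"<article id>": "yes"/"no"} JSON into a set of relevant IDs.
import concurrent.futures

def filter_in_parallel(articles, check_batch, batch_size=50):
    batches = [articles[i:i + batch_size] for i in range(0, len(articles), batch_size)]
    relevant_ids = set()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(check_batch, batch) for batch in batches]
        for future in concurrent.futures.as_completed(futures):
            relevant_ids |= future.result()  # each worker returns a set of IDs
    return relevant_ids
```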

vibe/llm.py Normal file
View file

@@ -0,0 +1,38 @@
import os
import logging
import litellm
import tomli
logger = logging.getLogger(__name__)
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "llm_config.toml")
try:
with open(CONFIG_PATH, "rb") as f:
_CONFIG = tomli.load(f)
except FileNotFoundError:
logger.warning("LLM config file llm_config.toml not found. Using default settings.")
_CONFIG = {"llms": {}}  # fall back to defaults instead of exiting
def chat_llm(prompt: str, level: str = "medium") -> str:
"""
Sends 'prompt' to the LLM defined by the 'level' block in llm_config.toml.
Returns the LLM's text output.
"""
llm_settings = _CONFIG["llms"].get(level, {})
api_key = llm_settings.get("api_key", os.environ.get("MISTRAL_API_KEY"))
api_base = llm_settings.get("api_base", "https://api.mistral.ai")
model = llm_settings.get("model", "mistral/mistral-small-latest")
try:
# Using the litellm library to call the chat endpoint
response = litellm.completion(
model=model,
messages=[{"role": "user", "content": prompt}],
api_base=api_base,
api_key=api_key,
)
return response["choices"][0]["message"]["content"].strip()
except Exception as e:
logger.exception("Error calling LLM: %s", e)
return ""

vibe/llm_config.toml Normal file
View file

@@ -0,0 +1,8 @@
[llms.low]
model = "mistral/mistral-small-latest"
[llms.medium]
model = "mistral/mistral-small-latest"
[llms.high]
model = "mistral/mistral-small-latest"

vibe/main.py
View file

@@ -2,7 +2,7 @@ import argparse
import logging
from vibe.orchestrator import process_articles
from vibe.tts import text_to_speech
from vibe.config import DEFAULT_ARXIV_URL, DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
from vibe.config import DEFAULT_ARXIV_URL
logging.basicConfig(
level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -17,20 +17,28 @@ def main():
parser.add_argument("--max-articles", type=int, default=5, help="Maximum articles to process in the pipeline.")
parser.add_argument("--new-only", action="store_true", help="Only process articles newer than cached.")
parser.add_argument("--arxiv-url", type=str, default=DEFAULT_ARXIV_URL, help="URL for fetching arXiv articles.")
parser.add_argument("--llm-url", type=str, default=DEFAULT_LLM_URL, help="URL of the LLM endpoint.")
parser.add_argument("--model-name", type=str, default=DEFAULT_MODEL_NAME, help="Name of model to pass to the LLM endpoint.")
parser.add_argument("--output", type=str, default="final_output.mp3", help="Output path for the generated MP3 file.")
# New: LLM Level
parser.add_argument("--llm-level", type=str, default="medium", choices=["low","medium","high"],
help="Desired LLM quality level: low, medium, or high. Defaults to medium.")
args = parser.parse_args()
if args.serve:
from vibe.server import app
logger.info("Starting Flask server.")
app.run(debug=True)
app.run(host='0.0.0.0', port='14200', debug=True)
elif args.generate:
logger.info("Running pipeline in CLI mode.")
user_info = args.prompt
final_summary = process_articles(user_info, arxiv_url=args.arxiv_url, llm_url=args.llm_url, model_name=args.model_name, max_articles=args.max_articles, new_only=args.new_only)
final_summary = process_articles(
user_info,
arxiv_url=args.arxiv_url,
max_articles=args.max_articles,
new_only=args.new_only,
llm_level=args.llm_level
)
if not final_summary.strip():
logger.error("No summaries generated.")
exit(1)
@@ -43,7 +51,7 @@ def main():
else:
logger.info("No mode specified; defaulting to Flask server.")
from vibe.server import app
app.run(debug=True)
app.run(host='0.0.0.0', port='14200', debug=True)
if __name__ == "__main__":
main()
main()
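The `--generate` branch above can also be driven programmatically. A rough sketch under the new signature, with `print` as a throwaway `trace_callback` and the MP3 step omitted because `text_to_speech`'s signature is not shown in this diff:

```python
# Programmatic equivalent of `--generate` with the new llm_level knob.
from vibe.orchestrator import process_articles
from vibe.config import DEFAULT_ARXIV_URL

summary = process_articles(
    "AI, Machine Learning",
    arxiv_url=DEFAULT_ARXIV_URL,
    max_articles=5,
    new_only=True,
    trace_callback=print,   # any callable taking one message string
    llm_level="high",       # low / medium / high, matching llm_config.toml
)
print(summary[:500])
```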

vibe/orchestrator.py
View file

@@ -9,11 +9,17 @@ from .filter import batch_relevance_filter
from .rerank import rerank_articles
from .converter import fetch_and_convert_article
from .summarizer import generate_article_summary
from .tts import text_to_speech
logger = logging.getLogger(__name__)
def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
def process_articles(
user_info,
arxiv_url=None,
max_articles=5,
new_only=False,
trace_callback=None,
llm_level="medium"
):
"""
Executes the full pipeline:
1. Fetch arXiv articles.
@@ -42,7 +48,10 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
parts = id_str.split(".")
return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
most_recent = max(cached_articles, key=parse_id)
articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
articles = [
article for article in articles
if parse_id(article["id"]) > parse_id(most_recent)
]
if trace_callback:
trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
else:
@@ -51,14 +60,14 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
if trace_callback:
trace_callback("Performing relevance filtering via LLM...")
relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
relevant_ids = batch_relevance_filter(articles, user_info, llm_level=llm_level)
relevant_articles = [article for article in articles if article["id"] in relevant_ids]
if trace_callback:
trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")
if trace_callback:
trace_callback("Reranking articles based on relevance...")
reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
reranked_articles = rerank_articles(relevant_articles, user_info, llm_level=llm_level)
final_candidates = reranked_articles[:max_articles]
if trace_callback:
@@ -80,7 +89,7 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
summaries = []
with concurrent.futures.ThreadPoolExecutor() as executor:
future_to_article = {
executor.submit(generate_article_summary, article, content, user_info, llm_url, model_name): article
executor.submit(generate_article_summary, article, content, user_info, llm_level): article
for article, content in articles_with_content
}
for future in concurrent.futures.as_completed(future_to_article):
@@ -103,6 +112,6 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
final_summary = "\n\n".join(summaries)
final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
if trace_callback:
trace_callback("Final summary generated. Starting TTS conversion.")
trace_callback("Final summary generated.")
logger.info("Final summary generated with length %d characters.", len(final_summary))
return final_summary
return final_summary
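The `new_only` filter relies on `parse_id`, shown partially in the hunk above: an arXiv ID of the form `YYMM.NNNNN` is split into a `(year, month, sequence)` tuple so plain tuple comparison orders articles chronologically. A small demonstration with made-up IDs:

```python
# Mirrors parse_id from the hunk above; the IDs are invented for illustration.
def parse_id(id_str):
    parts = id_str.split(".")
    return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))

most_recent_cached = "2502.17890"
candidates = ["2502.17889", "2502.18001", "2503.00042"]
print([a for a in candidates if parse_id(a) > parse_id(most_recent_cached)])
# ['2502.18001', '2503.00042']
```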

vibe/rerank.py
View file

@@ -1,11 +1,12 @@
import json
import re
import requests
import logging
from .llm import chat_llm
logger = logging.getLogger(__name__)
def rerank_articles(articles, user_info, llm_url=None, model_name=None):
def rerank_articles(articles, user_info, llm_level="medium"):
"""
Calls the LLM to reorder the articles by importance. Returns the reordered list.
Expects a JSON response with a 'ranking' key pointing to a list of article IDs.
@@ -13,31 +14,21 @@ def rerank_articles(articles, user_info, llm_url=None, model_name=None):
if not articles:
return []
if llm_url is None or model_name is None:
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
llm_url = llm_url or DEFAULT_LLM_URL
model_name = model_name or DEFAULT_MODEL_NAME
logger.info("Starting rerank for %d articles.", len(articles))
prompt_lines = [
f"User info: {user_info}\n",
'Please rank the following articles from most relevant to least relevant. Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.',
('Please rank the following articles from most relevant to least relevant. '
'Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.')
]
for article in articles:
prompt_lines.append(
f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
)
prompt = "\n".join(prompt_lines)
payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}]}
try:
response = requests.post(llm_url, json=payload)
if response.status_code != 200:
logger.error("LLM reranking request failed with status code: %d", response.status_code)
return articles
data = response.json()
text_response = data["choices"][0]["message"]["content"].strip()
match = re.search(r"\{.*\}", text_response, re.DOTALL)
response_text = chat_llm(prompt, level=llm_level)
match = re.search(r"\{.*\}", response_text, re.DOTALL)
if not match:
logger.error("No valid JSON found in rerank response.")
return articles
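The reordering step that follows the JSON extraction is cut off in this hunk; a sketch of what it presumably does, keeping any articles the LLM omits at the end of the list:

```python
# Approximate tail of rerank_articles: order the articles by the "ranking"
# list from the LLM, leaving anything it did not mention at the end.
import json

def apply_ranking(articles, json_str):
    ranking = json.loads(json_str).get("ranking", [])
    position = {article_id: i for i, article_id in enumerate(ranking)}
    return sorted(articles, key=lambda a: position.get(a["id"], len(ranking)))

ordered = apply_ranking(
    [{"id": "2503.00042"}, {"id": "2502.18001"}],
    '{"ranking": ["2502.18001", "2503.00042"]}',
)
print([a["id"] for a in ordered])  # ['2502.18001', '2503.00042']
```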

vibe/server.py
View file

@@ -2,7 +2,7 @@ from flask import Flask, send_file, request, jsonify, render_template
import logging
from vibe.orchestrator import process_articles
from vibe.config import CACHE_DIR
from flask_socketio import SocketIO, emit
logger = logging.getLogger(__name__)
@@ -24,7 +23,15 @@ def process_endpoint():
# Define trace_callback to emit trace messages via WebSockets
def trace_callback(message):
socketio.emit("trace", {"message": message})
final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback)
final_summary = process_articles(
user_info,
arxiv_url=None,
max_articles=max_articles,
new_only=new_only,
trace_callback=trace_callback,
llm_level="medium" # hard-coded here; could be user-configurable
)
if not final_summary.strip():
logger.error("No summaries generated.")
return jsonify({"error": "No summaries generated."}), 500
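Because progress is now emitted over Socket.IO rather than returned in the response, a client can watch the `trace` events while `/process` runs. A minimal listener sketch, assuming the `python-socketio` client package (which this commit does not add to `requirements.txt`):

```python
# Prints each trace message as the pipeline runs; requires
# `pip install "python-socketio[client]"`.
import socketio

sio = socketio.Client()

@sio.on("trace")
def on_trace(data):
    print("trace:", data["message"])

# main.py binds 0.0.0.0:14200 while the README and Dockerfile still mention
# port 5000; point this at whichever the server actually uses.
sio.connect("http://localhost:14200")
sio.wait()
```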

vibe/summarizer.py
View file

@@ -1,38 +1,24 @@
import requests
import logging
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
from .llm import chat_llm
logger = logging.getLogger(__name__)
def generate_article_summary(article, content, user_info, llm_url=None, model_name=None):
def generate_article_summary(article, content, user_info, llm_level="medium"):
"""
Generates a fluid, narrative summary for the article using the LLM.
The summary starts with a connecting phrase.
"""
if llm_url is None or model_name is None:
llm_url = DEFAULT_LLM_URL
model_name = DEFAULT_MODEL_NAME
prompt = (
f"User info: {user_info}\n\n"
f"Please summarize the following article titled '{article['title']}' in a fluid narrative prose style without lists or visual cues. "
f"Begin the summary with a connecting segment like 'And now, Article: {article['title']}'.\n\n"
f"Article Content:\n{content}"
)
payload = {
"model": model_name,
"messages": [{"role": "user", "content": prompt}],
}
logger.info("Generating summary for article '%s'.", article["id"])
try:
response = requests.post(llm_url, json=payload)
if response.status_code != 200:
logger.error("LLM summarization failed for article '%s'. Status code: %d", article["id"], response.status_code)
return ""
data = response.json()
summary = data["choices"][0]["message"]["content"].strip()
logger.debug("Summary for article '%s': %s", article["id"], summary[:100])
return summary
response_text = chat_llm(prompt, level=llm_level)
return response_text
except Exception as e:
logger.exception("Error summarizing article '%s': %s", article["id"], e)
return ""