More features

Regis David Souza Mesquita 2025-03-02 12:47:03 +00:00
parent 281fd03aa8
commit 1d90930e29
12 changed files with 165 additions and 97 deletions

Dockerfile
View file

@@ -1,27 +1,25 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim
FROM python:3.12-slim
# Install system dependencies including ffmpeg
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
espeak-ng \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Set environment variables for litellm API key and model (users can override these)
ENV LITELLM_API_KEY=""
ENV MODEL_NAME="mistral-small-latest"
# Set working directory
WORKDIR /app
COPY ./requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
# Copy the current directory contents into the container at /app
COPY . .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Expose port 5000 for the Flask server
EXPOSE 5000
# Command to run the Flask server
CMD ["python", "vibe/main.py", "--serve"]
CMD ["python", "-m", "vibe.main", "--serve"]

README.md
View file

@@ -70,6 +70,39 @@ http://127.0.0.1:5000
---
## 🗄 API Documentation
### Available Endpoints
#### 1. `/process` (POST)
**Description:** Generates a summary MP3 from provided user interests.
**Request Body:**
```json
{
"user_info": "Your interests here",
"max_articles": 5, // Number of articles to process
"new_only": true // Fetch only new articles not in the cache
}
```
**Response:**
- **Success:** Returns a 200 status code with a generated MP3 file.
- **Error:** Returns a 500 status code with an error message.
**Example:**
```bash
curl -X POST http://localhost:5000/process \
-H 'Content-Type: application/json' \
-d '{"user_info": "AI, Machine Learning", "max_articles": 5, "new_only": true}'
```
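For clients written in Python, the same request can be made with `requests`. This is a minimal sketch, assuming the endpoint returns the MP3 bytes directly in the response body as the description above suggests; the `digest.mp3` filename and the 600-second timeout are arbitrary choices:

```python
# Minimal Python equivalent of the curl example; assumes the endpoint streams
# the MP3 bytes back in the response body on success.
import requests

payload = {
    "user_info": "AI, Machine Learning",
    "max_articles": 5,
    "new_only": True,
}
resp = requests.post("http://localhost:5000/process", json=payload, timeout=600)
resp.raise_for_status()  # a 500 error response raises here

with open("digest.mp3", "wb") as f:
    f.write(resp.content)
print(f"Saved {len(resp.content)} bytes to digest.mp3")
```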
---
## 🧪 Running Tests
Ensure vibe stays reliable with the built-in test suite. Just run:

requirements.txt
View file

@@ -4,4 +4,6 @@ beautifulsoup4
soundfile
docling
kokoro
Flask-SocketIO
Flask-SocketIO
tomli
litellm

vibe/config.py
View file

@@ -19,5 +19,5 @@ if not os.path.exists(ARTICLES_CACHE_DIR):
logger.debug("Created articles cache directory: %s", ARTICLES_CACHE_DIR)
DEFAULT_ARXIV_URL = os.environ.get("ARXIV_URL", "https://arxiv.org/list/cs/new")
DEFAULT_LLM_URL = os.environ.get("LLM_URL", "http://127.0.0.1:4000/v1/chat/completions")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest")
DEFAULT_LLM_URL = os.environ.get("LLM_URL", "https://api.mistral.ai/v1/chat/completions")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest")

vibe/filter.py
View file

@@ -1,22 +1,18 @@
import json
import re
import requests
import logging
import concurrent.futures
from .llm import chat_llm
logger = logging.getLogger(__name__)
def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None):
def batch_relevance_filter(articles, user_info, batch_size=50, llm_level="medium"):
"""
Sends articles to the LLM in batches to check their relevance.
Expects a JSON response mapping article IDs to "yes" or "no".
This version parallelizes the batched requests.
This version parallelizes the batched requests using chat_llm.
"""
if llm_url is None or model_name is None:
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
llm_url = llm_url or DEFAULT_LLM_URL
model_name = model_name or DEFAULT_MODEL_NAME
relevant_article_ids = set()
logger.info("Starting batched relevance check for %d articles.", len(articles))
@@ -24,40 +20,32 @@ def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None):
local_relevant_ids = set()
prompt_lines = [f"User info: {user_info}\n"]
prompt_lines.append(
"For each of the following articles, determine if it is relevant to the user. Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. Do not add any extra text; the response must start with a '{'."
"For each of the following articles, determine if it is relevant to the user. "
"Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. "
"Do not add extra text; the response must start with '{'."
)
for article in batch:
prompt_lines.append(
f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
)
prompt = "\n".join(prompt_lines)
payload = {
"model": model_name,
"messages": [{"role": "user", "content": prompt}],
}
try:
response = requests.post(llm_url, json=payload)
if response.status_code != 200:
logger.error("LLM batched relevance check failed for batch starting with article '%s' with status code: %d", batch[0]["id"], response.status_code)
response_text = chat_llm(prompt, level=llm_level)
match = re.search(r"\{.*\}", response_text, re.DOTALL)
if not match:
logger.error("No valid JSON object found in LLM response for relevance filter.")
return local_relevant_ids
data = response.json()
text_response = data["choices"][0]["message"]["content"].strip()
try:
match = re.search(r"\{.*\}", text_response, re.DOTALL)
if not match:
raise ValueError("No valid JSON object found in response")
json_str = match.group(0)
logger.debug("Batch response: %s", json_str[:200])
result = json.loads(json_str)
for article_id, verdict in result.items():
if isinstance(verdict, str) and verdict.lower().strip() == "yes":
local_relevant_ids.add(article_id)
except Exception as e:
logger.exception("Failed to parse JSON from LLM response: %s", e)
return local_relevant_ids
json_str = match.group(0)
logger.debug("Batch response: %s", json_str[:200])
result = json.loads(json_str)
for article_id, verdict in result.items():
if isinstance(verdict, str) and verdict.lower().strip() == "yes":
local_relevant_ids.add(article_id)
except Exception as e:
logger.exception("Error during batched relevance check: %s", e)
return local_relevant_ids
return local_relevant_ids
batches = [articles[i: i + batch_size] for i in range(0, len(articles), batch_size)]
with concurrent.futures.ThreadPoolExecutor() as executor:
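The tail of this hunk is cut off by the diff view, but given the batching shown above the fan-out presumably follows the usual ThreadPoolExecutor pattern. A minimal sketch of that pattern, with `check_batch` standing in for the per-batch worker defined in the hunk:

```python
# Fan-out/fan-in sketch: each batch is checked in its own thread and the
# per-batch ID sets are unioned at the end. `check_batch` stands in for the
# worker above that builds the prompt, calls chat_llm, and parses the
# {"<article id>": "yes"/"no"} JSON into a set of relevant IDs.
import concurrent.futures

def filter_in_parallel(articles, check_batch, batch_size=50):
    batches = [articles[i:i + batch_size] for i in range(0, len(articles), batch_size)]
    relevant_ids = set()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(check_batch, batch) for batch in batches]
        for future in concurrent.futures.as_completed(futures):
            relevant_ids |= future.result()  # each worker returns a set of IDs
    return relevant_ids
```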

vibe/llm.py Normal file
View file

@@ -0,0 +1,38 @@
import os
import logging
import litellm
import tomli
logger = logging.getLogger(__name__)
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "llm_config.toml")
try:
with open(CONFIG_PATH, "rb") as f:
_CONFIG = tomli.load(f)
except FileNotFoundError:
logger.warning("LLM config file llm_config.toml not found. Using default settings.")
_CONFIG = {"llms": {}}  # fall back to defaults instead of exiting
def chat_llm(prompt: str, level: str = "medium") -> str:
"""
Sends 'prompt' to the LLM defined by the 'level' block in llm_config.toml.
Returns the LLM's text output.
"""
llm_settings = _CONFIG["llms"].get(level, {})
api_key = llm_settings.get("api_key", os.environ.get("MISTRAL_API_KEY"))
api_base = llm_settings.get("api_base", "https://api.mistral.ai")
model = llm_settings.get("model", "mistral/mistral-small-latest")
try:
# Using the litellm library to call the chat endpoint
response = litellm.completion(
model=model,
messages=[{"role": "user", "content": prompt}],
api_base=api_base,
api_key=api_key,
)
return response["choices"][0]["message"]["content"].strip()
except Exception as e:
logger.exception("Error calling LLM: %s", e)
return ""

vibe/llm_config.toml Normal file
View file

@@ -0,0 +1,8 @@
[llms.low]
model = "mistral/mistral-small-latest"
[llms.medium]
model = "mistral/mistral-small-latest"
[llms.high]
model = "mistral/mistral-small-latest"

vibe/main.py
View file

@@ -2,7 +2,7 @@ import argparse
import logging
from vibe.orchestrator import process_articles
from vibe.tts import text_to_speech
from vibe.config import DEFAULT_ARXIV_URL, DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
from vibe.config import DEFAULT_ARXIV_URL
logging.basicConfig(
level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -17,20 +17,28 @@ def main():
parser.add_argument("--max-articles", type=int, default=5, help="Maximum articles to process in the pipeline.")
parser.add_argument("--new-only", action="store_true", help="Only process articles newer than cached.")
parser.add_argument("--arxiv-url", type=str, default=DEFAULT_ARXIV_URL, help="URL for fetching arXiv articles.")
parser.add_argument("--llm-url", type=str, default=DEFAULT_LLM_URL, help="URL of the LLM endpoint.")
parser.add_argument("--model-name", type=str, default=DEFAULT_MODEL_NAME, help="Name of model to pass to the LLM endpoint.")
parser.add_argument("--output", type=str, default="final_output.mp3", help="Output path for the generated MP3 file.")
# New: LLM Level
parser.add_argument("--llm-level", type=str, default="medium", choices=["low","medium","high"],
help="Desired LLM quality level: low, medium, or high. Defaults to medium.")
args = parser.parse_args()
if args.serve:
from vibe.server import app
logger.info("Starting Flask server.")
app.run(debug=True)
app.run(host='0.0.0.0', port='14200', debug=True)
elif args.generate:
logger.info("Running pipeline in CLI mode.")
user_info = args.prompt
final_summary = process_articles(user_info, arxiv_url=args.arxiv_url, llm_url=args.llm_url, model_name=args.model_name, max_articles=args.max_articles, new_only=args.new_only)
final_summary = process_articles(
user_info,
arxiv_url=args.arxiv_url,
max_articles=args.max_articles,
new_only=args.new_only,
llm_level=args.llm_level
)
if not final_summary.strip():
logger.error("No summaries generated.")
exit(1)
@@ -43,7 +51,7 @@ def main():
else:
logger.info("No mode specified; defaulting to Flask server.")
from vibe.server import app
app.run(debug=True)
app.run(host='0.0.0.0', port='14200', debug=True)
if __name__ == "__main__":
main()
main()
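The `--generate` branch above can also be driven programmatically. A rough sketch under the new signature, with `print` as a throwaway `trace_callback` and the MP3 step omitted because `text_to_speech`'s signature is not shown in this diff:

```python
# Programmatic equivalent of `--generate` with the new llm_level knob.
from vibe.orchestrator import process_articles
from vibe.config import DEFAULT_ARXIV_URL

summary = process_articles(
    "AI, Machine Learning",
    arxiv_url=DEFAULT_ARXIV_URL,
    max_articles=5,
    new_only=True,
    trace_callback=print,   # any callable taking one message string
    llm_level="high",       # low / medium / high, matching llm_config.toml
)
print(summary[:500])
```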

vibe/orchestrator.py
View file

@@ -9,11 +9,17 @@ from .filter import batch_relevance_filter
from .rerank import rerank_articles
from .converter import fetch_and_convert_article
from .summarizer import generate_article_summary
from .tts import text_to_speech
logger = logging.getLogger(__name__)
def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
def process_articles(
user_info,
arxiv_url=None,
max_articles=5,
new_only=False,
trace_callback=None,
llm_level="medium"
):
"""
Executes the full pipeline:
1. Fetch arXiv articles.
@@ -42,7 +48,10 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
parts = id_str.split(".")
return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
most_recent = max(cached_articles, key=parse_id)
articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
articles = [
article for article in articles
if parse_id(article["id"]) > parse_id(most_recent)
]
if trace_callback:
trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
else:
@@ -51,14 +60,14 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
if trace_callback:
trace_callback("Performing relevance filtering via LLM...")
relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
relevant_ids = batch_relevance_filter(articles, user_info, llm_level=llm_level)
relevant_articles = [article for article in articles if article["id"] in relevant_ids]
if trace_callback:
trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")
if trace_callback:
trace_callback("Reranking articles based on relevance...")
reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
reranked_articles = rerank_articles(relevant_articles, user_info, llm_level=llm_level)
final_candidates = reranked_articles[:max_articles]
if trace_callback:
@@ -80,7 +89,7 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
summaries = []
with concurrent.futures.ThreadPoolExecutor() as executor:
future_to_article = {
executor.submit(generate_article_summary, article, content, user_info, llm_url, model_name): article
executor.submit(generate_article_summary, article, content, user_info, llm_level): article
for article, content in articles_with_content
}
for future in concurrent.futures.as_completed(future_to_article):
@@ -103,6 +112,6 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
final_summary = "\n\n".join(summaries)
final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
if trace_callback:
trace_callback("Final summary generated. Starting TTS conversion.")
trace_callback("Final summary generated.")
logger.info("Final summary generated with length %d characters.", len(final_summary))
return final_summary
return final_summary
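The `new_only` filter relies on `parse_id`, shown partially in the hunk above: an arXiv ID of the form `YYMM.NNNNN` is split into a `(year, month, sequence)` tuple so plain tuple comparison orders articles chronologically. A small demonstration with made-up IDs:

```python
# Mirrors parse_id from the hunk above; the IDs are invented for illustration.
def parse_id(id_str):
    parts = id_str.split(".")
    return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))

most_recent_cached = "2502.17890"
candidates = ["2502.17889", "2502.18001", "2503.00042"]
print([a for a in candidates if parse_id(a) > parse_id(most_recent_cached)])
# ['2502.18001', '2503.00042']
```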

vibe/rerank.py
View file

@@ -1,11 +1,12 @@
import json
import re
import requests
import logging
from .llm import chat_llm
logger = logging.getLogger(__name__)
def rerank_articles(articles, user_info, llm_url=None, model_name=None):
def rerank_articles(articles, user_info, llm_level="medium"):
"""
Calls the LLM to reorder the articles by importance. Returns the reordered list.
Expects a JSON response with a 'ranking' key pointing to a list of article IDs.
@@ -13,31 +14,21 @@ def rerank_articles(articles, user_info, llm_url=None, model_name=None):
if not articles:
return []
if llm_url is None or model_name is None:
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
llm_url = llm_url or DEFAULT_LLM_URL
model_name = model_name or DEFAULT_MODEL_NAME
logger.info("Starting rerank for %d articles.", len(articles))
prompt_lines = [
f"User info: {user_info}\n",
'Please rank the following articles from most relevant to least relevant. Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.',
('Please rank the following articles from most relevant to least relevant. '
'Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.')
]
for article in articles:
prompt_lines.append(
f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
)
prompt = "\n".join(prompt_lines)
payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}]}
try:
response = requests.post(llm_url, json=payload)
if response.status_code != 200:
logger.error("LLM reranking request failed with status code: %d", response.status_code)
return articles
data = response.json()
text_response = data["choices"][0]["message"]["content"].strip()
match = re.search(r"\{.*\}", text_response, re.DOTALL)
response_text = chat_llm(prompt, level=llm_level)
match = re.search(r"\{.*\}", response_text, re.DOTALL)
if not match:
logger.error("No valid JSON found in rerank response.")
return articles
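The reordering step that follows the JSON extraction is cut off in this hunk; a sketch of what it presumably does, keeping any articles the LLM omits at the end of the list:

```python
# Approximate tail of rerank_articles: order the articles by the "ranking"
# list from the LLM, leaving anything it did not mention at the end.
import json

def apply_ranking(articles, json_str):
    ranking = json.loads(json_str).get("ranking", [])
    position = {article_id: i for i, article_id in enumerate(ranking)}
    return sorted(articles, key=lambda a: position.get(a["id"], len(ranking)))

ordered = apply_ranking(
    [{"id": "2503.00042"}, {"id": "2502.18001"}],
    '{"ranking": ["2502.18001", "2503.00042"]}',
)
print([a["id"] for a in ordered])  # ['2502.18001', '2503.00042']
```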

vibe/server.py
View file

@@ -2,7 +2,7 @@ from flask import Flask, send_file, request, jsonify, render_template
import logging
from vibe.orchestrator import process_articles
from vibe.config import CACHE_DIR
from flask_socketio import SocketIO, emit
logger = logging.getLogger(__name__)
@@ -24,7 +23,15 @@ def process_endpoint():
# Define trace_callback to emit trace messages via WebSockets
def trace_callback(message):
socketio.emit("trace", {"message": message})
final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback)
final_summary = process_articles(
user_info,
arxiv_url=None,
max_articles=max_articles,
new_only=new_only,
trace_callback=trace_callback,
llm_level="medium" # hard-coded here; could be user-configurable
)
if not final_summary.strip():
logger.error("No summaries generated.")
return jsonify({"error": "No summaries generated."}), 500
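Because progress is now emitted over Socket.IO rather than returned in the response, a client can watch the `trace` events while `/process` runs. A minimal listener sketch, assuming the `python-socketio` client package (which this commit does not add to `requirements.txt`):

```python
# Prints each trace message as the pipeline runs; requires
# `pip install "python-socketio[client]"`.
import socketio

sio = socketio.Client()

@sio.on("trace")
def on_trace(data):
    print("trace:", data["message"])

# main.py binds 0.0.0.0:14200 while the README and Dockerfile still mention
# port 5000; point this at whichever the server actually uses.
sio.connect("http://localhost:14200")
sio.wait()
```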

vibe/summarizer.py
View file

@@ -1,38 +1,24 @@
import requests
import logging
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
from .llm import chat_llm
logger = logging.getLogger(__name__)
def generate_article_summary(article, content, user_info, llm_url=None, model_name=None):
def generate_article_summary(article, content, user_info, llm_level="medium"):
"""
Generates a fluid, narrative summary for the article using the LLM.
The summary starts with a connecting phrase.
"""
if llm_url is None or model_name is None:
llm_url = DEFAULT_LLM_URL
model_name = DEFAULT_MODEL_NAME
prompt = (
f"User info: {user_info}\n\n"
f"Please summarize the following article titled '{article['title']}' in a fluid narrative prose style without lists or visual cues. "
f"Begin the summary with a connecting segment like 'And now, Article: {article['title']}'.\n\n"
f"Article Content:\n{content}"
)
payload = {
"model": model_name,
"messages": [{"role": "user", "content": prompt}],
}
logger.info("Generating summary for article '%s'.", article["id"])
try:
response = requests.post(llm_url, json=payload)
if response.status_code != 200:
logger.error("LLM summarization failed for article '%s'. Status code: %d", article["id"], response.status_code)
return ""
data = response.json()
summary = data["choices"][0]["message"]["content"].strip()
logger.debug("Summary for article '%s': %s", article["id"], summary[:100])
return summary
response_text = chat_llm(prompt, level=llm_level)
return response_text
except Exception as e:
logger.exception("Error summarizing article '%s': %s", article["id"], e)
return ""