More features
commit 1d90930e29 (parent 281fd03aa8)
12 changed files with 165 additions and 97 deletions

Dockerfile (16 changes)

@@ -1,27 +1,25 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim
FROM python:3.12-slim

# Install system dependencies including ffmpeg
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    espeak-ng \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Set environment variables for litellm API key and model (users can override these)
ENV LITELLM_API_KEY=""
ENV MODEL_NAME="mistral-small-latest"

# Set working directory
WORKDIR /app

COPY ./requirements.txt ./requirements.txt

RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu

# Copy the current directory contents into the container at /app
COPY . .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Expose port 5000 for the Flask server
EXPOSE 5000

# Command to run the Flask server
CMD ["python", "vibe/main.py", "--serve"]
CMD ["python", "-m", "vibe.main", "--serve"]
README.md (33 changes)

@@ -70,6 +70,39 @@ http://127.0.0.1:5000

---

## 🗄 API Documentation

### Available Endpoints

#### 1. `/process` (POST)

**Description:** Generates a summary MP3 from provided user interests.

**Request Body:**

```json
{
  "user_info": "Your interests here",
  "max_articles": 5,  // Number of articles to process
  "new_only": true    // Fetch only new articles not in the cache
}
```

**Response:**

- **Success:** Returns a 200 status code with a generated MP3 file.
- **Error:** Returns a 500 status code with an error message.

**Example:**

```bash
curl -X POST http://localhost:5000/process \
  -H 'Content-Type: application/json' \
  -d '{"user_info": "AI, Machine Learning", "max_articles": 5, "new_only": true}'
```
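
The same call is easy to script; a minimal Python client sketch (assuming the localhost:5000 address from the curl example above and that `requests` is installed):

```python
# Minimal client sketch for POST /process; on success the response body is the MP3.
import requests

payload = {
    "user_info": "AI, Machine Learning",
    "max_articles": 5,
    "new_only": True,
}

resp = requests.post("http://localhost:5000/process", json=payload, timeout=600)
resp.raise_for_status()  # a 500 carries an error message instead of audio

with open("summary.mp3", "wb") as f:
    f.write(resp.content)
print(f"Saved summary.mp3 ({len(resp.content)} bytes)")
```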
---

## 🧪 Running Tests

Ensure vibe stays reliable with the built-in test suite. Just run:

requirements.txt

@@ -4,4 +4,6 @@ beautifulsoup4
soundfile
docling
kokoro
Flask-SocketIO
Flask-SocketIO
tomli
litellm

vibe/config.py

@@ -19,5 +19,5 @@ if not os.path.exists(ARTICLES_CACHE_DIR):
    logger.debug("Created articles cache directory: %s", ARTICLES_CACHE_DIR)

DEFAULT_ARXIV_URL = os.environ.get("ARXIV_URL", "https://arxiv.org/list/cs/new")
DEFAULT_LLM_URL = os.environ.get("LLM_URL", "http://127.0.0.1:4000/v1/chat/completions")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest")
DEFAULT_LLM_URL = os.environ.get("LLM_URL", "https://api.mistral.ai/v1/chat/completions")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "mistral-small-latest")

vibe/filter.py

@@ -1,22 +1,18 @@
import json
import re
import requests
import logging
import concurrent.futures

from .llm import chat_llm

logger = logging.getLogger(__name__)

def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None):
def batch_relevance_filter(articles, user_info, batch_size=50, llm_level="medium"):
    """
    Sends articles to the LLM in batches to check their relevance.
    Expects a JSON response mapping article IDs to "yes" or "no".
    This version parallelizes the batched requests.
    This version parallelizes the batched requests using chat_llm.
    """
    if llm_url is None or model_name is None:
        from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
        llm_url = llm_url or DEFAULT_LLM_URL
        model_name = model_name or DEFAULT_MODEL_NAME

    relevant_article_ids = set()
    logger.info("Starting batched relevance check for %d articles.", len(articles))

@@ -24,40 +20,32 @@ def batch_relevance_filter(articles, user_info, batch_size=50, llm_url=None, model_name=None):
        local_relevant_ids = set()
        prompt_lines = [f"User info: {user_info}\n"]
        prompt_lines.append(
            "For each of the following articles, determine if it is relevant to the user. Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. Do not add any extra text; the response must start with a '{'."
            "For each of the following articles, determine if it is relevant to the user. "
            "Respond in JSON format with keys as the article IDs and values as 'yes' or 'no'. "
            "Do not add extra text; the response must start with '{'."
        )
        for article in batch:
            prompt_lines.append(
                f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
            )
        prompt = "\n".join(prompt_lines)
        payload = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
        }

        try:
            response = requests.post(llm_url, json=payload)
            if response.status_code != 200:
                logger.error("LLM batched relevance check failed for batch starting with article '%s' with status code: %d", batch[0]["id"], response.status_code)
            response_text = chat_llm(prompt, level=llm_level)
            match = re.search(r"\{.*\}", response_text, re.DOTALL)
            if not match:
                logger.error("No valid JSON object found in LLM response for relevance filter.")
                return local_relevant_ids
            data = response.json()
            text_response = data["choices"][0]["message"]["content"].strip()
            try:
                match = re.search(r"\{.*\}", text_response, re.DOTALL)
                if not match:
                    raise ValueError("No valid JSON object found in response")
                json_str = match.group(0)
                logger.debug("Batch response: %s", json_str[:200])
                result = json.loads(json_str)
                for article_id, verdict in result.items():
                    if isinstance(verdict, str) and verdict.lower().strip() == "yes":
                        local_relevant_ids.add(article_id)
            except Exception as e:
                logger.exception("Failed to parse JSON from LLM response: %s", e)
                return local_relevant_ids
            json_str = match.group(0)
            logger.debug("Batch response: %s", json_str[:200])
            result = json.loads(json_str)
            for article_id, verdict in result.items():
                if isinstance(verdict, str) and verdict.lower().strip() == "yes":
                    local_relevant_ids.add(article_id)
        except Exception as e:
            logger.exception("Error during batched relevance check: %s", e)
            return local_relevant_ids

        return local_relevant_ids

    batches = [articles[i: i + batch_size] for i in range(0, len(articles), batch_size)]
    with concurrent.futures.ThreadPoolExecutor() as executor:
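
The "parallelizes the batched requests" behavior described in the docstring comes down to slicing the article list into batches, running one worker per batch on a thread pool, and unioning the per-batch ID sets. A self-contained sketch of just that pattern (the toy `check_batch` worker and sample data are made up for illustration and are not the module's actual code):

```python
# Illustration of the fan-out pattern used by batch_relevance_filter:
# slice articles into batches, check each batch on a worker thread, merge the ID sets.
import concurrent.futures

def check_batch(batch):
    # Toy stand-in for the real worker, which prompts the LLM and parses its JSON verdicts.
    return {article["id"] for article in batch if "llm" in article["title"].lower()}

articles = [{"id": str(i), "title": f"Paper {i} on LLM tooling"} for i in range(120)]
batch_size = 50

batches = [articles[i: i + batch_size] for i in range(0, len(articles), batch_size)]
relevant_ids = set()
with concurrent.futures.ThreadPoolExecutor() as executor:
    for ids in executor.map(check_batch, batches):
        relevant_ids |= ids

print(f"{len(relevant_ids)} of {len(articles)} articles flagged relevant")
```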

vibe/llm.py (new file, 38 lines)

@@ -0,0 +1,38 @@
import os
import logging
import litellm
import tomli

logger = logging.getLogger(__name__)
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "llm_config.toml")

try:
    with open(CONFIG_PATH, "rb") as f:
        _CONFIG = tomli.load(f)
except FileNotFoundError:
    logger.warning("LLM config file llm_config.toml not found. Using default settings.")
    _CONFIG = {"llms": {}}


def chat_llm(prompt: str, level: str = "medium") -> str:
    """
    Sends 'prompt' to the LLM defined by the 'level' block in llm_config.toml.
    Returns the LLM's text output.
    """
    llm_settings = _CONFIG["llms"].get(level, {})
    api_key = llm_settings.get("api_key", os.environ.get("MISTRAL_API_KEY"))
    api_base = llm_settings.get("api_base", "https://api.mistral.ai")
    model = llm_settings.get("model", "mistral/mistral-small-latest")

    try:
        # Using the litellm library to call the chat endpoint
        response = litellm.completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            api_base=api_base,
            api_key=api_key,
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        logger.exception("Error calling LLM: %s", e)
        return ""
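
A quick usage sketch for the new helper (assuming the package is importable as `vibe.llm`, that `MISTRAL_API_KEY` is exported, and that `vibe/llm_config.toml` exists as added below):

```python
# Minimal usage sketch for chat_llm; MISTRAL_API_KEY and llm_config.toml are assumed.
from vibe.llm import chat_llm

reply = chat_llm("In two sentences, explain what a reranker does in a retrieval pipeline.", level="low")
if reply:
    print(reply)
else:
    # chat_llm logs the exception and returns "" on failure.
    print("LLM call failed; check the logs.")
```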

vibe/llm_config.toml (new file, 8 lines)

@@ -0,0 +1,8 @@
[llms.low]
model = "mistral/mistral-small-latest"

[llms.medium]
model = "mistral/mistral-small-latest"

[llms.high]
model = "mistral/mistral-small-latest"

vibe/main.py (24 changes)

@@ -2,7 +2,7 @@ import argparse
import logging
from vibe.orchestrator import process_articles
from vibe.tts import text_to_speech
from vibe.config import DEFAULT_ARXIV_URL, DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
from vibe.config import DEFAULT_ARXIV_URL

logging.basicConfig(
    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"

@@ -17,20 +17,28 @@ def main():
    parser.add_argument("--max-articles", type=int, default=5, help="Maximum articles to process in the pipeline.")
    parser.add_argument("--new-only", action="store_true", help="Only process articles newer than cached.")
    parser.add_argument("--arxiv-url", type=str, default=DEFAULT_ARXIV_URL, help="URL for fetching arXiv articles.")
    parser.add_argument("--llm-url", type=str, default=DEFAULT_LLM_URL, help="URL of the LLM endpoint.")
    parser.add_argument("--model-name", type=str, default=DEFAULT_MODEL_NAME, help="Name of model to pass to the LLM endpoint.")
    parser.add_argument("--output", type=str, default="final_output.mp3", help="Output path for the generated MP3 file.")

    # New: LLM Level
    parser.add_argument("--llm-level", type=str, default="medium", choices=["low","medium","high"],
                        help="Desired LLM quality level: low, medium, or high. Defaults to medium.")

    args = parser.parse_args()

    if args.serve:
        from vibe.server import app
        logger.info("Starting Flask server.")
        app.run(debug=True)
        app.run(host='0.0.0.0', port=14200, debug=True)
    elif args.generate:
        logger.info("Running pipeline in CLI mode.")
        user_info = args.prompt
        final_summary = process_articles(user_info, arxiv_url=args.arxiv_url, llm_url=args.llm_url, model_name=args.model_name, max_articles=args.max_articles, new_only=args.new_only)
        final_summary = process_articles(
            user_info,
            arxiv_url=args.arxiv_url,
            max_articles=args.max_articles,
            new_only=args.new_only,
            llm_level=args.llm_level
        )
        if not final_summary.strip():
            logger.error("No summaries generated.")
            exit(1)

@@ -43,7 +51,7 @@ def main():
    else:
        logger.info("No mode specified; defaulting to Flask server.")
        from vibe.server import app
        app.run(debug=True)
        app.run(host='0.0.0.0', port=14200, debug=True)

if __name__ == "__main__":
    main()
    main()

vibe/orchestrator.py

@@ -9,11 +9,17 @@ from .filter import batch_relevance_filter
from .rerank import rerank_articles
from .converter import fetch_and_convert_article
from .summarizer import generate_article_summary
from .tts import text_to_speech

logger = logging.getLogger(__name__)

def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
def process_articles(
    user_info,
    arxiv_url=None,
    max_articles=5,
    new_only=False,
    trace_callback=None,
    llm_level="medium"
):
    """
    Executes the full pipeline:
    1. Fetch arXiv articles.

@@ -42,7 +48,10 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
            parts = id_str.split(".")
            return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
        most_recent = max(cached_articles, key=parse_id)
        articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
        articles = [
            article for article in articles
            if parse_id(article["id"]) > parse_id(most_recent)
        ]
        if trace_callback:
            trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
    else:

@@ -51,14 +60,14 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):

    if trace_callback:
        trace_callback("Performing relevance filtering via LLM...")
    relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
    relevant_ids = batch_relevance_filter(articles, user_info, llm_level=llm_level)
    relevant_articles = [article for article in articles if article["id"] in relevant_ids]
    if trace_callback:
        trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")

    if trace_callback:
        trace_callback("Reranking articles based on relevance...")
    reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
    reranked_articles = rerank_articles(relevant_articles, user_info, llm_level=llm_level)
    final_candidates = reranked_articles[:max_articles]

    if trace_callback:

@@ -80,7 +89,7 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
    summaries = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_article = {
            executor.submit(generate_article_summary, article, content, user_info, llm_url, model_name): article
            executor.submit(generate_article_summary, article, content, user_info, llm_level): article
            for article, content in articles_with_content
        }
        for future in concurrent.futures.as_completed(future_to_article):

@@ -103,6 +112,6 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
    final_summary = "\n\n".join(summaries)
    final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
    if trace_callback:
        trace_callback("Final summary generated. Starting TTS conversion.")
        trace_callback("Final summary generated.")
    logger.info("Final summary generated with length %d characters.", len(final_summary))
    return final_summary
    return final_summary

vibe/rerank.py

@@ -1,11 +1,12 @@
import json
import re
import requests
import logging

from .llm import chat_llm

logger = logging.getLogger(__name__)

def rerank_articles(articles, user_info, llm_url=None, model_name=None):
def rerank_articles(articles, user_info, llm_level="medium"):
    """
    Calls the LLM to reorder the articles by importance. Returns the reordered list.
    Expects a JSON response with a 'ranking' key pointing to a list of article IDs.

@@ -13,31 +14,21 @@ def rerank_articles(articles, user_info, llm_url=None, model_name=None):
    if not articles:
        return []

    if llm_url is None or model_name is None:
        from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
        llm_url = llm_url or DEFAULT_LLM_URL
        model_name = model_name or DEFAULT_MODEL_NAME

    logger.info("Starting rerank for %d articles.", len(articles))
    prompt_lines = [
        f"User info: {user_info}\n",
        'Please rank the following articles from most relevant to least relevant. Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.',
        ('Please rank the following articles from most relevant to least relevant. '
         'Return your answer as valid JSON in the format: { "ranking": [ "id1", "id2", ... ] }.')
    ]
    for article in articles:
        prompt_lines.append(
            f"Article ID: {article['id']}\nTitle: {article['title']}\nAbstract: {article['abstract']}\n"
        )
    prompt = "\n".join(prompt_lines)
    payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}]}

    try:
        response = requests.post(llm_url, json=payload)
        if response.status_code != 200:
            logger.error("LLM reranking request failed with status code: %d", response.status_code)
            return articles
        data = response.json()
        text_response = data["choices"][0]["message"]["content"].strip()
        match = re.search(r"\{.*\}", text_response, re.DOTALL)
        response_text = chat_llm(prompt, level=llm_level)
        match = re.search(r"\{.*\}", response_text, re.DOTALL)
        if not match:
            logger.error("No valid JSON found in rerank response.")
            return articles
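
The docstring's contract (a JSON object whose "ranking" key lists article IDs in preferred order) implies that applying the result is just an index-and-reorder step. A tiny illustration with made-up data (not the module's actual code):

```python
# Toy illustration of applying a {"ranking": [...]} response to reorder articles.
articles = [
    {"id": "a1", "title": "First"},
    {"id": "a2", "title": "Second"},
    {"id": "a3", "title": "Third"},
]
ranking = {"ranking": ["a2", "a3", "a1"]}  # shape the prompt asks the LLM to return

by_id = {a["id"]: a for a in articles}
reranked = [by_id[i] for i in ranking["ranking"] if i in by_id]
print([a["id"] for a in reranked])  # ['a2', 'a3', 'a1']
```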

vibe/server.py

@@ -2,7 +2,6 @@ from flask import Flask, send_file, request, jsonify, render_template
import logging
from vibe.orchestrator import process_articles
from vibe.config import CACHE_DIR

from flask_socketio import SocketIO, emit

logger = logging.getLogger(__name__)

@@ -24,7 +23,15 @@ def process_endpoint():
    # Define trace_callback to emit trace messages via WebSockets
    def trace_callback(message):
        socketio.emit("trace", {"message": message})
    final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback)

    final_summary = process_articles(
        user_info,
        arxiv_url=None,
        max_articles=max_articles,
        new_only=new_only,
        trace_callback=trace_callback,
        llm_level="medium"  # hard-coded here; could be user-configurable
    )
    if not final_summary.strip():
        logger.error("No summaries generated.")
        return jsonify({"error": "No summaries generated."}), 500
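
Because progress is streamed as Socket.IO "trace" events, a client can watch the pipeline while the MP3 is being generated. A minimal listener sketch (assuming the `python-socketio` client package and the localhost:5000 address used in the README example):

```python
# Minimal trace listener sketch; assumes `pip install "python-socketio[client]"` and
# that the Flask-SocketIO server from vibe/server.py is reachable on localhost:5000.
import socketio

sio = socketio.Client()

@sio.on("trace")
def on_trace(data):
    # Each trace_callback() call in the pipeline arrives as {"message": "..."}.
    print("trace:", data.get("message"))

sio.connect("http://localhost:5000")
sio.wait()  # keep listening until interrupted
```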

vibe/summarizer.py

@@ -1,38 +1,24 @@
import requests
import logging
from .config import DEFAULT_LLM_URL, DEFAULT_MODEL_NAME
from .llm import chat_llm

logger = logging.getLogger(__name__)

def generate_article_summary(article, content, user_info, llm_url=None, model_name=None):
def generate_article_summary(article, content, user_info, llm_level="medium"):
    """
    Generates a fluid, narrative summary for the article using the LLM.
    The summary starts with a connecting phrase.
    """
    if llm_url is None or model_name is None:
        llm_url = DEFAULT_LLM_URL
        model_name = DEFAULT_MODEL_NAME

    prompt = (
        f"User info: {user_info}\n\n"
        f"Please summarize the following article titled '{article['title']}' in a fluid narrative prose style without lists or visual cues. "
        f"Begin the summary with a connecting segment like 'And now, Article: {article['title']}'.\n\n"
        f"Article Content:\n{content}"
    )
    payload = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
    }

    logger.info("Generating summary for article '%s'.", article["id"])
    try:
        response = requests.post(llm_url, json=payload)
        if response.status_code != 200:
            logger.error("LLM summarization failed for article '%s'. Status code: %d", article["id"], response.status_code)
            return ""
        data = response.json()
        summary = data["choices"][0]["message"]["content"].strip()
        logger.debug("Summary for article '%s': %s", article["id"], summary[:100])
        return summary
        response_text = chat_llm(prompt, level=llm_level)
        return response_text
    except Exception as e:
        logger.exception("Error summarizing article '%s': %s", article["id"], e)
        return ""
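
The summarizer can also be exercised on its own; a minimal direct-call sketch (the article dict and content string are made-up placeholders, and only the `id` and `title` fields referenced above are required):

```python
# Direct-call sketch for generate_article_summary with placeholder inputs.
from vibe.summarizer import generate_article_summary

article = {"id": "0000.00000", "title": "A Placeholder Paper"}
content = "Plain text of the article as produced by the converter step..."

summary = generate_article_summary(article, content, user_info="AI, Machine Learning", llm_level="low")
print(summary or "Summarization failed; see the logs.")
```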