From 5bd4cf40a408d447beb96fb93f133ef3331c6aa6 Mon Sep 17 00:00:00 2001
From: Regis David Souza Mesquita
Date: Sun, 2 Mar 2025 03:53:46 +0000
Subject: [PATCH] Fixes

---
 requirements.txt     |  3 ++-
 templates/index.html | 26 +++++++++++++++++++++-----
 vibe/config.py       |  3 ++-
 vibe/orchestrator.py | 40 ++++++++++++++++++++++++++++++++++------
 vibe/server.py       | 28 +++++++++++++++++++++-------
 5 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index fc6e61a..03fc57c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ requests
 beautifulsoup4
 soundfile
 docling
-kokoro
\ No newline at end of file
+kokoro
+Flask-SocketIO
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
index d948cd0..6a97e48 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -8,8 +8,11 @@
         .container { max-width: 600px; margin: auto; }
         input[type="text"], textarea { width: 100%; padding: 10px; margin: 8px 0; }
         button { padding: 10px 20px; font-size: 16px; }
+        #status { border: 1px solid #ccc; padding: 10px; margin-top: 20px; max-height: 200px; overflow-y: auto; }
         .hidden { display: none; }
+
+
@@ -22,15 +25,29 @@
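Note on the template change: the new #status box is fed by "trace" events that the server side of this patch (vibe/server.py below) pushes over Socket.IO, each with a payload of the form {"message": "..."}. As a rough sketch of a consumer of that channel, written against the python-socketio client purely for illustration (it is not part of this patch; the page itself listens with Socket.IO's JavaScript client):

    # Hypothetical stand-in for the page's script: subscribe to the same
    # "trace" events the server emits and print each message as it arrives.
    import socketio  # pip install "python-socketio[client]" -- illustration only

    sio = socketio.Client()

    @sio.on("trace")
    def on_trace(data):
        # Payload shape matches socketio.emit("trace", {"message": ...}) in vibe/server.py
        print("trace:", data["message"])

    sio.connect("http://localhost:5000")  # assumes the default Flask dev address
    sio.wait()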
diff --git a/vibe/config.py b/vibe/config.py
index 51dc1ac..0f13110 100644
--- a/vibe/config.py
+++ b/vibe/config.py
@@ -6,7 +6,8 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-CACHE_DIR = "cache"
+BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+CACHE_DIR = os.path.join(BASE_DIR, "cache")
 if not os.path.exists(CACHE_DIR):
     os.makedirs(CACHE_DIR)
     logger.debug("Created cache directory: %s", CACHE_DIR)
diff --git a/vibe/orchestrator.py b/vibe/orchestrator.py
index bd174fe..9272cc4 100644
--- a/vibe/orchestrator.py
+++ b/vibe/orchestrator.py
@@ -13,7 +13,7 @@ from .tts import text_to_speech
 
 logger = logging.getLogger(__name__)
 
-def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False):
+def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
     """
     Executes the full pipeline:
       1. Fetch arXiv articles.
@@ -25,10 +25,15 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
       7. Generate narrative summaries.
       8. Combine summaries into a final narrative.
     """
+    if trace_callback:
+        trace_callback("Starting pipeline: fetching arXiv articles...")
     articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url)
-    logger.info("Total articles fetched: %d", len(articles))
+    if trace_callback:
+        trace_callback(f"Fetched {len(articles)} articles from arXiv.")
 
     if new_only:
+        if trace_callback:
+            trace_callback("Filtering articles for new content based on cache...")
         cached_articles = [f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt")]
         if cached_articles:
             def parse_id(id_str):
@@ -38,25 +43,40 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
                 return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
             most_recent = max(cached_articles, key=parse_id)
             articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
-            logger.info("After filtering by most recent article id %s, %d articles remain.", most_recent, len(articles))
+            if trace_callback:
+                trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
         else:
-            logger.info("No cached articles found, proceeding with all fetched articles.")
+            if trace_callback:
+                trace_callback("No cached articles found; processing all fetched articles.")
 
+    if trace_callback:
+        trace_callback("Performing relevance filtering via LLM...")
     relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
     relevant_articles = [article for article in articles if article["id"] in relevant_ids]
-    logger.info("Found %d relevant articles out of %d.", len(relevant_articles), len(articles))
+    if trace_callback:
+        trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")
 
+    if trace_callback:
+        trace_callback("Reranking articles based on relevance...")
     reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
     final_candidates = reranked_articles[:max_articles]
 
+    if trace_callback:
+        trace_callback("Converting article PDFs to Markdown...")
     articles_with_content = []
     for article in final_candidates:
         content = fetch_and_convert_article(article)
         if content:
             articles_with_content.append((article, content))
+            if trace_callback:
+                trace_callback(f"Converted article {article['id']} to Markdown.")
         else:
             logger.warning("No content obtained for article '%s'.", article["id"])
+            if trace_callback:
+                trace_callback(f"Failed to convert article {article['id']}.")
 
+    if trace_callback:
+        trace_callback("Generating narrative summaries for articles...")
     summaries = []
     with concurrent.futures.ThreadPoolExecutor() as executor:
         future_to_article = {
@@ -69,12 +89,20 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
                 summary = future.result()
                 if summary:
                     summaries.append(summary)
+                    if trace_callback:
+                        trace_callback(f"Generated summary for article {article['id']}.")
                 else:
                     logger.warning("No summary generated for article '%s'.", article["id"])
+                    if trace_callback:
+                        trace_callback(f"Summary generation failed for article {article['id']}.")
             except Exception as e:
                 logger.exception("Error generating summary for article '%s': %s", article["id"], e)
+                if trace_callback:
+                    trace_callback(f"Error generating summary for article {article['id']}.")
 
     final_summary = "\n\n".join(summaries)
     final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
+    if trace_callback:
+        trace_callback("Final summary generated. Starting TTS conversion.")
     logger.info("Final summary generated with length %d characters.", len(final_summary))
-    return final_summary
\ No newline at end of file
+    return final_summary
diff --git a/vibe/server.py b/vibe/server.py
index d7233e0..04f54b2 100644
--- a/vibe/server.py
+++ b/vibe/server.py
@@ -1,10 +1,13 @@
 from flask import Flask, send_file, request, jsonify, render_template
 import logging
-from .orchestrator import process_articles
-from .config import CACHE_DIR
+from vibe.orchestrator import process_articles
+from vibe.config import CACHE_DIR
+
+from flask_socketio import SocketIO, emit
 
 logger = logging.getLogger(__name__)
-app = Flask(__name__)
+app = Flask(__name__, template_folder="../templates")
+socketio = SocketIO(app)
 
 @app.route("/process", methods=["POST"])
 def process_endpoint():
@@ -18,15 +21,22 @@ def process_endpoint():
     new_only = data.get("new_only", False)
     logger.info("Processing request with user_info: %s, max_articles: %s, new_only: %s",
                 user_info, max_articles, new_only)
-    final_summary = process_articles(user_info, max_articles=max_articles, new_only=new_only)
+    # Define trace_callback to emit trace messages via WebSockets
+    def trace_callback(message):
+        socketio.emit("trace", {"message": message})
+    final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback)
     if not final_summary.strip():
         logger.error("No summaries generated.")
         return jsonify({"error": "No summaries generated."}), 500
 
-    output_mp3 = f"{CACHE_DIR}/final_output.mp3"
+    import uuid, os
+    mp3_filename = f"final_{uuid.uuid4().hex}.mp3"
+    output_mp3 = os.path.join(CACHE_DIR, mp3_filename)
+
     try:
-        from .tts import text_to_speech
+        from vibe.tts import text_to_speech
         text_to_speech(final_summary, output_mp3)
+        trace_callback("Text-to-Speech conversion complete. MP3 file generated.")
     except Exception as e:
         logger.exception("TTS conversion failed: %s", e)
         return jsonify({"error": f"TTS conversion failed: {e}"}), 500
@@ -38,5 +48,9 @@ def process_endpoint():
 def index():
     return render_template("index.html")
 
+@socketio.on("connect")
+def handle_connect():
+    emit("trace", {"message": "Connected to server. Ready to process your request."})
+
 if __name__ == "__main__":
-    app.run(debug=True)
\ No newline at end of file
+    socketio.run(app, debug=True)
\ No newline at end of file
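
The server-side pattern is worth spelling out: process_articles never imports anything Socket.IO-specific; it only calls an optional trace_callback, and /process supplies a callback that forwards each message as a "trace" event. A stripped-down sketch of the same wiring, with hypothetical names (long_pipeline, /run) and none of the project's actual pipeline code:

    from flask import Flask
    from flask_socketio import SocketIO

    app = Flask(__name__)
    socketio = SocketIO(app)

    def long_pipeline(trace_callback=None):
        # Stand-in for process_articles: report progress through a plain callable.
        for step in ("fetching", "filtering", "summarizing"):
            if trace_callback:
                trace_callback(f"step: {step}")
        return "done"

    @app.route("/run", methods=["POST"])
    def run():
        # Same idea as /process: the callback closes over socketio and
        # forwards each progress message as a "trace" event.
        result = long_pipeline(trace_callback=lambda msg: socketio.emit("trace", {"message": msg}))
        return {"status": result}

    if __name__ == "__main__":
        socketio.run(app, debug=True)

One consequence of this wiring: socketio.emit called outside a Socket.IO event handler broadcasts to every connected client, so with several browsers open, each one sees all trace messages.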