Fixes

2025-03-02 03:53:46 +00:00 · 2025-03-02 03:53:46 +00:00 · 5bd4cf40a4
commit 5bd4cf40a4
parent 55215a0edb
5 changed files with 80 additions and 20 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -3,4 +3,5 @@ requests
 beautifulsoup4
 soundfile
 docling
-kokoro
+kokoro
+Flask-SocketIO
--- a/templates/index.html
+++ b/templates/index.html
@ -8,8 +8,11 @@
    .container { max-width: 600px; margin: auto; }
    input[type="text"], textarea { width: 100%; padding: 10px; margin: 8px 0; }
    button { padding: 10px 20px; font-size: 16px; }
+    #status { border: 1px solid #ccc; padding: 10px; margin-top: 20px; max-height: 200px; overflow-y: auto; }
    .hidden { display: none; }
  </style>
+  <!-- Include Socket.IO client library -->
+  <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.6.1/socket.io.min.js"></script>
 </head>
 <body>
  <div class="container">
@ -22,15 +25,29 @@
      <button type="submit">Submit</button>
    </form>
    <div id="status" class="hidden">
-      <p>Processing your request, please wait...</p>
+      <p><strong>Status Updates:</strong></p>
    </div>
  </div>
  <script>
+    // Initialize Socket.IO connection and listen for trace messages
+    var socket = io();
+    socket.on('trace', function(data) {
+      var statusDiv = document.getElementById('status');
+      if (statusDiv.classList.contains('hidden')) {
+        statusDiv.classList.remove('hidden');
+      }
+      var p = document.createElement('p');
+      p.textContent = data.message;
+      statusDiv.appendChild(p);
+    });
+
    document.getElementById('interestForm').addEventListener('submit', async function(e) {
      e.preventDefault();
-      const userInfo = document.getElementById('user_info').value;
-      document.getElementById('status').classList.remove('hidden');
-      
+      var userInfo = document.getElementById('user_info').value;
+      var statusDiv = document.getElementById('status');
+      statusDiv.innerHTML = "<p><strong>Status Updates:</strong></p>";
+      statusDiv.classList.remove('hidden');
+
      try {
        const response = await fetch('/process', {
          method: 'POST',
@ -60,7 +77,6 @@
      } catch (error) {
        alert('An error occurred: ' + error);
      }
-      document.getElementById('status').classList.add('hidden');
    });
  </script>
 </body>
--- a/vibe/config.py
+++ b/vibe/config.py
@ -6,7 +6,8 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)

-CACHE_DIR = "cache"
+BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+CACHE_DIR = os.path.join(BASE_DIR, "cache")
 if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR)
    logger.debug("Created cache directory: %s", CACHE_DIR)
--- a/vibe/orchestrator.py
+++ b/vibe/orchestrator.py
@ -13,7 +13,7 @@ from .tts import text_to_speech

 logger = logging.getLogger(__name__)

-def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False):
+def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
    """
    Executes the full pipeline:
      1. Fetch arXiv articles.
@ -25,10 +25,15 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
      7. Generate narrative summaries.
      8. Combine summaries into a final narrative.
    """
+    if trace_callback:
+        trace_callback("Starting pipeline: fetching arXiv articles...")
    articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url)
-    logger.info("Total articles fetched: %d", len(articles))
+    if trace_callback:
+        trace_callback(f"Fetched {len(articles)} articles from arXiv.")

    if new_only:
+        if trace_callback:
+            trace_callback("Filtering articles for new content based on cache...")
        cached_articles = [f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt")]
        if cached_articles:
            def parse_id(id_str):
@ -38,25 +43,40 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
                return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
            most_recent = max(cached_articles, key=parse_id)
            articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
-            logger.info("After filtering by most recent article id %s, %d articles remain.", most_recent, len(articles))
+            if trace_callback:
+                trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
        else:
-            logger.info("No cached articles found, proceeding with all fetched articles.")
+            if trace_callback:
+                trace_callback("No cached articles found; processing all fetched articles.")

+    if trace_callback:
+        trace_callback("Performing relevance filtering via LLM...")
    relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
    relevant_articles = [article for article in articles if article["id"] in relevant_ids]
-    logger.info("Found %d relevant articles out of %d.", len(relevant_articles), len(articles))
+    if trace_callback:
+        trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")

+    if trace_callback:
+        trace_callback("Reranking articles based on relevance...")
    reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
    final_candidates = reranked_articles[:max_articles]

+    if trace_callback:
+        trace_callback("Converting article PDFs to Markdown...")
    articles_with_content = []
    for article in final_candidates:
        content = fetch_and_convert_article(article)
        if content:
            articles_with_content.append((article, content))
+            if trace_callback:
+                trace_callback(f"Converted article {article['id']} to Markdown.")
        else:
            logger.warning("No content obtained for article '%s'.", article["id"])
+            if trace_callback:
+                trace_callback(f"Failed to convert article {article['id']}.")

+    if trace_callback:
+        trace_callback("Generating narrative summaries for articles...")
    summaries = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_article = {
@ -69,12 +89,20 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
                summary = future.result()
                if summary:
                    summaries.append(summary)
+                    if trace_callback:
+                        trace_callback(f"Generated summary for article {article['id']}.")
                else:
                    logger.warning("No summary generated for article '%s'.", article["id"])
+                    if trace_callback:
+                        trace_callback(f"Summary generation failed for article {article['id']}.")
            except Exception as e:
                logger.exception("Error generating summary for article '%s': %s", article["id"], e)
+                if trace_callback:
+                    trace_callback(f"Error generating summary for article {article['id']}.")

    final_summary = "\n\n".join(summaries)
    final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
+    if trace_callback:
+        trace_callback("Final summary generated. Starting TTS conversion.")
    logger.info("Final summary generated with length %d characters.", len(final_summary))
-    return final_summary
+    return final_summary
--- a/vibe/server.py
+++ b/vibe/server.py
@ -1,10 +1,13 @@
 from flask import Flask, send_file, request, jsonify, render_template
 import logging
-from .orchestrator import process_articles
-from .config import CACHE_DIR
+from vibe.orchestrator import process_articles
+from vibe.config import CACHE_DIR
+
+from flask_socketio import SocketIO, emit

 logger = logging.getLogger(__name__)
-app = Flask(__name__)
+app = Flask(__name__, template_folder="../templates")
+socketio = SocketIO(app)

@app.route("/process", methods=["POST"])
 def process_endpoint():
@ -18,15 +21,22 @@ def process_endpoint():
    new_only = data.get("new_only", False)

    logger.info("Processing request with user_info: %s, max_articles: %s, new_only: %s", user_info, max_articles, new_only)
-    final_summary = process_articles(user_info, max_articles=max_articles, new_only=new_only)
+    # Define trace_callback to emit trace messages via WebSockets
+    def trace_callback(message):
+        socketio.emit("trace", {"message": message})
+    final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback)
    if not final_summary.strip():
        logger.error("No summaries generated.")
        return jsonify({"error": "No summaries generated."}), 500

-    output_mp3 = f"{CACHE_DIR}/final_output.mp3"
+    import uuid, os
+    mp3_filename = f"final_{uuid.uuid4().hex}.mp3"
+    output_mp3 = os.path.join(CACHE_DIR, mp3_filename)
+
    try:
-        from .tts import text_to_speech
+        from vibe.tts import text_to_speech
        text_to_speech(final_summary, output_mp3)
+        trace_callback("Text-to-Speech conversion complete. MP3 file generated.")
    except Exception as e:
        logger.exception("TTS conversion failed: %s", e)
        return jsonify({"error": f"TTS conversion failed: {e}"}), 500
@ -38,5 +48,9 @@ def process_endpoint():
 def index():
    return render_template("index.html")

+@socketio.on("connect")
+def handle_connect():
+    emit("trace", {"message": "Connected to server. Ready to process your request."})
+
 if __name__ == "__main__":
-    app.run(debug=True)
+    socketio.run(app, debug=True)