Fixes
This commit is contained in:
parent
55215a0edb
commit
5bd4cf40a4
5 changed files with 80 additions and 20 deletions
|
|
@ -4,3 +4,4 @@ beautifulsoup4
|
||||||
soundfile
|
soundfile
|
||||||
docling
|
docling
|
||||||
kokoro
|
kokoro
|
||||||
|
Flask-SocketIO
|
||||||
|
|
@ -8,8 +8,11 @@
|
||||||
.container { max-width: 600px; margin: auto; }
|
.container { max-width: 600px; margin: auto; }
|
||||||
input[type="text"], textarea { width: 100%; padding: 10px; margin: 8px 0; }
|
input[type="text"], textarea { width: 100%; padding: 10px; margin: 8px 0; }
|
||||||
button { padding: 10px 20px; font-size: 16px; }
|
button { padding: 10px 20px; font-size: 16px; }
|
||||||
|
#status { border: 1px solid #ccc; padding: 10px; margin-top: 20px; max-height: 200px; overflow-y: auto; }
|
||||||
.hidden { display: none; }
|
.hidden { display: none; }
|
||||||
</style>
|
</style>
|
||||||
|
<!-- Include Socket.IO client library -->
|
||||||
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.6.1/socket.io.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
|
|
@ -22,14 +25,28 @@
|
||||||
<button type="submit">Submit</button>
|
<button type="submit">Submit</button>
|
||||||
</form>
|
</form>
|
||||||
<div id="status" class="hidden">
|
<div id="status" class="hidden">
|
||||||
<p>Processing your request, please wait...</p>
|
<p><strong>Status Updates:</strong></p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
|
// Initialize Socket.IO connection and listen for trace messages
|
||||||
|
var socket = io();
|
||||||
|
socket.on('trace', function(data) {
|
||||||
|
var statusDiv = document.getElementById('status');
|
||||||
|
if (statusDiv.classList.contains('hidden')) {
|
||||||
|
statusDiv.classList.remove('hidden');
|
||||||
|
}
|
||||||
|
var p = document.createElement('p');
|
||||||
|
p.textContent = data.message;
|
||||||
|
statusDiv.appendChild(p);
|
||||||
|
});
|
||||||
|
|
||||||
document.getElementById('interestForm').addEventListener('submit', async function(e) {
|
document.getElementById('interestForm').addEventListener('submit', async function(e) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
const userInfo = document.getElementById('user_info').value;
|
var userInfo = document.getElementById('user_info').value;
|
||||||
document.getElementById('status').classList.remove('hidden');
|
var statusDiv = document.getElementById('status');
|
||||||
|
statusDiv.innerHTML = "<p><strong>Status Updates:</strong></p>";
|
||||||
|
statusDiv.classList.remove('hidden');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch('/process', {
|
const response = await fetch('/process', {
|
||||||
|
|
@ -60,7 +77,6 @@
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
alert('An error occurred: ' + error);
|
alert('An error occurred: ' + error);
|
||||||
}
|
}
|
||||||
document.getElementById('status').classList.add('hidden');
|
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,8 @@ logging.basicConfig(
|
||||||
)
|
)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
CACHE_DIR = "cache"
|
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||||
|
CACHE_DIR = os.path.join(BASE_DIR, "cache")
|
||||||
if not os.path.exists(CACHE_DIR):
|
if not os.path.exists(CACHE_DIR):
|
||||||
os.makedirs(CACHE_DIR)
|
os.makedirs(CACHE_DIR)
|
||||||
logger.debug("Created cache directory: %s", CACHE_DIR)
|
logger.debug("Created cache directory: %s", CACHE_DIR)
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from .tts import text_to_speech
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False):
|
def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=5, new_only=False, trace_callback=None):
|
||||||
"""
|
"""
|
||||||
Executes the full pipeline:
|
Executes the full pipeline:
|
||||||
1. Fetch arXiv articles.
|
1. Fetch arXiv articles.
|
||||||
|
|
@ -25,10 +25,15 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
|
||||||
7. Generate narrative summaries.
|
7. Generate narrative summaries.
|
||||||
8. Combine summaries into a final narrative.
|
8. Combine summaries into a final narrative.
|
||||||
"""
|
"""
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Starting pipeline: fetching arXiv articles...")
|
||||||
articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url)
|
articles = fetch_arxiv_list(force_refresh=new_only, arxiv_url=arxiv_url)
|
||||||
logger.info("Total articles fetched: %d", len(articles))
|
if trace_callback:
|
||||||
|
trace_callback(f"Fetched {len(articles)} articles from arXiv.")
|
||||||
|
|
||||||
if new_only:
|
if new_only:
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Filtering articles for new content based on cache...")
|
||||||
cached_articles = [f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt")]
|
cached_articles = [f[:-4] for f in os.listdir(ARTICLES_CACHE_DIR) if f.endswith(".txt")]
|
||||||
if cached_articles:
|
if cached_articles:
|
||||||
def parse_id(id_str):
|
def parse_id(id_str):
|
||||||
|
|
@ -38,25 +43,40 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
|
||||||
return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
|
return (int(parts[0][:2]), int(parts[0][2:]), int(parts[1]))
|
||||||
most_recent = max(cached_articles, key=parse_id)
|
most_recent = max(cached_articles, key=parse_id)
|
||||||
articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
|
articles = [article for article in articles if parse_id(article["id"]) > parse_id(most_recent)]
|
||||||
logger.info("After filtering by most recent article id %s, %d articles remain.", most_recent, len(articles))
|
if trace_callback:
|
||||||
|
trace_callback(f"After filtering by most recent article id {most_recent}, {len(articles)} articles remain.")
|
||||||
else:
|
else:
|
||||||
logger.info("No cached articles found, proceeding with all fetched articles.")
|
if trace_callback:
|
||||||
|
trace_callback("No cached articles found; processing all fetched articles.")
|
||||||
|
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Performing relevance filtering via LLM...")
|
||||||
relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
|
relevant_ids = batch_relevance_filter(articles, user_info, llm_url=llm_url, model_name=model_name)
|
||||||
relevant_articles = [article for article in articles if article["id"] in relevant_ids]
|
relevant_articles = [article for article in articles if article["id"] in relevant_ids]
|
||||||
logger.info("Found %d relevant articles out of %d.", len(relevant_articles), len(articles))
|
if trace_callback:
|
||||||
|
trace_callback(f"Identified {len(relevant_articles)} relevant articles out of {len(articles)}.")
|
||||||
|
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Reranking articles based on relevance...")
|
||||||
reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
|
reranked_articles = rerank_articles(relevant_articles, user_info, llm_url=llm_url, model_name=model_name)
|
||||||
final_candidates = reranked_articles[:max_articles]
|
final_candidates = reranked_articles[:max_articles]
|
||||||
|
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Converting article PDFs to Markdown...")
|
||||||
articles_with_content = []
|
articles_with_content = []
|
||||||
for article in final_candidates:
|
for article in final_candidates:
|
||||||
content = fetch_and_convert_article(article)
|
content = fetch_and_convert_article(article)
|
||||||
if content:
|
if content:
|
||||||
articles_with_content.append((article, content))
|
articles_with_content.append((article, content))
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback(f"Converted article {article['id']} to Markdown.")
|
||||||
else:
|
else:
|
||||||
logger.warning("No content obtained for article '%s'.", article["id"])
|
logger.warning("No content obtained for article '%s'.", article["id"])
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback(f"Failed to convert article {article['id']}.")
|
||||||
|
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Generating narrative summaries for articles...")
|
||||||
summaries = []
|
summaries = []
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
future_to_article = {
|
future_to_article = {
|
||||||
|
|
@ -69,12 +89,20 @@ def process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, m
|
||||||
summary = future.result()
|
summary = future.result()
|
||||||
if summary:
|
if summary:
|
||||||
summaries.append(summary)
|
summaries.append(summary)
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback(f"Generated summary for article {article['id']}.")
|
||||||
else:
|
else:
|
||||||
logger.warning("No summary generated for article '%s'.", article["id"])
|
logger.warning("No summary generated for article '%s'.", article["id"])
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback(f"Summary generation failed for article {article['id']}.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Error generating summary for article '%s': %s", article["id"], e)
|
logger.exception("Error generating summary for article '%s': %s", article["id"], e)
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback(f"Error generating summary for article {article['id']}.")
|
||||||
|
|
||||||
final_summary = "\n\n".join(summaries)
|
final_summary = "\n\n".join(summaries)
|
||||||
final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
|
final_summary += f"\n\nThanks for listening to the report. Generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')} by vibe."
|
||||||
|
if trace_callback:
|
||||||
|
trace_callback("Final summary generated. Starting TTS conversion.")
|
||||||
logger.info("Final summary generated with length %d characters.", len(final_summary))
|
logger.info("Final summary generated with length %d characters.", len(final_summary))
|
||||||
return final_summary
|
return final_summary
|
||||||
|
|
@ -1,10 +1,13 @@
|
||||||
from flask import Flask, send_file, request, jsonify, render_template
|
from flask import Flask, send_file, request, jsonify, render_template
|
||||||
import logging
|
import logging
|
||||||
from .orchestrator import process_articles
|
from vibe.orchestrator import process_articles
|
||||||
from .config import CACHE_DIR
|
from vibe.config import CACHE_DIR
|
||||||
|
|
||||||
|
from flask_socketio import SocketIO, emit
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
app = Flask(__name__)
|
app = Flask(__name__, template_folder="../templates")
|
||||||
|
socketio = SocketIO(app)
|
||||||
|
|
||||||
@app.route("/process", methods=["POST"])
|
@app.route("/process", methods=["POST"])
|
||||||
def process_endpoint():
|
def process_endpoint():
|
||||||
|
|
@ -18,15 +21,22 @@ def process_endpoint():
|
||||||
new_only = data.get("new_only", False)
|
new_only = data.get("new_only", False)
|
||||||
|
|
||||||
logger.info("Processing request with user_info: %s, max_articles: %s, new_only: %s", user_info, max_articles, new_only)
|
logger.info("Processing request with user_info: %s, max_articles: %s, new_only: %s", user_info, max_articles, new_only)
|
||||||
final_summary = process_articles(user_info, max_articles=max_articles, new_only=new_only)
|
# Define trace_callback to emit trace messages via WebSockets
|
||||||
|
def trace_callback(message):
|
||||||
|
socketio.emit("trace", {"message": message})
|
||||||
|
final_summary = process_articles(user_info, arxiv_url=None, llm_url=None, model_name=None, max_articles=max_articles, new_only=new_only, trace_callback=trace_callback)
|
||||||
if not final_summary.strip():
|
if not final_summary.strip():
|
||||||
logger.error("No summaries generated.")
|
logger.error("No summaries generated.")
|
||||||
return jsonify({"error": "No summaries generated."}), 500
|
return jsonify({"error": "No summaries generated."}), 500
|
||||||
|
|
||||||
output_mp3 = f"{CACHE_DIR}/final_output.mp3"
|
import uuid, os
|
||||||
|
mp3_filename = f"final_{uuid.uuid4().hex}.mp3"
|
||||||
|
output_mp3 = os.path.join(CACHE_DIR, mp3_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from .tts import text_to_speech
|
from vibe.tts import text_to_speech
|
||||||
text_to_speech(final_summary, output_mp3)
|
text_to_speech(final_summary, output_mp3)
|
||||||
|
trace_callback("Text-to-Speech conversion complete. MP3 file generated.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("TTS conversion failed: %s", e)
|
logger.exception("TTS conversion failed: %s", e)
|
||||||
return jsonify({"error": f"TTS conversion failed: {e}"}), 500
|
return jsonify({"error": f"TTS conversion failed: {e}"}), 500
|
||||||
|
|
@ -38,5 +48,9 @@ def process_endpoint():
|
||||||
def index():
|
def index():
|
||||||
return render_template("index.html")
|
return render_template("index.html")
|
||||||
|
|
||||||
|
@socketio.on("connect")
|
||||||
|
def handle_connect():
|
||||||
|
emit("trace", {"message": "Connected to server. Ready to process your request."})
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(debug=True)
|
socketio.run(app, debug=True)
|
||||||
Loading…
Add table
Reference in a new issue