import asyncio
import json
import os

import dotenv
from flask import Flask, jsonify, render_template, request

from llm.agents import query_moa_chain

# Load secrets from .env
dotenv.load_dotenv()

app = Flask(__name__)


@app.route("/")
def index():
    return render_template("index.html")


@app.route("/docs")
def docs():
    return render_template("docs.html")


# === New models endpoint ===
@app.route("/models", methods=["GET"])
def get_models():
    """List the configured models with human-readable display names."""
    try:
        with open("llm/model_config.json", "r") as f:
            config = json.load(f)
        # Derive a display name from each model ID,
        # e.g. "org/some-model:tag" -> "Some Model".
        models = [
            {
                "id": model_id,
                "name": model_id.split(":")[0].split("/")[-1].replace("-", " ").title(),
            }
            for model_id in config["models"].keys()
        ]
        return jsonify(models)
    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/chat", methods=["POST"])
def chat():
    # Tolerate a missing or non-JSON body instead of raising an unhandled 500.
    data = request.get_json(silent=True) or {}
    user_input = data.get("prompt", "")
    settings = data.get("settings", {})
    if not user_input:
        return jsonify({"error": "Empty prompt."}), 400
    try:
        # Run the fully async MoA chain to completion on a fresh event loop.
        final_response = asyncio.run(query_moa_chain(user_input, settings))
        return jsonify({"response": final_response})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)  # Hugging Face uses port 7860
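
# Illustrative client-side sketch (an assumption, not part of this app): with
# the server running, the /chat endpoint could be exercised from Python using
# the third-party `requests` library. The prompt text and the empty settings
# dict below are placeholders; the keys query_moa_chain actually expects in
# `settings` are defined in llm/agents.py.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"prompt": "Say hello.", "settings": {}},
#       timeout=120,
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])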