import asyncio
import json
import os

import dotenv
from flask import Flask, jsonify, render_template, request

from llm.agents import query_moa_chain

# Load secrets from .env
dotenv.load_dotenv()

app = Flask(__name__)


@app.route("/")
def index():
    return render_template("index.html")


@app.route("/docs")
def docs():
    return render_template("docs.html")


# === New models endpoint ===
@app.route("/models", methods=["GET"])
def get_models():
    """List the configured models with human-readable display names."""
    try:
        with open("llm/model_config.json", "r") as f:
            config = json.load(f)
        # Derive a display name from each model ID,
        # e.g. "org/some-model:tag" -> "Some Model".
        models = [
            {
                "id": model_id,
                "name": model_id.split(":")[0].split("/")[-1].replace("-", " ").title(),
            }
            for model_id in config["models"].keys()
        ]
        return jsonify(models)
    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/chat", methods=["POST"])
def chat():
    # Tolerate a missing or non-JSON body instead of raising an unhandled 500.
    data = request.get_json(silent=True) or {}
    user_input = data.get("prompt", "")
    settings = data.get("settings", {})
    if not user_input:
        return jsonify({"error": "Empty prompt."}), 400
    try:
        # Run the fully async MoA chain to completion on a fresh event loop.
        final_response = asyncio.run(query_moa_chain(user_input, settings))
        return jsonify({"response": final_response})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)  # Hugging Face uses port 7860
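
# Illustrative client-side sketch (an assumption, not part of this app): with
# the server running, the /chat endpoint could be exercised from Python using
# the third-party `requests` library. The prompt text and the empty settings
# dict below are placeholders; the keys query_moa_chain actually expects in
# `settings` are defined in llm/agents.py.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"prompt": "Say hello.", "settings": {}},
#       timeout=120,
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])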