"""Minimal FastAPI chat server backed by a local GGUF model via llama-cpp-python."""

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from llama_cpp import Llama

app = FastAPI()

# Download (if not cached) and load the quantized Phi-4 model ONCE at import
# time — loading a multi-GB GGUF per request would be prohibitively slow.
llm = Llama.from_pretrained(
    repo_id="microsoft/phi-4-gguf",
    filename="phi-4-q4.gguf",
)


@app.get("/")
def home() -> str:
    """Liveness / landing endpoint."""
    # BUG FIX: the original returned an unterminated string literal spanning
    # several physical lines, which is a SyntaxError in Python.
    return "home"


@app.post("/chat")
async def chat(request: Request) -> JSONResponse:
    """Run one chat completion over the posted message list.

    Expects a JSON body of the form
    ``{"messages": [{"role": "user", "content": "..."}, ...]}`` (OpenAI-style)
    and returns ``{"response": <assistant message text>}``.

    Returns a 500 with ``{"error": <message>}`` on any failure (bad JSON,
    malformed messages, model errors) — a deliberate catch-all at the HTTP
    boundary so the server never surfaces a raw traceback to the client.
    """
    try:
        data = await request.json()
        # Default to an empty conversation if the key is absent.
        messages = data.get("messages", [])
        response = llm.create_chat_completion(messages=messages)
        return JSONResponse(
            content={"response": response["choices"][0]["message"]["content"]}
        )
    except Exception as e:  # boundary handler: report, don't crash
        return JSONResponse(content={"error": str(e)}, status_code=500)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)