# Hugging Face Spaces page residue (status lines "Spaces: Sleeping") — not code;
# kept here as a comment so the module parses.
from fastapi import FastAPI, Request | |
from fastapi.responses import JSONResponse | |
from typing import Dict, List | |
from llama_cpp import Llama | |
# FastAPI application instance; the route handlers below are served from it.
app = FastAPI()

# Load the 4-bit-quantized Phi-4 GGUF model from the Hugging Face Hub.
# NOTE(review): this runs at import time — first startup blocks on the
# model download; confirm that is acceptable for the deployment target.
llm = Llama.from_pretrained(
    repo_id="microsoft/phi-4-gguf",
    filename="phi-4-q4.gguf",
)
def home():
    """Return the minimal HTML landing-page markup.

    NOTE(review): no route decorator is visible on this handler — presumably
    ``@app.get("/")`` was lost when the file was extracted; confirm.
    """
    return "<h1>home</h1>"
async def chat(request: Request):
    """Run one chat completion against the shared ``llm`` instance.

    Expects a JSON body shaped like ``{"messages": [...]}`` (an OpenAI-style
    message list) and returns ``{"response": <assistant text>}``.

    Returns 400 for an unreadable/invalid JSON body, 500 if the model call
    or response extraction fails.

    NOTE(review): no route decorator is visible on this handler — presumably
    ``@app.post(...)`` was lost when the file was extracted; confirm.
    """
    # A malformed or absent JSON body is a client error, not a server fault.
    try:
        data = await request.json()
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)
    messages = data.get("messages", [])
    try:
        response = llm.create_chat_completion(
            messages=messages
        )
        return JSONResponse(content={"response": response['choices'][0]['message']['content']})
    except Exception as e:
        # Top-level boundary: surface the failure to the client as a 500.
        return JSONResponse(content={"error": str(e)}, status_code=500)
if __name__ == "__main__":
    # Direct-execution entry point: serve on all interfaces at port 7860
    # (the port Hugging Face Spaces expects).
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)