Spaces:
Sleeping
Sleeping
File size: 830 Bytes
b785739 7def0b3 b785739 7def0b3 b785739 7def0b3 b785739 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
from typing import Dict, List

from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, JSONResponse
from llama_cpp import Llama
# Application singleton; all route decorators below register against it.
app = FastAPI()

# Load the GGUF-quantized Phi-4 model once at import time so every request
# reuses the same in-memory model. NOTE(review): from_pretrained downloads
# the weights on first run — startup latency/disk usage depends on the
# HF cache; confirm this is acceptable for the deployment target.
llm = Llama.from_pretrained(
    repo_id="microsoft/phi-4-gguf",
    filename="phi-4-q4.gguf",
)
@app.get('/')
def home():
    """Serve a minimal HTML landing page.

    Returns:
        HTMLResponse: rendered as HTML by the browser. The previous version
        returned the raw string, which FastAPI JSON-encoded (content-type
        ``application/json``), so clients saw the literal ``"<h1>home</h1>"``
        instead of a heading.
    """
    return HTMLResponse(content="<h1>home</h1>")
@app.post("/chat")
async def chat(request: Request):
    """Run a chat completion against the module-level ``llm`` model.

    Expects a JSON body of the form ``{"messages": [...]}`` where the list
    follows the OpenAI-style chat message schema accepted by
    ``Llama.create_chat_completion``.

    Returns:
        JSONResponse: ``{"response": <assistant text>}`` on success,
        ``{"error": ...}`` with status 400 for bad client input or 500 for
        inference failures. (Previously every failure — including malformed
        request bodies — surfaced as a 500.)
    """
    # Parse the body separately so client-side errors map to 400, not 500.
    try:
        data = await request.json()
    except Exception:
        return JSONResponse(
            content={"error": "request body must be valid JSON"},
            status_code=400,
        )

    # A non-dict body (e.g. a bare JSON list) has no .get(); treat it as empty.
    messages = data.get("messages", []) if isinstance(data, dict) else []
    if not isinstance(messages, list) or not messages:
        return JSONResponse(
            content={"error": "'messages' must be a non-empty list"},
            status_code=400,
        )

    # Only the inference call remains under the broad handler: this is the
    # top-level boundary for model/runtime errors, reported as 500.
    try:
        response = llm.create_chat_completion(
            messages=messages
        )
        return JSONResponse(content={"response": response['choices'][0]['message']['content']})
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
# Script entry point: serve on all interfaces at port 7860 (the port
# HuggingFace Spaces expects). The stray trailing "|" on the original
# run line was a copy artifact and a syntax error; it is removed here.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)