File size: 830 Bytes
b785739
 
 
7def0b3
 
 
 
b785739
 
 
 
 
 
 
 
 
 
 
 
7def0b3
b785739
 
 
 
 
 
 
 
7def0b3
b785739
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from typing import Dict, List

from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, JSONResponse
from llama_cpp import Llama

# FastAPI application object; the route decorators below register on it.
app = FastAPI()

# Model handle created once at import time and reused by the /chat handler.
# NOTE(review): from_pretrained presumably fetches the named GGUF file from
# the given Hugging Face repo — confirm against the llama-cpp-python docs.
llm = Llama.from_pretrained(
    repo_id="microsoft/phi-4-gguf",
    filename="phi-4-q4.gguf",
)

@app.get('/', response_class=HTMLResponse)
def home() -> str:
    """Serve a minimal HTML landing page.

    The route declares ``HTMLResponse`` as its response class so the markup
    is sent as ``text/html``. Without it FastAPI JSON-encodes the returned
    string, and the client receives a quoted JSON string instead of a page.

    Returns:
        The HTML markup for the landing page.
    """
    return "<h1>home</h1>"


@app.post("/chat")
async def chat(request: Request):
    """Run a chat completion over the messages in the JSON request body.

    The body must be a JSON object whose ``"messages"`` key holds a
    non-empty list; that list is passed unchanged to
    ``llm.create_chat_completion``.

    Args:
        request: Incoming HTTP request carrying the JSON payload.

    Returns:
        A ``JSONResponse``:
        * 200 ``{"response": ...}`` with the content of the first choice's
          message on success;
        * 400 ``{"error": ...}`` when the body is not valid JSON, is not a
          JSON object, or has no non-empty ``"messages"`` list;
        * 500 ``{"error": ...}`` when the completion itself fails.
    """
    # Malformed payloads are the client's fault: report 400 rather than 500.
    try:
        data = await request.json()
    except ValueError:
        # Covers JSON syntax errors and undecodable bytes.
        return JSONResponse(
            content={"error": "Request body must be valid JSON"},
            status_code=400,
        )

    if not isinstance(data, dict):
        return JSONResponse(
            content={"error": "Request body must be a JSON object"},
            status_code=400,
        )

    messages = data.get("messages")
    if not isinstance(messages, list) or not messages:
        return JSONResponse(
            content={"error": "'messages' must be a non-empty list"},
            status_code=400,
        )

    # NOTE(review): create_chat_completion is called synchronously inside
    # this coroutine, so it appears to occupy the event loop for the whole
    # generation — confirm whether that serialisation is intended.
    try:
        response = llm.create_chat_completion(
            messages=messages
        )
        content = response['choices'][0]['message']['content']
    except Exception as e:
        # Top-level boundary: surface model/runtime failures as a JSON 500.
        return JSONResponse(content={"error": str(e)}, status_code=500)

    return JSONResponse(content={"response": content})


if __name__ == "__main__":
    # Script entry point: uvicorn is imported only when the file is run
    # directly, then serves the app on all interfaces at port 7860.
    from uvicorn import run as serve

    serve(app, port=7860, host="0.0.0.0")