Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,49 +1,35 @@
|
|
1 |
-
from fastapi import FastAPI,
|
|
|
|
|
2 |
from llama_cpp import Llama
|
3 |
-
import os
|
4 |
-
import requests
|
5 |
|
6 |
app = FastAPI()
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
if chunk:
|
21 |
-
f.write(chunk)
|
22 |
-
print("Download complete.")
|
23 |
-
else:
|
24 |
-
raise HTTPException(status_code=500, detail="Failed to download model.")
|
25 |
-
else:
|
26 |
-
print("Model already downloaded.")
|
27 |
-
|
28 |
-
# Download the model before loading
|
29 |
-
download_model(MODEL_URL, MODEL_PATH)
|
30 |
-
|
31 |
-
# Load the model
|
32 |
-
try:
|
33 |
-
model = Llama(model_path=MODEL_PATH)
|
34 |
-
print("Model Loaded Successfully")
|
35 |
-
except Exception as e:
|
36 |
-
raise HTTPException(status_code=500, detail=f"Model loading failed: {str(e)}")
|
37 |
-
|
38 |
-
|
39 |
-
@app.get("/")
|
40 |
-
def root():
|
41 |
-
return {"message": "Phi-4 GGUF Model Inference API"}
|
42 |
-
|
43 |
-
@app.post("/generate/")
|
44 |
-
def generate(prompt: str):
|
45 |
try:
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
except Exception as e:
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Request
|
2 |
+
from fastapi.responses import JSONResponse
|
3 |
+
from typing import Dict, List
|
4 |
from llama_cpp import Llama
|
|
|
|
|
5 |
|
6 |
app = FastAPI()
|
7 |
|
8 |
+
# Build the single module-level Llama instance from the "microsoft/phi-4-gguf"
# repo, using the "phi-4-q4.gguf" file. This runs at import time, so the model
# is ready before any request handler is invoked.
llm = Llama.from_pretrained(repo_id="microsoft/phi-4-gguf", filename="phi-4-q4.gguf")
|
12 |
+
|
13 |
+
@app.get('/')
def home():
    """Serve the landing page as HTML.

    Returning a bare ``str`` from a FastAPI handler goes through the default
    JSON response class, so the client would receive the markup as a quoted
    JSON string with an ``application/json`` content type. Wrapping it in
    ``HTMLResponse`` sends the markup as ``text/html`` instead.

    Returns:
        An ``HTMLResponse`` whose body is the ``<h1>home</h1>`` markup.
    """
    # Imported locally so this handler stays self-contained and does not
    # depend on the module-level import block being edited.
    from fastapi.responses import HTMLResponse

    return HTMLResponse(content="<h1>home</h1>")
|
16 |
+
|
17 |
+
|
18 |
+
@app.post("/chat")
async def chat(request: Request):
    """Run a chat completion over the messages supplied in the request body.

    The body must be a JSON object with a ``messages`` key holding a
    non-empty list; that list is passed unchanged to
    ``llm.create_chat_completion``.

    Returns:
        * 200 ``{"response": <content of the first choice>}`` on success.
        * 400 ``{"error": ...}`` when the body is not valid JSON, is not a
          JSON object, or carries no messages.
        * 500 ``{"error": <exception text>}`` when the completion itself
          fails.
    """
    # Client-side problems are reported as 400 rather than falling through to
    # the generic 500 handler below. json.JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses.
    try:
        data = await request.json()
    except ValueError:
        return JSONResponse(
            content={"error": "Request body must be valid JSON."},
            status_code=400,
        )

    if not isinstance(data, dict):
        return JSONResponse(
            content={"error": "Request body must be a JSON object."},
            status_code=400,
        )

    messages = data.get("messages")
    if not isinstance(messages, list) or not messages:
        return JSONResponse(
            content={"error": "'messages' must be a non-empty list."},
            status_code=400,
        )

    try:
        # NOTE(review): this is a synchronous call inside an async handler, so
        # the event loop is presumably blocked while the model generates.
        # Offloading to a worker thread would need confirmation that the
        # shared Llama instance tolerates concurrent calls -- TODO confirm.
        response = llm.create_chat_completion(messages=messages)
        reply = response['choices'][0]['message']['content']
    except Exception as e:
        # Top-level boundary: surface any model failure to the caller as 500.
        return JSONResponse(content={"error": str(e)}, status_code=500)

    return JSONResponse(content={"response": reply})
|
31 |
+
|
32 |
+
|
33 |
+
if __name__ == "__main__":
    # Executed directly as a script (not imported): serve `app` with uvicorn,
    # bound to every interface on port 7860.
    import uvicorn

    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind_options)
|