Rsnarsna committed
Commit b785739 · verified · parent: 0248255

Update app.py

Files changed (1):
  1. app.py +29 -43
app.py CHANGED
@@ -1,49 +1,35 @@
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from typing import Dict, List
 from llama_cpp import Llama
-import os
-import requests
 
 app = FastAPI()
 
-MODEL_URL = "https://huggingface.co/microsoft/phi-4-gguf/resolve/main/phi-4-q4.gguf"
-MODEL_PATH = "model/phi-4-q4.gguf"
-
-# Function to download the GGUF model
-def download_model(url, path):
-    if not os.path.exists(path):
-        os.makedirs(os.path.dirname(path), exist_ok=True)
-        print(f"Downloading model from {url}...")
-        response = requests.get(url, stream=True)
-        if response.status_code == 200:
-            with open(path, "wb") as f:
-                for chunk in response.iter_content(chunk_size=1024):
-                    if chunk:
-                        f.write(chunk)
-            print("Download complete.")
-        else:
-            raise HTTPException(status_code=500, detail="Failed to download model.")
-    else:
-        print("Model already downloaded.")
-
-# Download the model before loading
-download_model(MODEL_URL, MODEL_PATH)
-
-# Load the model
-try:
-    model = Llama(model_path=MODEL_PATH)
-    print("Model Loaded Successfully")
-except Exception as e:
-    raise HTTPException(status_code=500, detail=f"Model loading failed: {str(e)}")
-
-
-@app.get("/")
-def root():
-    return {"message": "Phi-4 GGUF Model Inference API"}
-
-@app.post("/generate/")
-def generate(prompt: str):
+llm = Llama.from_pretrained(
+    repo_id="microsoft/phi-4-gguf",
+    filename="phi-4-q4.gguf",
+)
+
+@app.get('/')
+def home():
+    return "<h1>home</h1>"
+
+
+@app.post("/chat")
+async def chat(request: Request):
     try:
-        output = model(prompt, max_tokens=200)
-        return {"response": output["choices"][0]["text"]}
+        data = await request.json()
+        messages = data.get("messages", [])
+
+        response = llm.create_chat_completion(
+            messages=messages
+        )
+
+        return JSONResponse(content={"response": response['choices'][0]['message']['content']})
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Inference failed: {str(e)}")
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
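
The main change here is that the hand-rolled download_model helper is dropped: Llama.from_pretrained fetches the GGUF file from the Hub and caches it locally via huggingface_hub, so the manual requests-based download and the module-level HTTPException are no longer needed. A minimal sketch to inspect that cache, assuming huggingface_hub is installed (from_pretrained already depends on it):

from huggingface_hub import hf_hub_download

# Resolves to the local cache path of the same file the app loads;
# downloads only if the file is not already cached.
path = hf_hub_download(repo_id="microsoft/phi-4-gguf", filename="phi-4-q4.gguf")
print(path)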
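The new /chat endpoint reads an OpenAI-style messages list from the JSON body and returns the assistant text under "response". A minimal client sketch, assuming the server is running locally on the port this commit binds (7860); the prompt is illustrative:

import requests

payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ]
}

# POST to the /chat route added in this commit; failures come back
# as {"error": ...} with HTTP 500 per the handler above.
resp = requests.post("http://localhost:7860/chat", json=payload)
resp.raise_for_status()
print(resp.json()["response"])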