Spaces:

Rsnarsna
/

phi4-gguf-model

Sleeping

App Files Files Community

Rsnarsna committed on Feb 28

Commit

b785739

verified ·

1 Parent(s): 0248255

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -43

app.py CHANGED Viewed

@@ -1,49 +1,35 @@
-from fastapi import FastAPI, HTTPException
 from llama_cpp import Llama
-import os
-import requests
 app = FastAPI()
-MODEL_URL = "https://huggingface.co/microsoft/phi-4-gguf/resolve/main/phi-4-q4.gguf"
-MODEL_PATH = "model/phi-4-q4.gguf"
-# Function to download the GGUF model
-def download_model(url, path):
-    if not os.path.exists(path):
-        os.makedirs(os.path.dirname(path), exist_ok=True)
-        print(f"Downloading model from {url}...")
-        response = requests.get(url, stream=True)
-        if response.status_code == 200:
-            with open(path, "wb") as f:
-                for chunk in response.iter_content(chunk_size=1024):
-                    if chunk:
-                        f.write(chunk)
-            print("Download complete.")
-        else:
-            raise HTTPException(status_code=500, detail="Failed to download model.")
-    else:
-        print("Model already downloaded.")
-# Download the model before loading
-download_model(MODEL_URL, MODEL_PATH)
-# Load the model
-try:
-    model = Llama(model_path=MODEL_PATH)
-    print("Model Loaded Successfully")
-except Exception as e:
-    raise HTTPException(status_code=500, detail=f"Model loading failed: {str(e)}")
-@app.get("/")
-def root():
-    return {"message": "Phi-4 GGUF Model Inference API"}
-@app.post("/generate/")
-def generate(prompt: str):
     try:
-        output = model(prompt, max_tokens=200)
-        return {"response": output["choices"][0]["text"]}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Inference failed: {str(e)}")

+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from typing import Dict, List
 from llama_cpp import Llama
 app = FastAPI()
+# Build the single shared Llama instance from the `phi-4-q4.gguf` file of the
+# `microsoft/phi-4-gguf` repo. This runs once, when the module is imported.
+llm = Llama.from_pretrained(repo_id="microsoft/phi-4-gguf", filename="phi-4-q4.gguf")
+@app.get('/')
+def home():
+    """Serve the landing page as HTML.
+
+    Returning the markup as a bare ``str`` would go through FastAPI's default
+    JSON response, so a client would receive a quoted JSON string with an
+    ``application/json`` content type. Wrapping it in ``HTMLResponse`` sends
+    the same markup as ``text/html``.
+
+    Returns:
+        An ``HTMLResponse`` whose body is ``<h1>home</h1>``.
+    """
+    # Imported locally so this route does not rely on the file's import block.
+    from fastapi.responses import HTMLResponse
+
+    return HTMLResponse(content="<h1>home</h1>")
+@app.post("/chat")
+async def chat(request: Request):
+    """Answer a chat request with one completion from the loaded model.
+
+    The request body must be a JSON object whose "messages" entry is a
+    non-empty list; that list is passed unchanged to
+    ``llm.create_chat_completion``.
+
+    Returns:
+        A ``JSONResponse`` containing ``{"response": <content of the first
+        choice>}`` on success; ``{"error": ...}`` with status 400 when the
+        body is not valid JSON or has no usable "messages" list; and
+        ``{"error": ...}`` with status 500 for any other failure.
+    """
     try:
+        try:
+            data = await request.json()
+        except ValueError:
+            # json.JSONDecodeError subclasses ValueError: an unparsable body is
+            # the caller's mistake, so report 400 rather than a server fault.
+            return JSONResponse(content={"error": "Request body must be valid JSON."}, status_code=400)
+        # A JSON array or scalar has no .get(); treat it like a missing field.
+        messages = data.get("messages") if isinstance(data, dict) else None
+        if not isinstance(messages, list) or not messages:
+            return JSONResponse(content={"error": 'The "messages" field must be a non-empty list.'}, status_code=400)
+        # NOTE(review): this is a synchronous call inside an async handler, so
+        # it presumably occupies the event loop while the model generates.
+        # Confirm that is acceptable for this deployment.
+        response = llm.create_chat_completion(
+            messages=messages
+        )
+        return JSONResponse(content={"response": response['choices'][0]['message']['content']})
     except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+if __name__ == "__main__":
+    # Only when run directly as a script: serve `app` with uvicorn, bound to
+    # 0.0.0.0 on port 7860.
+    import uvicorn
+    listen = {"host": "0.0.0.0", "port": 7860}
+    uvicorn.run(app, **listen)