Update app.py
app.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from huggingface_hub import InferenceClient
 import os
 import ollama
+import time  # Import time module for measuring response time
 
 app = FastAPI()
 
@@ -14,8 +15,6 @@ if hf_token:
 else:
     raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
 
-
-
 model = ollama.pull('llama3.2')
 
 class ChatRequest(BaseModel):
@@ -28,27 +27,21 @@ class ChatRequest(BaseModel):
 class ChatResponse(BaseModel):
     model_status: str
     response: str
+    response_time: float  # Add field for response time
 
 @app.post("/chat", response_model=ChatResponse)
 async def chat(request: ChatRequest):
     try:
-
-
-        # messages = [
-        #     {"role": "user", "content": request.message},
-        # ]
-
-        # response = client.chat_completion(
-        #     messages=messages,
-        #     max_tokens=request.max_tokens,
-        #     temperature=request.temperature,
-        #     top_p=request.top_p,
-        # )
+        start_time = time.time()  # Start measuring time
+
+        # Simulate getting the response from the model
         response = ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Hello!'}])
         response = str(response)
 
+        end_time = time.time()  # End measuring time
+        response_time = end_time - start_time  # Calculate the response time
 
-        #
-        return {"model_status": model.status, "response": response}
+        # Return response with model status, response, and response time
+        return {"model_status": model.status, "response": response, "response_time": response_time}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
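
For reference, a minimal sketch of how the updated endpoint might be called once the Space is running. This is not part of the commit: the base URL is a placeholder, and the "message" field name is inferred from the commented-out code the commit removes (which read request.message); adjust to whatever fields ChatRequest actually declares.

import requests  # assumes the requests package is installed

BASE_URL = "http://localhost:8000"  # placeholder; substitute the real Space URL

# "message" is inferred from the removed commented-out code.
payload = {"message": "Hello!"}

resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()

# response_time is the field this commit adds to ChatResponse.
print(data["model_status"], data["response_time"])
print(data["response"])

Note that as committed, the handler still sends a hardcoded 'Hello!' to ollama.chat rather than request.message, so the payload above affects request validation but not the model output.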
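
A possible refinement beyond this commit: time.perf_counter() is generally preferred over time.time() for measuring elapsed durations, since it is monotonic and unaffected by system clock adjustments. A self-contained sketch of the same timing pattern, with a dummy workload standing in for the model call:

import time

start_time = time.perf_counter()   # monotonic, high-resolution timer
result = sum(range(1_000_000))     # stand-in for the ollama.chat(...) call
response_time = time.perf_counter() - start_time

print(f"response_time: {response_time:.4f}s")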