Update app.py
app.py
CHANGED
@@ -15,7 +15,8 @@ if hf_token:
 else:
     raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
 
-model = ollama.pull('llama3.2')
+# model = ollama.pull('llama3.2')
+model = ollama.pull('hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M')
 
 class ChatRequest(BaseModel):
     message: str
@@ -35,13 +36,14 @@ async def chat(request: ChatRequest):
         start_time = time.time()  # Start measuring time
 
         # Simulate getting the response from the model
-        response = ollama.chat(model='
+        # response = ollama.chat(model='Llama-3.2-3B-Instruct-GGUF:Q4_K_M', messages=[{'role': 'user', 'content': request.message}])
+        response = "pending"
         response = str(response)
 
         end_time = time.time()  # End measuring time
         response_time = end_time - start_time  # Calculate the response time
 
-        # Return response with model status, response, and response time
         return {"model_status": model.status, "response": response, "response_time": response_time}
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
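For context, here is a minimal sketch of how the stubbed handler could call the pulled GGUF model once the commented-out ollama.chat line is restored. It is not part of this commit: the "/chat" route path, the MODEL_TAG constant, and the result['message']['content'] extraction are illustrative assumptions, and the HF_TOKEN check is omitted for brevity; the model tag, timing logic, and error handling are taken from the diff above. One detail worth noting: after ollama.pull('hf.co/bartowski/...'), the model is registered under that full tag, so the chat call should probably use the same string rather than the short Llama-3.2-3B-Instruct-GGUF:Q4_K_M name in the commented-out line.

# Hypothetical sketch, not part of this commit: wiring the chat endpoint to the
# pulled model. Assumes the Ollama server is running in the Space and the
# ollama Python client is installed; the "/chat" route path is a guess.
import time

import ollama
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

# Tag taken from the diff; pulling from hf.co registers the model under this exact name.
MODEL_TAG = 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M'
model = ollama.pull(MODEL_TAG)  # status object; .status is reused in the response, as in the diff


class ChatRequest(BaseModel):
    message: str


@app.post("/chat")  # assumed route; the diff only shows the function body
async def chat(request: ChatRequest):
    try:
        start_time = time.time()  # Start measuring time

        # Replace the "pending" placeholder with a real call to the pulled model
        result = ollama.chat(
            model=MODEL_TAG,
            messages=[{'role': 'user', 'content': request.message}],
        )
        response = str(result['message']['content'])  # keep only the assistant text

        end_time = time.time()  # End measuring time
        response_time = end_time - start_time  # Calculate the response time

        return {"model_status": model.status, "response": response, "response_time": response_time}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))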