Update app.py
app.py
CHANGED
@@ -15,7 +15,8 @@ if hf_token:
 else:
     raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
 
-model = ollama.pull('llama3.2')
+# model = ollama.pull('llama3.2')
+model = ollama.pull('hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M')
 
 class ChatRequest(BaseModel):
     message: str
@@ -35,13 +36,14 @@ async def chat(request: ChatRequest):
         start_time = time.time()  # Start measuring time
 
         # Simulate getting the response from the model
-        response = ollama.chat(model='
+        # response = ollama.chat(model='Llama-3.2-3B-Instruct-GGUF:Q4_K_M', messages=[{'role': 'user', 'content': request.message}])
+        response = "pending"
         response = str(response)
 
         end_time = time.time()  # End measuring time
         response_time = end_time - start_time  # Calculate the response time
 
-        # Return response with model status, response, and response time
         return {"model_status": model.status, "response": response, "response_time": response_time}
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
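For context, here is a minimal sketch of how the stubbed handler could call the pulled GGUF model once the commented-out ollama.chat line is restored. It is not part of this commit: the "/chat" route path, the MODEL_TAG constant, and the result['message']['content'] extraction are illustrative assumptions, and the HF_TOKEN check is omitted for brevity; the model tag, timing logic, and error handling are taken from the diff above. One detail worth noting: after ollama.pull('hf.co/bartowski/...'), the model is registered under that full tag, so the chat call should probably use the same string rather than the short Llama-3.2-3B-Instruct-GGUF:Q4_K_M name in the commented-out line.

# Hypothetical sketch, not part of this commit: wiring the chat endpoint to the
# pulled model. Assumes the Ollama server is running in the Space and the
# ollama Python client is installed; the "/chat" route path is a guess.
import time

import ollama
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

# Tag taken from the diff; pulling from hf.co registers the model under this exact name.
MODEL_TAG = 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M'
model = ollama.pull(MODEL_TAG)  # status object; .status is reused in the response, as in the diff


class ChatRequest(BaseModel):
    message: str


@app.post("/chat")  # assumed route; the diff only shows the function body
async def chat(request: ChatRequest):
    try:
        start_time = time.time()  # Start measuring time

        # Replace the "pending" placeholder with a real call to the pulled model
        result = ollama.chat(
            model=MODEL_TAG,
            messages=[{'role': 'user', 'content': request.message}],
        )
        response = str(result['message']['content'])  # keep only the assistant text

        end_time = time.time()  # End measuring time
        response_time = end_time - start_time  # Calculate the response time

        return {"model_status": model.status, "response": response, "response_time": response_time}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))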