Lhumpal committed on
Commit
1080421
·
verified ·
1 Parent(s): 722ee20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -15,7 +15,8 @@ if hf_token:
15
  else:
16
  raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
17
 
18
- model = ollama.pull('llama3.2')
 
19
 
20
  class ChatRequest(BaseModel):
21
  message: str
@@ -35,13 +36,14 @@ async def chat(request: ChatRequest):
35
  start_time = time.time() # Start measuring time
36
 
37
  # Simulate getting the response from the model
38
- response = ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': request.message}])
 
39
  response = str(response)
40
 
41
  end_time = time.time() # End measuring time
42
  response_time = end_time - start_time # Calculate the response time
43
 
44
- # Return response with model status, response, and response time
45
  return {"model_status": model.status, "response": response, "response_time": response_time}
 
46
  except Exception as e:
47
  raise HTTPException(status_code=500, detail=str(e))
 
15
  else:
16
  raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
17
 
18
+ # model = ollama.pull('llama3.2')
19
+ model = ollama.pull('hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M')
20
 
21
  class ChatRequest(BaseModel):
22
  message: str
 
36
  start_time = time.time() # Start measuring time
37
 
38
  # Simulate getting the response from the model
39
+ # response = ollama.chat(model='Llama-3.2-3B-Instruct-GGUF:Q4_K_M', messages=[{'role': 'user', 'content': request.message}])
40
+ response = "pending"
41
  response = str(response)
42
 
43
  end_time = time.time() # End measuring time
44
  response_time = end_time - start_time # Calculate the response time
45
 
 
46
  return {"model_status": model.status, "response": response, "response_time": response_time}
47
+
48
  except Exception as e:
49
  raise HTTPException(status_code=500, detail=str(e))