Lhumpal committed on
Commit
f49dbfe
·
verified ·
1 Parent(s): f35591b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -17
app.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
3
  from huggingface_hub import InferenceClient
4
  import os
5
  import ollama
 
6
 
7
  app = FastAPI()
8
 
@@ -14,8 +15,6 @@ if hf_token:
14
  else:
15
  raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
16
 
17
-
18
-
19
  model = ollama.pull('llama3.2')
20
 
21
  class ChatRequest(BaseModel):
@@ -28,27 +27,21 @@ class ChatRequest(BaseModel):
28
  class ChatResponse(BaseModel):
29
  model_status: str
30
  response: str
 
31
 
32
  @app.post("/chat", response_model=ChatResponse)
33
  async def chat(request: ChatRequest):
34
  try:
35
- # response = "".join(reversed(request.message))
36
- # messages = [
37
- # {"role": "system", "content": request.system_message},
38
- # {"role": "user", "content": request.message},
39
- # ]
40
-
41
- # response = client.chat_completion(
42
- # messages=messages,
43
- # max_tokens=request.max_tokens,
44
- # temperature=request.temperature,
45
- # top_p=request.top_p,
46
- # )
47
  response = ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Hello!'}])
48
  response = str(response)
49
 
 
 
50
 
51
- # return {"response": response.choices[0].message.content}
52
- return {"model_status": model.status, "response": response}
53
  except Exception as e:
54
- raise HTTPException(status_code=500, detail=str(e))
 
3
  from huggingface_hub import InferenceClient
4
  import os
5
  import ollama
6
+ import time # Import time module for measuring response time
7
 
8
  app = FastAPI()
9
 
 
15
  else:
16
  raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
17
 
 
 
18
  model = ollama.pull('llama3.2')
19
 
20
  class ChatRequest(BaseModel):
 
27
  class ChatResponse(BaseModel):
28
  model_status: str
29
  response: str
30
+ response_time: float # Add field for response time
31
 
32
  @app.post("/chat", response_model=ChatResponse)
33
  async def chat(request: ChatRequest):
34
  try:
35
+ start_time = time.time() # Start measuring time
36
+
37
+ # Simulate getting the response from the model
 
 
 
 
 
 
 
 
 
38
  response = ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Hello!'}])
39
  response = str(response)
40
 
41
+ end_time = time.time() # End measuring time
42
+ response_time = end_time - start_time # Calculate the response time
43
 
44
+ # Return response with model status, response, and response time
45
+ return {"model_status": model.status, "response": response, "response_time": response_time}
46
  except Exception as e:
47
+ raise HTTPException(status_code=500, detail=str(e))