Lhumpal committed on
Commit
4083e2d
·
verified ·
1 Parent(s): 53e3550

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -7
app.py CHANGED
@@ -1,3 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  from huggingface_hub import InferenceClient
@@ -8,10 +72,16 @@ app = FastAPI()
8
  # Get the token from the environment variable
9
  hf_token = os.environ.get("HF_TOKEN")
10
 
11
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=hf_token)
 
 
 
 
 
12
 
13
  class ChatRequest(BaseModel):
14
  message: str
 
15
  system_message: str = "You are a friendly Chatbot."
16
  max_tokens: int = 512
17
  temperature: float = 0.7
@@ -23,18 +93,25 @@ class ChatResponse(BaseModel):
23
  @app.post("/chat", response_model=ChatResponse)
24
  async def chat(request: ChatRequest):
25
  try:
26
- messages = []
27
- messages.append({"role": "system", "content": request.system_message})
 
 
 
 
28
  messages.append({"role": "user", "content": request.message})
29
 
30
- response = client.chat_completion(
 
31
  messages,
32
  max_tokens=request.max_tokens,
 
33
  temperature=request.temperature,
34
  top_p=request.top_p,
35
- )
36
-
 
 
37
  return {"response": response}
38
-
39
  except Exception as e:
40
  raise HTTPException(status_code=500, detail=str(e))
 
1
+ # from fastapi import FastAPI, HTTPException
2
+ # from pydantic import BaseModel
3
+ # from huggingface_hub import InferenceClient
4
+ # import os
5
+
6
+ # app = FastAPI()
7
+
8
+ # # Get the token from the environment variable
9
+ # hf_token = os.environ.get("HF_TOKEN")
10
+
11
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=hf_token)
12
+
13
+ # class ChatRequest(BaseModel):
14
+ # message: str
15
+ # system_message: str = "You are a friendly Chatbot."
16
+ # max_tokens: int = 512
17
+ # temperature: float = 0.7
18
+ # top_p: float = 0.95
19
+
20
+ # class ChatResponse(BaseModel):
21
+ # response: str
22
+
23
+ # @app.post("/chat", response_model=ChatResponse)
24
+ # async def chat(request: ChatRequest):
25
+ # try:
26
+ # messages = []
27
+ # messages.append({"role": "system", "content": request.system_message})
28
+ # messages.append({"role": "user", "content": request.message})
29
+
30
+ # response = client.chat_completion(
31
+ # messages,
32
+ # max_tokens=request.max_tokens,
33
+ # temperature=request.temperature,
34
+ # top_p=request.top_p,
35
+ # )
36
+
37
+ # return {"response": response}
38
+
39
+ # except Exception as e:
40
+ # raise HTTPException(status_code=500, detail=str(e))
41
+
42
+ # from fastapi import FastAPI
43
+ # from fastapi.responses import JSONResponse
44
+ # from fastapi import Request
45
+ # from huggingface_hub import InferenceClient
46
+
47
+ # app = FastAPI()
48
+
49
+ # @app.post("/")
50
+ # async def greet_json(request: Request):
51
+ # input_data = await request.json()
52
+ # # number = input_data.get("number")
53
+
54
+ # # tripled_number = number * 2
55
+ # # return {"message": f"Your input number is: {number}, your doubled number is: {tripled_number}"}
56
+ # user_input = input_data.get("user_input")
57
+
58
+ # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
59
+ # # Get the response from the model
60
+ # response = client(user_input)
61
+
62
+ # # assistant_response = client.text_generation(user_input)
63
+ # assistant_response = "I am assistant."
64
+ # return {"assistant_message": f"Your input message is: {user_input}, assistant_response is: {response}"}
65
  from fastapi import FastAPI, HTTPException
66
  from pydantic import BaseModel
67
  from huggingface_hub import InferenceClient
 
72
  # Get the token from the environment variable
73
  hf_token = os.environ.get("HF_TOKEN")
74
 
75
# Fail fast at startup: without the HF_TOKEN Space secret every
# inference call would fail later with a confusing auth error.
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")

# Shared inference client for the zephyr-7b-beta chat model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=hf_token)
81
 
82
  class ChatRequest(BaseModel):
83
  message: str
84
+ history: list[tuple[str, str]] = []
85
  system_message: str = "You are a friendly Chatbot."
86
  max_tokens: int = 512
87
  temperature: float = 0.7
 
93
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Chat completion endpoint.

    Builds an OpenAI-style message list from the system prompt, the
    (user, assistant) history pairs, and the new user message, then
    streams a completion from the shared `client` and returns the
    concatenated text.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        messages = [{"role": "system", "content": request.system_message}]
        for val in request.history:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
        messages.append({"role": "user", "content": request.message})

        response = ""
        for message in client.chat_completion(
            messages,
            max_tokens=request.max_tokens,
            stream=True,
            temperature=request.temperature,
            top_p=request.top_p,
        ):
            token = message.choices[0].delta.content
            # Bug fix: delta.content is None on role-only / final stream
            # chunks; concatenating None raised TypeError and surfaced as
            # a spurious 500. Skip empty deltas instead.
            if token:
                response += token

        return {"response": response}
    except Exception as e:
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e