|
import time
|
|
import gradio as gr
|
|
import requests
|
|
from client import respond
|
|
from huggingface_hub.errors import HfHubHTTPError
|
|
|
|
|
|
"""
|
|
The Hugging Face API sometimes returns a 503 error, so requests need to be retried multiple times.
|
|
"""
|
|
|
|
|
|
def robust_respond(*args, **kwargs):
    """Stream a reply from ``respond``, retrying while the API answers HTTP 503.

    All positional and keyword arguments are forwarded unchanged to
    ``respond``. Up to 10 attempts are made; the pause between attempts
    starts at 2 seconds and doubles after each failed attempt.

    Yields:
        Every item produced by ``respond`` during an attempt. If an
        ``HfHubHTTPError`` that is not a 503 is raised, a single
        ``"Error: ..."`` string is yielded and the generator stops. If all
        attempts fail with a 503, ``"Server busy right now !"`` is yielded.
    """
    max_retries = 10
    wait_time = 2  # seconds; doubled after every failed attempt

    for attempt in range(max_retries):
        try:
            yield from respond(*args, **kwargs)
            return
        except HfHubHTTPError as e:
            # Prefer the real HTTP status over substring matching: "503" can
            # appear in unrelated parts of the message (request id, URL, ...).
            # Fall back to the message only when no status is available.
            response = getattr(e, "response", None)
            status = getattr(response, "status_code", None)
            if status is not None:
                service_unavailable = status == 503
            else:
                service_unavailable = "503" in str(e)

            if not service_unavailable:
                yield f"Error: {e}"
                return

            if attempt == max_retries - 1:
                # No further attempt will follow, so sleeping here would only
                # delay the failure message shown to the user.
                print(
                    f"Attempt {attempt+1}: Hugging Face API is down. Giving up."
                )
                break

            print(
                f"Attempt {attempt+1}: Hugging Face API is down. Retrying in {wait_time}s..."
            )
            time.sleep(wait_time)
            wait_time *= 2

    yield "Server busy right now !"
|
|
|
|
|
|
# Chat display component with a fixed height of 600.
chatbot = gr.Chatbot(height=600)

# Chat interface driven by the retrying responder. The additional inputs are
# declared in this order: system message, max new tokens, temperature, top-p.
demo = gr.ChatInterface(
    fn=robust_respond,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System message", value=""),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-P", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
    ],
    theme="Nymbo/Nymbo_Theme",
    fill_height=True,
)
|
|
|