import os

import gradio as gr
from openai import OpenAI
# Load the Hugging Face access token from environment variables
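# (The token is typically provided as a Space secret; API requests fail without it.)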
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")
# Initialize the OpenAI client with Hugging Face's serverless API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
):
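    """Generate a streamed reply to `message`.

    Rebuilds the full conversation (system prompt plus `history`) in the
    OpenAI messages format, then yields the accumulating response so the
    Gradio UI can display tokens as they arrive.
    """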
print(f"Received message: {message}")
print(f"History: {history}")
print(f"System message: {system_message}")
print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
print(f"Frequency penalty: {frequency_penalty}, Seed: {seed}")
    # Construct the messages list for the conversation context:
    # system prompt first, then the prior user/assistant turns
    messages = [{"role": "system", "content": system_message}]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
            print(f"Added user message to context: {user_msg}")
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
            print(f"Added assistant message to context: {assistant_msg}")

    # Finally, append the latest user message
    messages.append({"role": "user", "content": message})
response = ""
print("Sending request to OpenAI API.")
for message in client.chat.completions.create(
model="meta-llama/Llama-3.3-70B-Instruct",
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
frequency_penalty=frequency_penalty,
seed=seed,
messages=messages,
):
token = message.choices[0].delta.content
print(f"Received token: {token}")
response += token
yield response
print("Completed response generation.")
# Initialize the chatbot interface
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")
# Create the Gradio interface with additional inputs for the new parameters
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
        gr.Slider(
            minimum=-2.0,
            maximum=2.0,
            value=0.0,
            step=0.1,
            label="Frequency Penalty",
        ),
        gr.Slider(
            minimum=-1,
            maximum=2**31 - 1,
            value=-1,
            step=1,
            label="Seed (-1 for random)",
        ),
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
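# The additional_inputs above are passed to respond() positionally, so their
# order must match the parameters after (message, history) in its signature.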
print("Gradio interface initialized.")
if __name__ == "__main__":
print("Launching the demo application.")
demo.launch() |