import gradio as gr
from openai import OpenAI
import os

# Load the Hugging Face access token from environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")

print("Access token loaded.")

# Initialize the OpenAI client with Hugging Face's serverless API
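# (the serverless endpoint is OpenAI-compatible, which is why the stock
# `openai` client works once `base_url` points at it)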
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

print("OpenAI client initialized.")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
):
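    """Stream a chat completion for `message`, replaying `history` as prior context."""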
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency penalty: {frequency_penalty}, Seed: {seed}")

    # Construct the messages list for the conversation context
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
            print(f"Added user message to context: {val[0]}")
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
            print(f"Added assistant message to context: {val[1]}")

    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to OpenAI API.")

    # Stream the completion, accumulating tokens into the running response.
    # (The loop variable is named `chunk` to avoid shadowing the `message` parameter.)
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        token = chunk.choices[0].delta.content
        # Some stream chunks (e.g. the final one) carry no content delta, so skip them
        if token is not None:
            print(f"Received token: {token}")
            response += token
            yield response

    print("Completed response generation.")

# Initialize the chatbot interface
chatbot = gr.Chatbot(height=600)

print("Chatbot interface created.")

# Create the Gradio interface with additional inputs for the new parameters
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
        gr.Slider(
            minimum=-2.0,
            maximum=2.0,
            value=0.0,
            step=0.1,
            label="Frequency Penalty",
        ),
        gr.Slider(
            minimum=-1,
            maximum=2**31 - 1,
            value=-1,
            step=1,
            label="Seed",
        ),
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()
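
# To run locally (assuming this file is saved as app.py and a valid token is at hand):
#   HF_TOKEN=hf_xxx python app.py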