File size: 1,965 Bytes
afcda5c
ab1bdf3
afcda5c
ab1bdf3
afcda5c
03cc6aa
8cfb246
afcda5c
 
3a1c398
8cfb246
afcda5c
 
 
372b193
afcda5c
8cfb246
ab1bdf3
 
 
 
 
 
 
 
 
 
 
6d6790a
ab1bdf3
 
afcda5c
8cfb246
 
 
 
 
 
 
 
 
 
 
 
 
 
ab1bdf3
 
 
 
03cc6aa
372b193
afcda5c
ab1bdf3
 
 
afcda5c
ab1bdf3
 
 
 
8cfb246
 
03cc6aa
3a1c398
ab1bdf3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import gradio as gr
from openai import OpenAI


# Heading and Markdown blurb handed to the Gradio chat interface as its
# title and description.
title = "ERNIE 4.5 Turbo: BAIDU's LLM"
description = """
- Official Website: <https://yiyan.baidu.com/> (UI in Chinese)
- API services: [Qianfan Large Model Platform](https://cloud.baidu.com/product-s/qianfan_home) (cloud platform providing LLM services, UI in Chinese)
- [ERNIE 4.5 Turbo Demo](https://huggingface.co/spaces/PaddlePaddle/ernie_4.5_turbo_demo) |  [ERNIE X1 Turbo Demo](https://huggingface.co/spaces/PaddlePaddle/ernie_x1_turbo_demo)
"""


# The API key is read from the QIANFAN_TOKEN environment variable (None when
# the variable is unset); the model identifier is fixed.
qianfan_api_key = os.environ.get("QIANFAN_TOKEN")
qianfan_model = "ernie-4.5-turbo-32k"

# OpenAI SDK client pointed at the Qianfan v2 base URL.
client = OpenAI(
    api_key=qianfan_api_key,
    base_url="https://qianfan.baidubce.com/v2",
)


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The latest user input.
        history: Earlier turns of the conversation. The interface is created
            with ``type='messages'``, so each entry is expected to be a dict
            carrying at least ``"role"`` and ``"content"``.
        system_message: Text sent as the leading system message.
        max_tokens: Upper bound passed as ``max_completion_tokens``.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.

    Yields:
        The accumulated assistant reply after each streamed chunk, so the
        caller can re-render the growing text.
    """
    messages = [{"role": "system", "content": system_message}]
    # Forward only the two keys the chat-completions schema defines; any
    # UI-only extras attached to history entries are left out of the request.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=qianfan_model,
        messages=messages,
        max_completion_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )

    output_message = ""
    for chunk in response:
        # A streamed chunk may carry no choices at all; nothing to append.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # The delta's content is optional (e.g. role-only or finish chunks);
        # concatenating None onto a str would raise TypeError.
        if token is None:
            continue
        output_message += token
        yield output_message


# Chat UI backed by `respond`. The additional inputs are listed in the same
# order as the parameters `respond` declares after `message` and `history`:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    fn=respond,
    title=title,
    description=description,
    type="messages",
    additional_inputs=[
        gr.Textbox(label="System message", value=""),
        gr.Slider(
            label="Max new tokens",
            minimum=2,
            maximum=12288,
            value=2048,
            step=1,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.05,
        ),
    ],
    concurrency_limit=50,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()