Spaces:
Running
Running
File size: 5,495 Bytes
7831eba 9d49e57 76a7d46 7831eba a7d91d4 37a3c87 a7d91d4 b752df1 b035ea0 3a0295e 3017ebf 3a0295e f071706 6fafd7a 19778bd 3a0295e b752df1 b035ea0 3017ebf b035ea0 b752df1 3017ebf b752df1 76a7d46 b752df1 7831eba 555ac42 7831eba c7fd9ac 7831eba b5fab19 8baca64 7831eba 0cd27a0 7831eba 555ac42 8baca64 408d3e1 7831eba 408d3e1 8baca64 408d3e1 890c8a8 408d3e1 890c8a8 7831eba 97f173f 7831eba 97f173f 7831eba 6fafd7a d5b1c0a 70ac69e 9436706 051148e 3910665 7831eba d5b1c0a 6fafd7a 7f56d3f 4323b57 7831eba 793da93 7831eba 3910665 555ac42 7831eba 408d3e1 d8d19ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import gradio as gr
from huggingface_hub import InferenceClient
import os
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import requests
from openai import OpenAI, AsyncOpenAI
# Registry of available model backends: label -> [OpenAI client, served model id].
clients = {}
# Shared API key for all OpenAI-compatible endpoints.
token = os.getenv('API_KEY')


def _register_client(label, base_url):
    """Register one OpenAI-compatible endpoint under *label*.

    Queries the endpoint's /models route to discover the served model id
    (vLLM-style servers expose exactly one model there).
    Raises on any network/env failure; callers decide whether that is fatal.
    """
    model_id = requests.get(
        base_url + '/models',
        headers={"Authorization": f"Bearer {token}"},
    ).json()['data'][0]['id']
    clients[label] = [OpenAI(api_key=token, base_url=base_url), model_id]


# Required endpoints: a failure here should surface at startup
# (mirrors the original un-guarded registration).
_register_client('32B-Pro (beta)', os.getenv('RUADAPT_PRO_PATH'))
_register_client('14B-R1 (preview)', os.getenv('RUADAPT_QWEN_14_R1_PATH'))

# Optional endpoints: silently skipped when the env var is unset or the
# server is unreachable, so the demo still starts with the models it has.
for _label, _env_var in [
    ('32B QWQ (experimental, without any additional tuning after LEP!)', 'MODEL_NAME_OR_PATH_QWQ'),
    ('7B (work in progress)', 'MODEL_NAME_OR_PATH_7B'),
    ('3B', 'MODEL_NAME_OR_PATH_3B'),
]:
    try:
        _register_client(_label, os.getenv(_env_var))
    except Exception:
        # Deliberate best-effort: a missing optional backend must not
        # prevent launch. (Was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        pass
def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
    repetition_penalty,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs from the ChatInterface.
        model_name: Key into the module-level ``clients`` registry.
        system_message: Optional system prompt; skipped when blank.
        max_tokens / temperature / top_p / repetition_penalty: sampling knobs
            forwarded to the OpenAI-compatible backend.

    Yields:
        The accumulated assistant response after each streamed chunk.
    """
    messages = []
    if system_message.strip():
        messages.append({"role": "system", "content": system_message})
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    client, model_id = clients[model_name]
    stream = client.chat.completions.create(
        model=model_id,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stream=True,
        extra_body={
            "repetition_penalty": repetition_penalty,
            "add_generation_prompt": True,
        },
    )

    response = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        # Role-only / final stream chunks carry content=None; the original
        # `response += token` raised TypeError on those.
        if delta is None:
            continue
        response += delta
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
options = ["32B-Pro (beta)", '14B-R1 (preview)', "32B QWQ (experimental, without any additional tuning after LEP!)", "7B (work in progress)", "3B"]
options = options[:2]
system_old = "You are a helpful and harmless assistant. You should think step-by-step. First, reason (the user does not see your reasoning), then give your final answer."
system_new = "Ты Руадапт - полезный и дружелюбный интеллектуальный ассистент для помощи пользователям в их вопросах."
system_new2 = "Ты — Руадапт, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
# Delimiter pairs the Gradio Chatbot scans for to render LaTeX via KaTeX.
# Fix: the original used strings like "\\begin\{equation\}" — the "\{" is an
# invalid escape sequence (SyntaxWarning on Python 3.12+) that leaves a literal
# backslash before the brace, so the delimiter could never match real LaTeX
# such as "\begin{equation}".
latex_delimiters = [
    {"left": "\\(", "right": "\\)", "display": True},
    {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
    {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
    {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
    {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
    {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "$$", "right": "$$", "display": True},
]
# Shared chat widget; LaTeX rendering uses the delimiter list defined above.
chatbot = gr.Chatbot(
    label="Chatbot",
    latex_delimiters=latex_delimiters,
    height=400,
    scale=1,
)
# Build the chat UI: model picker plus sampling controls, passed to `respond`
# as additional inputs in this exact order (it matches the function signature).
_model_selector = gr.Radio(choices=options, label="Model:", value=options[0])
_system_box = gr.Textbox(value="", label="System message")
_max_tokens_slider = gr.Slider(minimum=1, maximum=4096*2, value=4096, step=2, label="Max new tokens")
_temperature_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.1, label="Temperature")
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)
_rep_penalty_slider = gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty")

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _model_selector,
        _system_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
        _rep_penalty_slider,
    ],
    chatbot=chatbot,
    concurrency_limit=10,
)
if __name__ == "__main__":
#print(requests.get(os.getenv('MODEL_NAME_OR_PATH')[:-3] + '/docs'))
demo.launch(share=True)
|