Update app.py
app.py CHANGED
@@ -6,18 +6,18 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 import requests
 
-from openai import OpenAI
+from openai import OpenAI, AsyncOpenAI
 
 clients = {
-    '32B (work in progress)': [
-    '32B QWQ (experimental, without any additional tuning after LEP!)': [
-    '7B (work in progress)': [
-    '3B': [
+    '32B (work in progress)': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_32B')), os.getenv('MODEL_NAME_32B')],
+    '32B QWQ (experimental, without any additional tuning after LEP!)': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_QWQ')), os.getenv('MODEL_NAME_QWQ')],
+    '7B (work in progress)': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_7B')), 'RefalMachine/ruadapt_qwen2.5_7B_ext_u48_instruct'],
+    '3B': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_3B')), 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4']
 }
 #client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
 
 
-def respond(
+async def respond(
     message,
     history: list[tuple[str, str]],
     model_name,
@@ -41,7 +41,7 @@ def respond(
 
     response = ""
 
-    res = clients[model_name][0].chat.completions.create(
+    res = await clients[model_name][0].chat.completions.create(
         model=clients[model_name][1],
         messages=messages,
         temperature=temperature,
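In short, the commit swaps the synchronous OpenAI client for AsyncOpenAI and turns respond into a coroutine, so the chat.completions.create call must now be awaited. Below is a minimal sketch of that pattern for a single endpoint. The base_url env var, the placeholder api_key, and the 3B model ID are taken from the diff; the stream=True flag and the chunk-accumulation loop are assumptions about the parts of respond not shown in these hunks.

import os
from openai import AsyncOpenAI

# One client per backend, as in the clients dict from the diff.
client = AsyncOpenAI(
    api_key='123',  # placeholder key, as in the diff
    base_url=os.getenv('MODEL_NAME_OR_PATH_3B'),
)

async def respond(message, history, temperature=0.7):
    messages = [{"role": "user", "content": message}]
    response = ""
    # With AsyncOpenAI the call must be awaited; with stream=True (assumed here)
    # the result is an async iterator of chunks rather than a single completion.
    res = await client.chat.completions.create(
        model='RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4',
        messages=messages,
        temperature=temperature,
        stream=True,
    )
    async for chunk in res:
        if chunk.choices and chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
            # Yield the accumulated text; Gradio chat interfaces accept
            # async generators for streaming responses.
            yield response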