Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,11 @@ For more information on `huggingface_hub` Inference API support, please check th
|
|
6 |
"""
|
7 |
import requests
|
8 |
|
|
|
9 |
|
10 |
-
client =
|
|
|
|
|
11 |
|
12 |
|
13 |
def respond(
|
@@ -33,14 +36,20 @@ def respond(
|
|
33 |
|
34 |
response = ""
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
stream=True,
|
40 |
temperature=temperature,
|
41 |
top_p=top_p,
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
token = message.choices[0].delta.content
|
45 |
|
46 |
response += token
|
@@ -69,5 +78,5 @@ demo = gr.ChatInterface(
|
|
69 |
|
70 |
|
71 |
if __name__ == "__main__":
|
72 |
-
print(requests.get(os.getenv('MODEL_NAME_OR_PATH')[:-3] + '/docs'))
|
73 |
demo.launch(share=True)
|
|
|
6 |
"""
|
7 |
import requests
|
8 |
|
9 |
+
from openai import OpenAI
|
10 |
|
11 |
+
client = OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH'))
|
12 |
+
|
13 |
+
#client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
|
14 |
|
15 |
|
16 |
def respond(
|
|
|
36 |
|
37 |
response = ""
|
38 |
|
39 |
+
res = client.chat.completions.create(
|
40 |
+
model='RefalMachine/ruadapt_qwen2.5_7B_ext_u48_instruct',
|
41 |
+
messages=messages,
|
|
|
42 |
temperature=temperature,
|
43 |
top_p=top_p,
|
44 |
+
max_tokens=max_tokens,
|
45 |
+
stream=True,
|
46 |
+
extra_body={
|
47 |
+
"repetition_penalty": 1.0,
|
48 |
+
"add_generation_prompt": True,
|
49 |
+
}
|
50 |
+
)
|
51 |
+
|
52 |
+
for message in res:
|
53 |
token = message.choices[0].delta.content
|
54 |
|
55 |
response += token
|
|
|
78 |
|
79 |
|
80 |
if __name__ == "__main__":
|
81 |
+
#print(requests.get(os.getenv('MODEL_NAME_OR_PATH')[:-3] + '/docs'))
|
82 |
demo.launch(share=True)
|