RefalMachine committed on
Commit
408d3e1
·
verified ·
1 Parent(s): a7d91d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -6,8 +6,11 @@ For more information on `huggingface_hub` Inference API support, please check th
6
  """
7
  import requests
8
 
 
9
 
10
- client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
 
 
11
 
12
 
13
  def respond(
@@ -33,14 +36,20 @@ def respond(
33
 
34
  response = ""
35
 
36
- for message in client.chat_completion(
37
- messages,
38
- max_tokens=max_tokens,
39
- stream=True,
40
  temperature=temperature,
41
  top_p=top_p,
42
- #repetition_penalty=repetition_penalty
43
- ):
 
 
 
 
 
 
 
44
  token = message.choices[0].delta.content
45
 
46
  response += token
@@ -69,5 +78,5 @@ demo = gr.ChatInterface(
69
 
70
 
71
  if __name__ == "__main__":
72
- print(requests.get(os.getenv('MODEL_NAME_OR_PATH')[:-3] + '/docs'))
73
  demo.launch(share=True)
 
6
  """
7
  import requests
8
 
9
+ from openai import OpenAI
10
 
11
+ client = OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH'))
12
+
13
+ #client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
14
 
15
 
16
  def respond(
 
36
 
37
  response = ""
38
 
39
+ res = client.chat.completions.create(
40
+ model='RefalMachine/ruadapt_qwen2.5_7B_ext_u48_instruct',
41
+ messages=messages,
 
42
  temperature=temperature,
43
  top_p=top_p,
44
+ max_tokens=max_tokens,
45
+ stream=True,
46
+ extra_body={
47
+ "repetition_penalty": 1.0,
48
+ "add_generation_prompt": True,
49
+ }
50
+ )
51
+
52
+ for message in res:
53
  token = message.choices[0].delta.content
54
 
55
  response += token
 
78
 
79
 
80
  if __name__ == "__main__":
81
+ #print(requests.get(os.getenv('MODEL_NAME_OR_PATH')[:-3] + '/docs'))
82
  demo.launch(share=True)