BloodRain666 committed on
Commit
68581e5
·
verified ·
1 Parent(s): 2d4950f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -2
app.py CHANGED
@@ -1,4 +1,32 @@
 
 
 
 
 
 
1
  import gradio as gr
2
- import os
3
 
4
- gr.load("models/google/gemma-1.1-7b-it", hf_token=os.environ.get("YOUR_API_TOKEN"), streaming=True).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import gradio as gr
2
+ # import os
3
+
4
+ # gr.load("models/google/gemma-1.1-7b-it", hf_token=os.environ.get("YOUR_API_TOKEN"), streaming=True).launch()
5
+
6
import os

import gradio as gr
from openai import OpenAI
 
8
 
9
# OpenAI-style client pointed at the Hugging Face inference endpoint.
# The API key is read from the YOUR_API_TOKEN environment variable;
# os.environ.get returns None when that variable is not set.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    api_key=os.environ.get("YOUR_API_TOKEN"),
)
13
+
14
def predict(message, history):
    """Stream a chat completion for *message* given the earlier conversation.

    Args:
        message: The newest user message.
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries content.
    """
    history_openai_format = []
    for turn in history:
        if isinstance(turn, dict):
            # Already role/content shaped. Unpacking a dict as a pair would
            # bind its *keys*, so handle it explicitly and forward only the
            # two fields the pair form produces.
            history_openai_format.append(
                {"role": turn["role"], "content": turn["content"]}
            )
            continue

        human, assistant = turn
        # Skip an empty side of a pair rather than send a null-content message.
        if human is not None:
            history_openai_format.append({"role": "user", "content": human})
        if assistant is not None:
            history_openai_format.append(
                {"role": "assistant", "content": assistant}
            )
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="google/gemma-1.1-7b-it",
        messages=history_openai_format,
        temperature=0.7,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # A stream chunk can arrive with no choices; indexing [0] would raise.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            partial_message += piece
            yield partial_message
31
+
32
# Wrap predict in a chat UI, then start serving it.
demo = gr.ChatInterface(predict)
demo.launch()