# NOTE: the lines below were page-scrape residue from the Hugging Face Space
# listing ("Spaces: Sleeping") and are not part of the program.
# Earlier approach, kept for reference: load the hosted model directly.
# import gradio as gr
# import os
# gr.load("models/google/gemma-1.1-7b-it", hf_token=os.environ.get("YOUR_API_TOKEN"), streaming=True).launch()
import gradio as gr
import os

# Prefer declaring "openai" in requirements.txt. The runtime install below is
# kept only as a fallback so an already-deployed Space keeps working, but it
# no longer reinstalls the package on every launch.
try:
    from openai import OpenAI
except ImportError:
    os.system("pip install openai")
    from openai import OpenAI

# The Hugging Face Inference API speaks the OpenAI-compatible protocol, so the
# stock OpenAI client works with only a different base URL.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    api_key=os.environ.get("YOUR_API_TOKEN"),
)
def predict(message, history, test=""):
    """Stream a chat completion for *message* given the conversation so far.

    Args:
        message: The user's latest message (plain text).
        history: Gradio ChatInterface pair-style history, a list of
            ``(user, assistant)`` message tuples.
        test: Unused; kept for backward compatibility with existing callers.

    Yields:
        str: The assistant reply accumulated so far — Gradio re-renders the
        chat bubble on each yield, producing a streaming effect.
    """
    # Flatten Gradio's [(user, assistant), ...] pairs into the OpenAI
    # chat-message format, then append the new user message.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='google/gemma-1.1-7b-it',
        messages=history_openai_format,
        temperature=0.7,
        stream=True,
        max_tokens=3000,
    )

    # Accumulate streamed deltas; chunks with a None delta (e.g. the final
    # stop chunk) carry no text and are skipped.
    partial_message = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message = partial_message + delta
            yield partial_message
# Wire the streaming predict() generator into a chat UI and start the app.
gr.ChatInterface(predict).launch()