# Hugging Face Space: Gradio chat app that gates LLM replies behind an
# end-of-utterance (EOU) turn-detector model.
import gradio as gr
from huggingface_hub import InferenceClient
import string
import numpy as np
from transformers import AutoTokenizer
import onnxruntime as ort
import os
# Initialize client and models
# Inference API client; reads the access token from the HF_TOKEN env var
# (api_key is None if unset, which fails later at request time).
client = InferenceClient(api_key=os.environ.get('HF_TOKEN'))
# Constants for EOU calculation
# Punctuation to strip during normalization; apostrophe is kept so
# contractions like "don't" survive.
PUNCS = string.punctuation.replace("'", "")
# Number of most-recent chat turns fed to the turn detector.
MAX_HISTORY = 4
# Token cap for the detector's input sequence.
MAX_HISTORY_TOKENS = 1024
# EOU probability at or above which the user's turn counts as finished.
EOU_THRESHOLD = 0.5
# Initialize tokenizer and ONNX session
HG_MODEL = "livekit/turn-detector"
# NOTE(review): the ONNX file is loaded from the working directory —
# presumably shipped alongside this script in the Space repo; verify.
ONNX_FILENAME = "model_quantized.onnx"
tokenizer = AutoTokenizer.from_pretrained(HG_MODEL)
onnx_session = ort.InferenceSession(ONNX_FILENAME, providers=["CPUExecutionProvider"])
# Helper functions for EOU
def softmax(logits):
    """Numerically stable softmax over a 1-D array of logits."""
    # Subtracting the max leaves the result unchanged but keeps exp() from
    # overflowing on large logits.
    shifted = logits - np.max(logits)
    exps = np.exp(shifted)
    return exps / exps.sum()
def normalize_text(text):
    """Lowercase *text*, strip punctuation, and collapse whitespace.

    Apostrophes are kept so contractions such as "don't" survive.
    """
    # string.punctuation minus the apostrophe — same set the module-level
    # PUNCS constant is built from.
    drop_table = str.maketrans("", "", string.punctuation.replace("'", ""))
    words = text.translate(drop_table).lower().split()
    return " ".join(words)
def format_chat_ctx(chat_ctx):
    """Render user/assistant turns into the turn-detector's chat template.

    Returns the templated conversation text truncated just before the final
    "<|im_end|>" marker, so the detector predicts whether that
    end-of-utterance token should come next.
    """
    new_chat_ctx = []
    for msg in chat_ctx:
        # Only user/assistant turns matter for end-of-utterance detection.
        if msg["role"] not in ("user", "assistant"):
            continue
        content = normalize_text(msg["content"])
        if content:
            # Build a fresh dict: the original code mutated msg in place,
            # which permanently rewrote the shared conversation history
            # (the un-normalized text was lost before being sent to the LLM).
            new_chat_ctx.append({"role": msg["role"], "content": content})
    convo_text = tokenizer.apply_chat_template(
        new_chat_ctx, add_generation_prompt=False, add_special_tokens=False, tokenize=False
    )
    # Cut just before the final end-of-utterance marker; if the template
    # produced none, keep the full text (rfind's -1 would silently drop the
    # last character).
    ix = convo_text.rfind("<|im_end|>")
    return convo_text if ix == -1 else convo_text[:ix]
def calculate_eou(chat_ctx, session):
    """Return the probability that the user's last utterance is complete.

    Runs the ONNX turn-detector over the most recent MAX_HISTORY turns and
    reads off the probability of the "<|im_end|>" token at the next position.
    """
    # Older context is irrelevant to turn detection — keep only the tail.
    recent_turns = chat_ctx[-MAX_HISTORY:]
    formatted_text = format_chat_ctx(recent_turns)
    encoded = tokenizer(
        formatted_text,
        return_tensors="np",
        truncation=True,
        max_length=MAX_HISTORY_TOKENS,
    )
    input_ids = np.array(encoded["input_ids"], dtype=np.int64)
    # Logits over the vocabulary for the token that would follow the
    # conversation so far.
    last_logits = session.run(["logits"], {"input_ids": input_ids})[0][0, -1, :]
    probs = softmax(last_logits)
    # Probability mass on the end-of-utterance marker token.
    eou_token_id = tokenizer.encode("<|im_end|>")[-1]
    return probs[eou_token_id]
# Module-level conversation history shared across chatbot() calls.
messages = []
def chatbot(user_input):
    """Generator backing the Gradio UI: streams the bot's reply.

    Typing "exit" resets the conversation. If the EOU model judges the
    message incomplete, a waiting notice is yielded and no LLM call is made.
    """
    global messages
    # Exit condition
    if user_input.lower() == "exit":
        messages = []  # Reset conversation history
        # BUGFIX: in a generator, `return value` never reaches Gradio (it only
        # sets StopIteration.value) — the reset message must be *yielded*.
        yield "Chat ended. Refresh the page to start again."
        return
    # Add user message to conversation history
    messages.append({"role": "user", "content": user_input})
    # Calculate EOU to determine if user has finished typing
    eou_prob = calculate_eou(messages, onnx_session)
    if eou_prob < EOU_THRESHOLD:
        yield "[I'm waiting for you to complete the sentence...]"
        return
    # Stream the chatbot's response
    stream = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        messages=messages,
        temperature=0.6,
        max_tokens=2200,
        top_p=0.95,
        stream=True,
    )
    bot_response = ""
    for chunk in stream:
        # BUGFIX: delta.content can be None on role-only/final chunks;
        # concatenating None raised TypeError mid-stream.
        piece = chunk.choices[0].delta.content
        if piece:
            bot_response += piece
            yield bot_response
    # Add final bot response to conversation history
    messages.append({"role": "assistant", "content": bot_response})
# Create Gradio interface
with gr.Blocks(theme='darkdefault') as demo:
    gr.Markdown("""# Chat with DeepSeek""")
    with gr.Row():
        with gr.Column():
            # Left column: message entry and send button.
            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            submit_button = gr.Button("Send")
        with gr.Column():
            # Right column: read-only box that receives the streamed reply.
            chat_output = gr.Textbox(label="Chatbot Response", interactive=False)
    # Define interactions
    # chatbot is a generator, so Gradio streams each yielded value into chat_output.
    submit_button.click(chatbot, inputs=[user_input], outputs=[chat_output])
# Launch the app
demo.launch()