import gradio as gr
import requests
import re
import os
# Load API configuration from environment variables (only MODEL_ID has a fallback)
API_ENDPOINT = os.getenv("API_ENDPOINT")
API_TOKEN = os.getenv("API_TOKEN")
MODEL_ID = os.getenv("MODEL_ID", "none")
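# NOTE: the payload construction and response parsing below assume API_ENDPOINT
# points at an OpenAI-compatible chat-completions URL (i.e. the response exposes
# choices[0].message.content).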


def get_ai_response(message, history):
    """Fetch the AI response from the API using the modern messages format."""
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for user_msg, ai_msg in history:
        if ai_msg != "⏳ Thinking...":
            # Strip the collapsible reasoning block and any other HTML from prior
            # AI messages so they don't produce nesting artifacts when re-sent as context
            clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
            clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": clean_ai_msg})
    # Add the latest user message
    messages.append({"role": "user", "content": message})
    payload = {
        "model": MODEL_ID,
        "messages": messages,
        "stream": False,
        "max_tokens": 10000,
        "temperature": 0.7
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json"
    }
    try:
        # timeout keeps a stalled request from hanging the UI indefinitely
        response = requests.post(API_ENDPOINT, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        raw_response = response.json()["choices"][0]["message"]["content"]
        html_response = convert_reasoning_to_collapsible(raw_response)
        return html_response
    except Exception as e:
        return f"Error: {str(e)}"


def convert_reasoning_to_collapsible(text):
    """Convert <reasoning> tags into collapsible HTML elements."""
    reasoning_pattern = re.compile(r'<reasoning>(.*?)</reasoning>', re.DOTALL)

    def replace_with_collapsible(match):
        reasoning_content = match.group(1).strip()
        return (
            f'<details>'
            f'<summary><strong>See reasoning</strong></summary>'
            f'<div class="reasoning-content">{reasoning_content}</div>'
            f'</details>'
        )

    html_response = reasoning_pattern.sub(replace_with_collapsible, text)
    html_response = re.sub(r'<sep>.*?</sep>', '', html_response, flags=re.DOTALL)
    html_response = html_response.replace('<sep>', '').replace('</sep>', '')
    return html_response


def add_user_message(message, history):
    """Add the user message with a placeholder AI response ('⏳ Thinking...')."""
    if history is None:
        history = []
    history.append((message, "⏳ Thinking..."))
    return history, history


def generate_response_from_history(history):
    """Replace the last '⏳ Thinking...' placeholder with the real assistant response."""
    if not history:
        return history, history
    last_user_message = history[-1][0]
    # Pass the (user, assistant) tuple history straight through: get_ai_response
    # already skips the pending placeholder turn and appends the latest user
    # message itself, so no conversion to the messages format is needed here.
    ai_response = get_ai_response(last_user_message, history)
    history[-1] = (last_user_message, ai_response)
    return history, history
# CSS for dark mode + collapsible sections
custom_css = """
body { background-color: #1a1a1a; color: #ffffff; font-family: 'Arial', sans-serif; }
#chatbot { height: 80vh; background-color: #2d2d2d; border: 1px solid #404040; border-radius: 8px; }
input, button { background-color: #333333; color: #ffffff; border: 1px solid #404040; border-radius: 5px; }
button:hover { background-color: #404040; }
details { background-color: #333333; padding: 10px; margin: 5px 0; border-radius: 5px; }
summary { cursor: pointer; color: #70a9e6; }
.reasoning-content { padding: 10px; margin-top: 5px; background-color: #404040; border-radius: 5px; }
"""
# Set model name for UI title
model_display_name = MODEL_ID
# Gradio UI definition
with gr.Blocks(css=custom_css, title=model_display_name) as demo:
    with gr.Column():
        gr.Markdown("## nvidia-Llama-3_1-Nemotron-Ultra-253B-v1 Demo")
        gr.Markdown("This is a demo of nvidia-Llama-3_1-Nemotron-Ultra-253B-v1")
        chatbot = gr.Chatbot(elem_id="chatbot", render_markdown=False, bubble_full_width=True)
        with gr.Row():
            message = gr.Textbox(placeholder="Type your message...", show_label=False, container=False)
            submit_btn = gr.Button("Send", size="lg")
        clear_chat_btn = gr.Button("Clear Chat")
    chat_state = gr.State([])

    # JS to allow rendering HTML in the chat: a MutationObserver re-injects each
    # new message's text content as innerHTML so the <details> blocks render
    js = """
    function() {
        const observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.addedNodes.length) {
                    document.querySelectorAll('#chatbot .message:not(.processed)').forEach(msg => {
                        msg.classList.add('processed');
                        const content = msg.querySelector('.content');
                        if (content) {
                            content.innerHTML = content.textContent;
                        }
                    });
                }
            });
        });
        const chatbot = document.getElementById('chatbot');
        if (chatbot) {
            observer.observe(chatbot, { childList: true, subtree: true });
        }
        return [];
    }
    """

    # Event: Send button clicked
    submit_btn.click(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "", None, message  # clear the textbox
    )

    # Event: pressing Enter in the textbox
    message.submit(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "", None, message
    )

    # Clear chat
    clear_chat_btn.click(
        lambda: ([], []),
        None,
        [chat_state, chatbot]
    )

    # Load JS on UI load
    demo.load(
        fn=lambda: None,
        inputs=None,
        outputs=None,
        js=js
    )
# Launch Gradio interface
demo.queue()
demo.launch()
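
# Example local run (hypothetical values; on Spaces these come from the repo's secrets):
#   API_ENDPOINT=https://api.example.com/v1/chat/completions \
#   API_TOKEN=... MODEL_ID=my-model python app.py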