# Scripts/lmstudio_gradio.py
import gradio as gr
import requests
import logging
import json
import os
import numpy as np
# Set up logging to help troubleshoot issues
logging.basicConfig(level=logging.DEBUG)
# LM Studio REST API base URL
BASE_URL = "http://localhost:1234/v1"
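# LM Studio's local server exposes OpenAI-compatible endpoints (chat/completions, embeddings)
# at this address by default; adjust BASE_URL if your server listens on another host or port.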
# Function to handle chat completions with streaming support
def chat_with_lmstudio(messages):
url = f"{BASE_URL}/chat/completions"
payload = {
"model": "bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-IQ2_M.gguf", # Replace with your chat model
"messages": messages,
"temperature": 0.7,
"max_tokens": 4096,
"stream": True
}
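    # With "stream": True the server responds with Server-Sent Events ("data: {...}" lines),
    # which are consumed incrementally below instead of a single JSON body.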
logging.debug(f"Sending POST request to URL: {url}")
logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
try:
with requests.post(url, json=payload, stream=True) as response:
logging.debug(f"Response Status Code: {response.status_code}")
response.raise_for_status()
collected_response = ""
for chunk in response.iter_lines():
if chunk:
                    chunk_data = chunk.decode('utf-8').strip()
                    # SSE lines arrive as "data: {...}"; strip the prefix before checking for the
                    # end marker, otherwise "data: [DONE]" is never recognized and hits json.loads
                    if chunk_data.startswith("data: "):
                        chunk_data = chunk_data[6:].strip()
                    if chunk_data == "[DONE]":
                        logging.debug("Received [DONE] signal. Ending stream.")
                        break
logging.debug(f"Received Chunk: {chunk_data}")
try:
response_data = json.loads(chunk_data)
if "choices" in response_data and len(response_data["choices"]) > 0:
                            content = response_data['choices'][0].get('delta', {}).get('content') or ""
collected_response += content
yield content
except json.JSONDecodeError:
logging.error(f"Failed to decode JSON from chunk: {chunk_data}")
if not collected_response:
yield "I'm sorry, I couldn't generate a response. Could you please try again?"
except requests.exceptions.RequestException as e:
logging.error(f"Request to LM Studio failed: {e}")
yield "An error occurred while connecting to LM Studio. Please try again later."
# Function to get embeddings from LM Studio
def get_embeddings(text):
url = f"{BASE_URL}/embeddings"
payload = {
"model": "nomad_embed_text_v1_5_Q8_0", # Use the exact model name registered in LM Studio
"input": text
}
logging.debug(f"Sending POST request to URL: {url}")
logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
try:
response = requests.post(url, json=payload)
response.raise_for_status()
data = response.json()
embedding = data['data'][0]['embedding']
logging.debug(f"Received Embedding: {embedding}")
return embedding
except requests.exceptions.RequestException as e:
logging.error(f"Request to LM Studio for embeddings failed: {e}")
return None
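# Illustrative usage (not executed here): get_embeddings("hello world") returns the embedding
# vector as a list of floats, or None if the embeddings request fails.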
# Function to calculate cosine similarity
def cosine_similarity(vec1, vec2):
if not vec1 or not vec2:
return 0
vec1 = np.array(vec1)
vec2 = np.array(vec2)
if np.linalg.norm(vec1) == 0 or np.linalg.norm(vec2) == 0:
return 0
return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
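# Worked example: cosine_similarity([1, 0], [0, 1]) == 0.0 (orthogonal vectors) and
# cosine_similarity([1, 2], [2, 4]) == 1.0 (parallel vectors); empty or zero-norm
# vectors return 0 via the guards above.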
# Gradio Blocks interface for chat with file upload and embeddings
def gradio_chat_interface():
with gr.Blocks() as iface:
gr.Markdown("# Chat with LM Studio πŸš€")
gr.Markdown("A chat interface powered by LM Studio. You can send text messages or upload files (e.g., `.txt`) to include in the conversation.")
chatbot = gr.Chatbot(type='messages') # Specify 'messages' type to avoid deprecated tuple format
state = gr.State([]) # To store conversation history as list of dicts
embeddings_state = gr.State([]) # To store embeddings
with gr.Row():
with gr.Column(scale=4):
user_input = gr.Textbox(
label="Type your message here",
placeholder="Enter text and press enter",
lines=1
)
with gr.Column(scale=1):
file_input = gr.File(
label="Upload a file",
file_types=[".txt"], # Restrict to text files; modify as needed
type="binary" # Corrected from 'file' to 'binary'
)
send_button = gr.Button("Send")
# Function to handle chat interactions
def chat_interface(user_message, uploaded_file, history, embeddings):
# Initialize history and embeddings if None
if history is None:
history = []
if embeddings is None:
embeddings = []
# Process uploaded file if present
if uploaded_file is not None:
try:
                    # With type="binary", gr.File passes the file content as raw bytes
                    file_content = uploaded_file.decode('utf-8')
                    user_message += f"\n\n[File Content]:\n{file_content}"
                    logging.debug("Processed uploaded file.")
                    # Generate embedding for the file content
                    file_embedding = get_embeddings(file_content)
                    if file_embedding:
                        embeddings.append((file_content, file_embedding))
                        logging.debug("Stored embedding for uploaded file.")
except Exception as e:
logging.error(f"Error reading uploaded file: {e}")
user_message += "\n\n[Error reading the uploaded file.]"
# Generate embedding for the user message
user_embedding = get_embeddings(user_message)
if user_embedding:
embeddings.append((user_message, user_embedding))
logging.debug("Stored embedding for user message.")
# Retrieve relevant context based on embeddings (optional)
# For demonstration, we'll retrieve top 2 similar past messages
context_messages = []
if embeddings:
similarities = []
for idx, (text, embed) in enumerate(embeddings[:-1]): # Exclude the current user message
sim = cosine_similarity(user_embedding, embed)
similarities.append((sim, idx))
# Sort by similarity
similarities.sort(reverse=True, key=lambda x: x[0])
top_n = 2
top_indices = [idx for (_, idx) in similarities[:top_n]]
                for idx in top_indices:
                    # Indices refer to the embeddings list, which is not aligned with history,
                    # so use the text stored alongside each embedding as the context snippet
                    context_messages.append(embeddings[idx][0])
# Append user message to history
history.append({"role": "user", "content": user_message})
logging.debug(f"Updated History: {history}")
# Format history with additional context
messages = []
if context_messages:
messages.append({"role": "system", "content": "You have the following context:"})
for ctx in context_messages:
messages.append({"role": "user", "content": ctx})
messages.append({"role": "system", "content": "Use this context to assist the user."})
# Append all messages from history
messages.extend(history)
# Get response from LM Studio
response_stream = chat_with_lmstudio(messages)
response = ""
# To handle streaming, we'll initialize the assistant message and update it incrementally
assistant_message = {"role": "assistant", "content": ""}
history.append(assistant_message)
logging.debug(f"Appended empty assistant message: {assistant_message}")
for chunk in response_stream:
response += chunk
# Update the assistant message content
assistant_message['content'] = response
logging.debug(f"Updated assistant message: {assistant_message}")
                # Yield the updated history (for both the Chatbot and the stored state) and embeddings
                yield history, history, embeddings
# Finalize the history with the complete response
assistant_message['content'] = response
logging.debug(f"Final assistant message: {assistant_message}")
            yield history, history, embeddings
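        # Because chat_interface is a generator, Gradio streams each yielded update to the
        # outputs, so the assistant reply appears in the Chatbot incrementally as chunks arrive.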
# Connect the send button to the chat function
send_button.click(
fn=chat_interface,
inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
queue=True # Enable queuing for handling multiple requests
)
# Also allow pressing Enter in the textbox to send the message
user_input.submit(
fn=chat_interface,
inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
queue=True
)
# Add debug statements to determine file pattern issues
logging.debug(f"Current working directory: {os.getcwd()}")
logging.debug(f"Files in current directory: {os.listdir(os.getcwd())}")
iface.launch(share=True)
# Main function to launch the chat interface
if __name__ == "__main__":
gradio_chat_interface()
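# Note: launch(share=True) serves the app locally and also requests a temporary public
# gradio.live link; drop share=True if you only want the local URL.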