# Scripts/lmstudio_gradio.py
import gradio as gr
import requests
import logging
import json
import os
import numpy as np
# Set up logging to help troubleshoot issues
logging.basicConfig(level=logging.DEBUG)
# LM Studio REST API base URL
BASE_URL = "http://localhost:1234/v1"
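# LM Studio's local server exposes OpenAI-compatible endpoints (chat/completions, embeddings)
# at this address by default; adjust BASE_URL if your server listens on another host or port.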
# Function to handle chat completions with streaming support
def chat_with_lmstudio(messages):
url = f"{BASE_URL}/chat/completions"
payload = {
"model": "bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-IQ2_M.gguf", # Replace with your chat model
"messages": messages,
"temperature": 0.7,
"max_tokens": 4096,
"stream": True
}
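    # With "stream": True the server responds with Server-Sent Events ("data: {...}" lines),
    # which are consumed incrementally below instead of a single JSON body.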
logging.debug(f"Sending POST request to URL: {url}")
logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
try:
with requests.post(url, json=payload, stream=True) as response:
logging.debug(f"Response Status Code: {response.status_code}")
response.raise_for_status()
collected_response = ""
for chunk in response.iter_lines():
if chunk:
                    chunk_data = chunk.decode('utf-8').strip()
                    # SSE lines arrive as "data: {...}"; strip the prefix before checking for the
                    # end marker, otherwise "data: [DONE]" is never recognized and hits json.loads
                    if chunk_data.startswith("data: "):
                        chunk_data = chunk_data[6:].strip()
                    if chunk_data == "[DONE]":
                        logging.debug("Received [DONE] signal. Ending stream.")
                        break
logging.debug(f"Received Chunk: {chunk_data}")
try:
response_data = json.loads(chunk_data)
if "choices" in response_data and len(response_data["choices"]) > 0:
                            content = response_data['choices'][0].get('delta', {}).get('content') or ""
collected_response += content
yield content
except json.JSONDecodeError:
logging.error(f"Failed to decode JSON from chunk: {chunk_data}")
if not collected_response:
yield "I'm sorry, I couldn't generate a response. Could you please try again?"
except requests.exceptions.RequestException as e:
logging.error(f"Request to LM Studio failed: {e}")
yield "An error occurred while connecting to LM Studio. Please try again later."
# Function to get embeddings from LM Studio
def get_embeddings(text):
url = f"{BASE_URL}/embeddings"
payload = {
"model": "nomad_embed_text_v1_5_Q8_0", # Use the exact model name registered in LM Studio
"input": text
}
logging.debug(f"Sending POST request to URL: {url}")
logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
try:
response = requests.post(url, json=payload)
response.raise_for_status()
data = response.json()
embedding = data['data'][0]['embedding']
logging.debug(f"Received Embedding: {embedding}")
return embedding
except requests.exceptions.RequestException as e:
logging.error(f"Request to LM Studio for embeddings failed: {e}")
return None
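# Illustrative usage (not executed here): get_embeddings("hello world") returns the embedding
# vector as a list of floats, or None if the embeddings request fails.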
# Function to calculate cosine similarity
def cosine_similarity(vec1, vec2):
if not vec1 or not vec2:
return 0
vec1 = np.array(vec1)
vec2 = np.array(vec2)
if np.linalg.norm(vec1) == 0 or np.linalg.norm(vec2) == 0:
return 0
return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
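# Worked example: cosine_similarity([1, 0], [0, 1]) == 0.0 (orthogonal vectors) and
# cosine_similarity([1, 2], [2, 4]) == 1.0 (parallel vectors); empty or zero-norm
# vectors return 0 via the guards above.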
# Gradio Blocks interface for chat with file upload and embeddings
def gradio_chat_interface():
with gr.Blocks() as iface:
gr.Markdown("# Chat with LM Studio πŸš€")
gr.Markdown("A chat interface powered by LM Studio. You can send text messages or upload files (e.g., `.txt`) to include in the conversation.")
chatbot = gr.Chatbot(type='messages') # Specify 'messages' type to avoid deprecated tuple format
state = gr.State([]) # To store conversation history as list of dicts
embeddings_state = gr.State([]) # To store embeddings
with gr.Row():
with gr.Column(scale=4):
user_input = gr.Textbox(
label="Type your message here",
placeholder="Enter text and press enter",
lines=1
)
with gr.Column(scale=1):
file_input = gr.File(
label="Upload a file",
file_types=[".txt"], # Restrict to text files; modify as needed
type="binary" # Corrected from 'file' to 'binary'
)
send_button = gr.Button("Send")
# Function to handle chat interactions
def chat_interface(user_message, uploaded_file, history, embeddings):
# Initialize history and embeddings if None
if history is None:
history = []
if embeddings is None:
embeddings = []
# Process uploaded file if present
if uploaded_file is not None:
try:
                    # With type="binary", gr.File passes the file content as raw bytes
                    file_content = uploaded_file.decode('utf-8')
                    user_message += f"\n\n[File Content]:\n{file_content}"
                    logging.debug("Processed uploaded file.")
                    # Generate embedding for the file content
                    file_embedding = get_embeddings(file_content)
                    if file_embedding:
                        embeddings.append((file_content, file_embedding))
                        logging.debug("Stored embedding for uploaded file.")
except Exception as e:
logging.error(f"Error reading uploaded file: {e}")
user_message += "\n\n[Error reading the uploaded file.]"
# Generate embedding for the user message
user_embedding = get_embeddings(user_message)
if user_embedding:
embeddings.append((user_message, user_embedding))
logging.debug("Stored embedding for user message.")
# Retrieve relevant context based on embeddings (optional)
# For demonstration, we'll retrieve top 2 similar past messages
context_messages = []
if embeddings:
similarities = []
for idx, (text, embed) in enumerate(embeddings[:-1]): # Exclude the current user message
sim = cosine_similarity(user_embedding, embed)
similarities.append((sim, idx))
# Sort by similarity
similarities.sort(reverse=True, key=lambda x: x[0])
top_n = 2
top_indices = [idx for (_, idx) in similarities[:top_n]]
                for idx in top_indices:
                    # Indices refer to the embeddings list, which is not aligned with history,
                    # so use the text stored alongside each embedding as the context snippet
                    context_messages.append(embeddings[idx][0])
# Append user message to history
history.append({"role": "user", "content": user_message})
logging.debug(f"Updated History: {history}")
# Format history with additional context
messages = []
if context_messages:
messages.append({"role": "system", "content": "You have the following context:"})
for ctx in context_messages:
messages.append({"role": "user", "content": ctx})
messages.append({"role": "system", "content": "Use this context to assist the user."})
# Append all messages from history
messages.extend(history)
# Get response from LM Studio
response_stream = chat_with_lmstudio(messages)
response = ""
# To handle streaming, we'll initialize the assistant message and update it incrementally
assistant_message = {"role": "assistant", "content": ""}
history.append(assistant_message)
logging.debug(f"Appended empty assistant message: {assistant_message}")
for chunk in response_stream:
response += chunk
# Update the assistant message content
assistant_message['content'] = response
logging.debug(f"Updated assistant message: {assistant_message}")
                # Yield the updated history (for both the Chatbot and the stored state) and embeddings
                yield history, history, embeddings
# Finalize the history with the complete response
assistant_message['content'] = response
logging.debug(f"Final assistant message: {assistant_message}")
            yield history, history, embeddings
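        # Because chat_interface is a generator, Gradio streams each yielded update to the
        # outputs, so the assistant reply appears in the Chatbot incrementally as chunks arrive.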
# Connect the send button to the chat function
send_button.click(
fn=chat_interface,
inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
queue=True # Enable queuing for handling multiple requests
)
# Also allow pressing Enter in the textbox to send the message
user_input.submit(
fn=chat_interface,
inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
queue=True
)
# Add debug statements to determine file pattern issues
logging.debug(f"Current working directory: {os.getcwd()}")
logging.debug(f"Files in current directory: {os.listdir(os.getcwd())}")
iface.launch(share=True)
# Main function to launch the chat interface
if __name__ == "__main__":
gradio_chat_interface()
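# Note: launch(share=True) serves the app locally and also requests a temporary public
# gradio.live link; drop share=True if you only want the local URL.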