import gradio as gr
import requests
import logging
import json
import os
import numpy as np
# Set up logging to help troubleshoot issues
logging.basicConfig(level=logging.DEBUG)
# LM Studio REST API base URL
BASE_URL = "http://localhost:1234/v1"
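# Optional connectivity check (a sketch; the helper name is new here and it assumes
# LM Studio exposes the OpenAI-compatible GET /v1/models endpoint at BASE_URL, which is the default).
def lmstudio_is_reachable():
    """Return True if the LM Studio server answers a model-listing request."""
    try:
        response = requests.get(f"{BASE_URL}/models", timeout=5)
        response.raise_for_status()
        return True
    except requests.exceptions.RequestException as e:
        logging.error(f"LM Studio server not reachable at {BASE_URL}: {e}")
        return False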
# Function to handle chat completions with streaming support
def chat_with_lmstudio(messages):
    url = f"{BASE_URL}/chat/completions"
    payload = {
        "model": "bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-IQ2_M.gguf",  # Replace with your chat model
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 4096,
        "stream": True
    }
    logging.debug(f"Sending POST request to URL: {url}")
    logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
    try:
        with requests.post(url, json=payload, stream=True) as response:
            logging.debug(f"Response Status Code: {response.status_code}")
            response.raise_for_status()
            collected_response = ""
            for chunk in response.iter_lines():
                if chunk:
                    chunk_data = chunk.decode('utf-8').strip()
                    # Strip the SSE "data: " prefix before inspecting the payload
                    if chunk_data.startswith("data: "):
                        chunk_data = chunk_data[6:].strip()
                    # The stream ends with a literal "[DONE]" sentinel
                    if chunk_data == "[DONE]":
                        logging.debug("Received [DONE] signal. Ending stream.")
                        break
                    logging.debug(f"Received Chunk: {chunk_data}")
                    try:
                        response_data = json.loads(chunk_data)
                        if "choices" in response_data and len(response_data["choices"]) > 0:
                            content = response_data['choices'][0].get('delta', {}).get('content') or ""
                            collected_response += content
                            yield content
                    except json.JSONDecodeError:
                        logging.error(f"Failed to decode JSON from chunk: {chunk_data}")
            if not collected_response:
                yield "I'm sorry, I couldn't generate a response. Could you please try again?"
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to LM Studio failed: {e}")
        yield "An error occurred while connecting to LM Studio. Please try again later."
# Function to get embeddings from LM Studio
def get_embeddings(text):
    url = f"{BASE_URL}/embeddings"
    payload = {
        "model": "nomad_embed_text_v1_5_Q8_0",  # Use the exact model name registered in LM Studio
        "input": text
    }
    logging.debug(f"Sending POST request to URL: {url}")
    logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()
        data = response.json()
        embedding = data['data'][0]['embedding']
        logging.debug(f"Received Embedding: {embedding}")
        return embedding
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to LM Studio for embeddings failed: {e}")
        return None
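# For reference, the embeddings endpoint returns an OpenAI-style payload, roughly:
#   {"data": [{"embedding": [0.012, -0.034, ...], "index": 0}], "model": "..."}
# (illustrative values only); the function above extracts data["data"][0]["embedding"].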
# Function to calculate cosine similarity
def cosine_similarity(vec1, vec2):
    if not vec1 or not vec2:
        return 0
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    if np.linalg.norm(vec1) == 0 or np.linalg.norm(vec2) == 0:
        return 0
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
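# Quick sanity check (hypothetical values): identical vectors score 1.0 and
# orthogonal vectors score 0.0, e.g.
#   cosine_similarity([1.0, 0.0], [1.0, 0.0])  -> 1.0
#   cosine_similarity([1.0, 0.0], [0.0, 1.0])  -> 0.0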
# Gradio Blocks interface for chat with file upload and embeddings
def gradio_chat_interface():
    with gr.Blocks() as iface:
        gr.Markdown("# Chat with LM Studio 🚀")
        gr.Markdown("A chat interface powered by LM Studio. You can send text messages or upload files (e.g., `.txt`) to include in the conversation.")
        chatbot = gr.Chatbot(type='messages')  # Use the 'messages' type to avoid the deprecated tuple format
        state = gr.State([])  # Stores the conversation history as a list of dicts
        embeddings_state = gr.State([])  # Stores (text, embedding) pairs
        with gr.Row():
            with gr.Column(scale=4):
                user_input = gr.Textbox(
                    label="Type your message here",
                    placeholder="Enter text and press enter",
                    lines=1
                )
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Upload a file",
                    file_types=[".txt"],  # Restrict to text files; modify as needed
                    type="binary"  # With "binary", the handler receives the file's raw bytes
                )
        send_button = gr.Button("Send")
        # Function to handle chat interactions
        def chat_interface(user_message, uploaded_file, history, embeddings):
            # Initialize history and embeddings if None
            if history is None:
                history = []
            if embeddings is None:
                embeddings = []
            # Process the uploaded file if present
            if uploaded_file is not None:
                try:
                    # With type="binary", the uploaded file arrives as raw bytes
                    file_content = uploaded_file.decode('utf-8')
                    user_message += f"\n\n[File Content]:\n{file_content}"
                    logging.debug("Processed uploaded file.")
                    # Generate an embedding for the file content
                    file_embedding = get_embeddings(file_content)
                    if file_embedding:
                        embeddings.append((file_content, file_embedding))
                        logging.debug("Stored embedding for uploaded file.")
                except Exception as e:
                    logging.error(f"Error reading uploaded file: {e}")
                    user_message += "\n\n[Error reading the uploaded file.]"
            # Generate an embedding for the user message
            user_embedding = get_embeddings(user_message)
            if user_embedding:
                embeddings.append((user_message, user_embedding))
                logging.debug("Stored embedding for user message.")
            # Retrieve relevant context based on embeddings (optional):
            # for demonstration, pull the top 2 most similar past texts
            context_messages = []
            if embeddings:
                similarities = []
                for idx, (text, embed) in enumerate(embeddings[:-1]):  # Exclude the current user message
                    sim = cosine_similarity(user_embedding, embed)
                    similarities.append((sim, idx))
                # Sort by similarity, highest first
                similarities.sort(reverse=True, key=lambda x: x[0])
                top_n = 2
                top_indices = [idx for (_, idx) in similarities[:top_n]]
                for idx in top_indices:
                    context_messages.append(embeddings[idx][0])  # Use the stored text as context
            # Append the user message to the history
            history.append({"role": "user", "content": user_message})
            logging.debug(f"Updated History: {history}")
            # Format the history with the additional context
            messages = []
            if context_messages:
                messages.append({"role": "system", "content": "You have the following context:"})
                for ctx in context_messages:
                    messages.append({"role": "user", "content": ctx})
                messages.append({"role": "system", "content": "Use this context to assist the user."})
            # Append all messages from the history
            messages.extend(history)
            # Get the response from LM Studio
            response_stream = chat_with_lmstudio(messages)
            response = ""
            # To handle streaming, append an empty assistant message and update it incrementally;
            # because the history list is mutated in place, the gr.State object stays current too
            assistant_message = {"role": "assistant", "content": ""}
            history.append(assistant_message)
            logging.debug(f"Appended empty assistant message: {assistant_message}")
            for chunk in response_stream:
                response += chunk
                # Update the assistant message content
                assistant_message['content'] = response
                logging.debug(f"Updated assistant message: {assistant_message}")
                # Yield the updated history and embeddings
                yield history, embeddings
            # Finalize the history with the complete response
            assistant_message['content'] = response
            logging.debug(f"Final assistant message: {assistant_message}")
            yield history, embeddings
        # Connect the send button to the chat function
        send_button.click(
            fn=chat_interface,
            inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, embeddings_state],
            queue=True  # Enable queuing for handling multiple requests
        )
        # Also allow pressing Enter in the textbox to send the message
        user_input.submit(
            fn=chat_interface,
            inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, embeddings_state],
            queue=True
        )
    # Log the working directory and its contents to help diagnose file path issues
    logging.debug(f"Current working directory: {os.getcwd()}")
    logging.debug(f"Files in current directory: {os.listdir(os.getcwd())}")
    iface.launch(share=True)
# Main function to launch the chat interface
if __name__ == "__main__":
    gradio_chat_interface()