import gradio as gr
import requests
import logging
import json
import os
import numpy as np

# Set up logging to help troubleshoot issues
logging.basicConfig(level=logging.DEBUG)

# LM Studio REST API base URL
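# (LM Studio's local server defaults to port 1234; adjust BASE_URL if you
# changed the host or port in LM Studio's server settings.)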
BASE_URL = "http://localhost:1234/v1"

# Function to handle chat completions with streaming support
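# LM Studio streams OpenAI-style Server-Sent Events; each data line is expected
# to look roughly like this (illustrative, not captured from a real run):
#   data: {"choices": [{"delta": {"content": "Hello"}}]}
#   data: [DONE]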
def chat_with_lmstudio(messages):
    url = f"{BASE_URL}/chat/completions"
    payload = {
        "model": "bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-IQ2_M.gguf",  # Replace with your chat model
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 4096,
        "stream": True
    }
    logging.debug(f"Sending POST request to URL: {url}")
    logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
    try:
        with requests.post(url, json=payload, stream=True) as response:
            logging.debug(f"Response Status Code: {response.status_code}")
            response.raise_for_status()
            collected_response = ""
            for chunk in response.iter_lines():
                if chunk:
                    chunk_data = chunk.decode('utf-8').strip()
                    # Strip the SSE "data: " prefix before checking for the end-of-stream marker
                    if chunk_data.startswith("data: "):
                        chunk_data = chunk_data[6:].strip()
                    if chunk_data == "[DONE]":
                        logging.debug("Received [DONE] signal. Ending stream.")
                        break
                    logging.debug(f"Received Chunk: {chunk_data}")
                    try:
                        response_data = json.loads(chunk_data)
                        if "choices" in response_data and len(response_data["choices"]) > 0:
                            content = response_data['choices'][0].get('delta', {}).get('content') or ""
                            collected_response += content
                            yield content
                    except json.JSONDecodeError:
                        logging.error(f"Failed to decode JSON from chunk: {chunk_data}")
            if not collected_response:
                yield "I'm sorry, I couldn't generate a response. Could you please try again?"
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to LM Studio failed: {e}")
        yield "An error occurred while connecting to LM Studio. Please try again later."

# Function to get embeddings from LM Studio
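# The /embeddings endpoint is assumed to return an OpenAI-style payload,
# roughly {"data": [{"embedding": [0.01, -0.02, ...]}]}, which is what the
# parsing below expects.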
def get_embeddings(text):
    url = f"{BASE_URL}/embeddings"
    payload = {
        "model": "nomad_embed_text_v1_5_Q8_0",  # Use the exact model name registered in LM Studio
        "input": text
    }
    logging.debug(f"Sending POST request to URL: {url}")
    logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()
        data = response.json()
        embedding = data['data'][0]['embedding']
        logging.debug(f"Received Embedding: {embedding}")
        return embedding
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to LM Studio for embeddings failed: {e}")
        return None

# Function to calculate cosine similarity
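# Quick sanity check: cosine_similarity([1, 0], [0, 1]) == 0.0 (orthogonal),
# while cosine_similarity([1, 2], [2, 4]) == 1.0 (parallel vectors).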
def cosine_similarity(vec1, vec2):
    if not vec1 or not vec2:
        return 0
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    if np.linalg.norm(vec1) == 0 or np.linalg.norm(vec2) == 0:
        return 0
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

# Gradio Blocks interface for chat with file upload and embeddings
def gradio_chat_interface():
    with gr.Blocks() as iface:
        gr.Markdown("# Chat with LM Studio 🚀")
        gr.Markdown("A chat interface powered by LM Studio. You can send text messages or upload files (e.g., `.txt`) to include in the conversation.")
        
        chatbot = gr.Chatbot(type='messages')  # Specify 'messages' type to avoid deprecated tuple format
        state = gr.State([])  # To store conversation history as list of dicts
        embeddings_state = gr.State([])  # To store embeddings

        with gr.Row():
            with gr.Column(scale=4):
                user_input = gr.Textbox(
                    label="Type your message here",
                    placeholder="Enter text and press enter",
                    lines=1
                )
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Upload a file",
                    file_types=[".txt"],  # Restrict to text files; modify as needed
                    type="binary"  # Corrected from 'file' to 'binary'
                )

        send_button = gr.Button("Send")

        # Function to handle chat interactions
        def chat_interface(user_message, uploaded_file, history, embeddings):
            # Initialize history and embeddings if None
            if history is None:
                history = []
            if embeddings is None:
                embeddings = []

            # Process uploaded file if present
            if uploaded_file is not None:
                try:
                    # With type="binary", Gradio passes the uploaded file's content as raw bytes
                    file_content = uploaded_file.decode('utf-8')
                    user_message += f"\n\n[File Content]:\n{file_content}"
                    logging.debug("Processed uploaded file content.")
                    
                    # Generate embedding for the file content
                    file_embedding = get_embeddings(file_content)
                    if file_embedding:
                        embeddings.append((file_content, file_embedding))
                        logging.debug(f"Stored embedding for uploaded file: {uploaded_file.name}")
                except Exception as e:
                    logging.error(f"Error reading uploaded file: {e}")
                    user_message += "\n\n[Error reading the uploaded file.]"

            # Generate embedding for the user message
            user_embedding = get_embeddings(user_message)
            if user_embedding:
                embeddings.append((user_message, user_embedding))
                logging.debug("Stored embedding for user message.")

            # Retrieve relevant context based on embeddings (optional)
            # For demonstration, we'll retrieve top 2 similar past messages
            context_messages = []
            if embeddings:
                similarities = []
                for idx, (text, embed) in enumerate(embeddings[:-1]):  # Exclude the current user message
                    sim = cosine_similarity(user_embedding, embed)
                    similarities.append((sim, idx))
                # Sort by similarity
                similarities.sort(reverse=True, key=lambda x: x[0])
                top_n = 2
                top_indices = [idx for (_, idx) in similarities[:top_n]]
                for idx in top_indices:
                    context_messages.append(embeddings[idx][0])  # Use the stored text for that embedding as context

            # Append user message to history
            history.append({"role": "user", "content": user_message})
            logging.debug(f"Updated History: {history}")

            # Format history with additional context
            messages = []
            if context_messages:
                messages.append({"role": "system", "content": "You have the following context:"})
                for ctx in context_messages:
                    messages.append({"role": "user", "content": ctx})
                messages.append({"role": "system", "content": "Use this context to assist the user."})

            # Append all messages from history
            messages.extend(history)

            # Get response from LM Studio
            response_stream = chat_with_lmstudio(messages)
            response = ""

            # To handle streaming, we'll initialize the assistant message and update it incrementally
            assistant_message = {"role": "assistant", "content": ""}
            history.append(assistant_message)
            logging.debug(f"Appended empty assistant message: {assistant_message}")

            for chunk in response_stream:
                response += chunk
                # Update the assistant message content
                assistant_message['content'] = response
                logging.debug(f"Updated assistant message: {assistant_message}")
                # Yield the updated history (chatbot display and stored state) and embeddings
                yield history, history, embeddings

            # Finalize the history with the complete response
            assistant_message['content'] = response
            logging.debug(f"Final assistant message: {assistant_message}")
            yield history, history, embeddings

        # Connect the send button to the chat function
        send_button.click(
            fn=chat_interface,
            inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
            queue=True  # Enable queuing for handling multiple requests
        )

        # Also allow pressing Enter in the textbox to send the message
        user_input.submit(
            fn=chat_interface,
            inputs=[user_input, file_input, state, embeddings_state],
            outputs=[chatbot, state, embeddings_state],
            queue=True
        )

        # Log the working directory and its contents to help diagnose file path issues
        logging.debug(f"Current working directory: {os.getcwd()}")
        logging.debug(f"Files in current directory: {os.listdir(os.getcwd())}")

    iface.launch(share=True)

# Main function to launch the chat interface
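# Note: this assumes LM Studio's local server is running on localhost:1234 with
# both the chat model and the embedding model referenced above already loaded.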
if __name__ == "__main__":
    gradio_chat_interface()