Update app.py
app.py CHANGED
@@ -1,8 +1,10 @@
 import gradio as gr
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from functools import lru_cache
 
-# …
-def load_model(model_name):
+# Cache the model and tokenizer to avoid reloading
+@lru_cache(maxsize=1)
+def load_model_cached(model_name):
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(model_name)
@@ -11,18 +13,45 @@ def load_model(model_name):
         return f"Error loading model: {str(e)}"
 
 # Function to generate a response from the model
-def chat(model_name, user_input, chat_history):
+def chat(model_name, user_input, chat_history, system_prompt=""):
     if model_name.strip() == "":
         return "Please enter a valid model name.", chat_history
 
-    # Load the model
-    generator = load_model(model_name)
+    # Load the model (cached)
+    generator = load_model_cached(model_name)
     if isinstance(generator, str):  # If there was an error loading the model
         return generator, chat_history
 
+    # Prepare the input with an optional system prompt
+    full_input = f"{system_prompt}\n\n{user_input}" if system_prompt else user_input
+
     # Generate a response
     try:
-        response = generator(…)
+        # Get the model's maximum context length
+        max_context_length = generator.model.config.max_position_embeddings
+        max_length = min(500, max_context_length)  # Ensure we don't exceed the model's limit
+
+        # Truncate the input if it's too long
+        inputs = generator.tokenizer(
+            full_input,
+            return_tensors="pt",
+            max_length=max_length,
+            truncation=True
+        )
+
+        # Generate the response with a progress indicator
+        with gr.Progress() as progress:
+            progress(0.5, desc="Generating response...")
+            response = generator(
+                inputs['input_ids'],
+                max_length=max_length,
+                num_return_sequences=1,
+                do_sample=True,
+                top_p=0.95,
+                top_k=60
+            )[0]['generated_text']
+
+        # Append the interaction to the chat history
         chat_history.append((user_input, response))
         return "", chat_history
     except Exception as e:
@@ -30,17 +59,26 @@ def chat(model_name, user_input, chat_history):
 
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Chat with …")
+    gr.Markdown("# Chat with SmolLM2-135M-Instruct")
 
     with gr.Row():
-        model_name = gr.Textbox(…)
+        model_name = gr.Textbox(
+            label="Enter Hugging Face Model Name",
+            value="HuggingFaceTB/SmolLM2-135M-Instruct",  # Default model
+            placeholder="e.g., HuggingFaceTB/SmolLM2-135M-Instruct"
+        )
 
     chatbot = gr.Chatbot(label="Chat")
     user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
+    system_prompt = gr.Textbox(
+        label="System Prompt (Optional)",
+        placeholder="e.g., You are a helpful AI assistant.",
+        lines=2
+    )
     clear_button = gr.Button("Clear Chat")
 
     # Define the chat function
-    user_input.submit(chat, [model_name, user_input, chatbot], [user_input, chatbot])
+    user_input.submit(chat, [model_name, user_input, chatbot, system_prompt], [user_input, chatbot])
     clear_button.click(lambda: [], None, chatbot, queue=False)
 
 # Launch the app
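
Note on the new generation block: Gradio's progress tracker is normally injected by declaring `progress=gr.Progress()` as a default parameter of the event handler rather than by using `gr.Progress()` as a context manager, and a transformers text-generation pipeline expects a prompt string rather than pre-tokenized `input_ids`. Also, `max_length` bounds prompt plus reply together, so truncating the input to `max_length` leaves no budget for newly generated tokens. Below is a minimal sketch of the same step written against those documented APIs, assuming `load_model_cached` returns a text-generation pipeline as the diff suggests; the `max_new_tokens=256` cap is an illustrative assumption, the sampling parameters are taken from the diff.

import gradio as gr

def chat(model_name, user_input, chat_history, system_prompt="",
         progress=gr.Progress()):  # Gradio fills this default in for event handlers
    if model_name.strip() == "":
        return "Please enter a valid model name.", chat_history

    generator = load_model_cached(model_name)  # helper from the diff above
    if isinstance(generator, str):  # loader returned an error message
        return generator, chat_history

    full_input = f"{system_prompt}\n\n{user_input}" if system_prompt else user_input
    progress(0.5, desc="Generating response...")

    # A pipeline takes the prompt text directly and can truncate it itself.
    response = generator(
        full_input,
        max_new_tokens=256,      # assumption: bound only the reply, not prompt+reply
        num_return_sequences=1,
        do_sample=True,
        top_p=0.95,
        top_k=60,
        truncation=True,
        return_full_text=False,  # drop the echoed prompt from the output
    )[0]["generated_text"]

    chat_history.append((user_input, response))
    return "", chat_history

With `max_new_tokens` the prompt can use the model's full context window while the reply length stays fixed, which avoids the silent empty-output case that arises when a truncated prompt already fills `max_length`.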