drwlf committed
Commit 2dd258a · verified · 1 Parent(s): 5e9fdb5

Update app.py

Files changed (1): app.py (+123, -0)
app.py CHANGED
@@ -1,5 +1,128 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+import os  # Import os to read the token from the environment
+
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
+# !! REPLACE THIS WITH YOUR HUGGING FACE MODEL ID !!
+MODEL_ID = "drwlf/PsychoQwen14b"
+# It's recommended to use HF_TOKEN from environment variables/Space secrets
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# Initialize the client, handling a potentially missing token
+try:
+    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+    print("InferenceClient initialized successfully.")
+except Exception as e:
+    print(f"Error initializing InferenceClient: {e}")
+    print("Please ensure HF_TOKEN is set in your environment/secrets.")
+    client = None  # Set client to None so respond() can report the failure
+
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,  # Added top_k parameter
+):
+    """
+    Generator function to stream responses from the HF Inference API.
+    """
+    if not client:
+        yield "Error: Inference Client not initialized. Check HF_TOKEN."
+        return
+
+    messages = [{"role": "system", "content": system_message}]
+
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+
+    messages.append({"role": "user", "content": message})
+
+    response = ""
+
+    # Handle the Top-K value (the API expects None to disable it, not 0)
+    top_k_val = top_k if top_k > 0 else None
+
+    try:
+        # chat_completion follows the OpenAI-style API, so `top_k` may not be
+        # accepted by every huggingface_hub version; pass it only when enabled
+        # and let the except block below surface any rejection.
+        sampling_kwargs = {"top_k": top_k_val} if top_k_val is not None else {}
+        stream = client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            **sampling_kwargs,
+        )
+
+        for message_chunk in stream:
+            # Make sure the chunk carries a delta with content before using it
+            if (
+                message_chunk.choices
+                and message_chunk.choices[0].delta
+                and message_chunk.choices[0].delta.content
+            ):
+                response += message_chunk.choices[0].delta.content
+                yield response
+
+    except Exception as e:
+        print(f"Error during chat completion: {e}")
+        yield f"Sorry, an error occurred: {str(e)}"
+
+
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
+demo = gr.ChatInterface(
+    respond,
+    chatbot=gr.Chatbot(height=500),  # Set chatbot height
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly psychotherapy AI capable of thinking.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),  # Max of 2.0 based on common usage
+        gr.Slider(
+            minimum=0.05,  # Min Top-P is usually kept above 0
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P (nucleus sampling)",
+        ),
+        # Added Top-K slider
+        gr.Slider(
+            minimum=0,  # 0 disables Top-K
+            maximum=100,  # Common range, adjust if needed
+            value=0,  # Default to disabled
+            step=1,
+            label="Top-K (0 = disabled)",
+        ),
+    ],
+    title="PsychoQwen Chat",
+    description=f"Chat with {MODEL_ID}. Adjust generation parameters below.",
+    # Note: these button kwargs exist in Gradio 4.x but not in newer releases
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear Chat",
+)
+
+
+if __name__ == "__main__":
+    demo.queue().launch(debug=True)  # queue() is needed for streaming
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
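
A quick way to sanity-check the new streaming handler outside the Gradio UI is a small driver script. This is a hypothetical sketch, not part of the commit: it assumes HF_TOKEN is exported in the environment and that app.py is importable from the working directory (importing app builds the interface but does not launch it, since launch() is guarded by __main__).

    # smoke_test.py -- hypothetical helper, not part of this commit.
    # Assumes HF_TOKEN is set and app.py is on the import path.
    from app import respond

    final = ""
    for partial in respond(
        message="Hello, how are you?",
        history=[],  # no prior turns
        system_message="You are a friendly psychotherapy AI capable of thinking.",
        max_tokens=64,
        temperature=0.7,
        top_p=0.95,
        top_k=0,  # 0 disables Top-K, matching the slider default
    ):
        final = partial  # each yield carries the accumulated response so far

    print(final)

Because respond() yields the accumulated text on every chunk, keeping only the last yielded value gives the full reply; errors from the Inference API are yielded as strings rather than raised, matching how the ChatInterface displays them.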