Update app.py
app.py (CHANGED)
```diff
@@ -11,13 +11,17 @@ MODEL_ID = "drwlf/PsychoQwen14b"
 HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Initialize client, handle potential missing token
-print("
+client = None  # Initialize client to None
+if not HF_TOKEN:
+    print("Warning: HF_TOKEN secret not found. Cannot initialize InferenceClient.")
+    # Optionally raise an error or handle this case in the respond function
+else:
+    try:
+        client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+        print("InferenceClient initialized successfully.")
+    except Exception as e:
+        print(f"Error initializing InferenceClient: {e}")
+        # Client remains None
 
 
 def respond(
```
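Since everything downstream depends on `client` surviving this guard, it is worth smoke-testing the initialization before deploying. A minimal sketch, not part of the commit, assuming `HF_TOKEN` is exported locally and the model is reachable through the Inference API:

```python
import os
from huggingface_hub import InferenceClient

client = None
if os.getenv("HF_TOKEN"):
    client = InferenceClient(model="drwlf/PsychoQwen14b", token=os.getenv("HF_TOKEN"))
    # One-off, non-streaming request to confirm the endpoint answers at all.
    reply = client.chat_completion([{"role": "user", "content": "ping"}], max_tokens=8)
    print(reply.choices[0].message.content)
else:
    print("HF_TOKEN not set; skipping smoke test.")
```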
```diff
@@ -33,7 +37,10 @@ def respond(
     Generator function to stream responses from the HF Inference API.
     """
     if not client:
-        yield "Error: Inference Client not initialized. Check HF_TOKEN."
+        yield "Error: Inference Client not initialized. Check HF_TOKEN secret."
+        return
+    if not message or not message.strip():
+        yield "Please enter a message."
         return
 
     messages = [{"role": "system", "content": system_message}]
```
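For orientation: `gr.ChatInterface` calls the handler with the user message and the chat history first, then each `additional_inputs` value in order. The signature implied by the inputs wired up later in this commit would look roughly like the sketch below; the diff never shows it, so treat the parameter list as a reconstruction.

```python
def respond(
    message: str,         # current user message
    history: list,        # prior turns, managed by gr.ChatInterface
    system_message: str,  # from the "System message" Textbox
    max_tokens: int,      # "Max new tokens" slider
    temperature: float,   # "Temperature" slider
    top_p: float,         # "Top-P (nucleus sampling)" slider
    top_k: int,           # "Top-K" slider; 0 is mapped to None before the API call
):
    ...
```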
```diff
@@ -52,6 +59,14 @@ def respond(
     # Handle Top-K value (API often expects None to disable, not 0)
     top_k_val = top_k if top_k > 0 else None
 
+    # Debugging: Print parameters being sent
+    print(f"--- Sending Request ---")
+    print(f"Model: {MODEL_ID}")
+    print(f"Messages: {messages}")
+    print(f"Max Tokens: {max_tokens}, Temp: {temperature}, Top-P: {top_p}, Top-K: {top_k_val}")
+    print(f"-----------------------")
+
+
     try:
         stream = client.chat_completion(
             messages,
```
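The hunk cuts off mid-call, so the remaining `chat_completion` arguments are not visible. One caveat worth flagging: `InferenceClient.chat_completion` follows the OpenAI chat schema and accepts `temperature` and `top_p`, but, as far as I know, it has no `top_k` parameter (that knob belongs to `text_generation`), so forwarding `top_k_val` here would likely raise a `TypeError`. A plausible completion of the call, forwarding only parameters the method is known to accept:

```python
# Sketch of the presumed call site; top_k_val is deliberately not forwarded
# (see the caveat above).
stream = client.chat_completion(
    messages,
    max_tokens=max_tokens,
    temperature=temperature,
    top_p=top_p,
    stream=True,  # required for the token-by-token loop in the next hunk
)
```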
```diff
@@ -73,16 +88,50 @@ def respond(
             token = message_chunk.choices[0].delta.content
             if token:  # Ensure token is not None or empty
                 response += token
+                # print(token, end="")  # Debugging stream locally
                 yield response
         # Optional: Add error checking within the loop if needed
 
     except Exception as e:
         print(f"Error during chat completion: {e}")
         yield f"Sorry, an error occurred: {str(e)}"
-    finally:
+    # No finally block needed unless specific cleanup is required
+
+
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
+demo = gr.ChatInterface(
+    respond,
+    chatbot=gr.Chatbot(height=500),  # Set chatbot height
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly psychotherapy AI capable of thinking.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),  # Adjusted max temp based on common usage
+        gr.Slider(
+            minimum=0.05,  # Min Top-P often > 0
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P (nucleus sampling)",
+        ),
+        # Added Top-K slider
+        gr.Slider(
+            minimum=0,  # 0 disables Top-K
+            maximum=100,  # Common range, adjust if needed
+            value=0,  # Default to disabled
+            step=1,
+            label="Top-K (0 = disabled)",
+        ),
+    ],
+    title="PsychoQwen Chat",
+    description=f"Chat with {MODEL_ID}. Adjust generation parameters below.",
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear Chat",
+)
+
+# --- Launch the app directly ---
+# The if __name__ == "__main__": block is removed or commented out
+demo.queue().launch(debug=True)  # debug=True is useful for seeing logs in the Space
 
```
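Worth flagging on the new `gr.ChatInterface` call: `retry_btn`, `undo_btn`, and `clear_btn` were deprecated in Gradio 4.x and removed in 5.x, where passing them raises a `TypeError` at startup. If the Space's SDK version is not pinned, a version-aware construction along these lines sidesteps it (a sketch; the `packaging` dependency is an assumption, though it ships with most Gradio installs):

```python
import gradio as gr
from packaging.version import Version  # assumption: available in the image

chat_kwargs = dict(title="PsychoQwen Chat")
if Version(gr.__version__) < Version("5.0"):
    # Button-label kwargs only exist on pre-5.0 Gradio.
    chat_kwargs.update(retry_btn="Retry", undo_btn="Undo", clear_btn="Clear Chat")

demo = gr.ChatInterface(respond, **chat_kwargs)
```

On the launch line, `demo.queue()` is what historically enabled generator (streaming) handlers; newer Gradio queues by default, so the call is harmless either way.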