drwlf committed · verified
Commit fd7f475 · Parent: 7f97613

Update app.py

Files changed (1): app.py +63 -14
app.py CHANGED
@@ -11,13 +11,17 @@ MODEL_ID = "drwlf/PsychoQwen14b"
 HF_TOKEN = os.getenv("HF_TOKEN")

 # Initialize client, handle potential missing token
-try:
-    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
-    print("InferenceClient initialized successfully.")
-except Exception as e:
-    print(f"Error initializing InferenceClient: {e}")
-    print("Please ensure HF_TOKEN is set in your environment/secrets.")
-    client = None  # Set client to None if initialization fails
+client = None  # Initialize client to None
+if not HF_TOKEN:
+    print("Warning: HF_TOKEN secret not found. Cannot initialize InferenceClient.")
+    # Optionally raise an error or handle this case in the respond function
+else:
+    try:
+        client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+        print("InferenceClient initialized successfully.")
+    except Exception as e:
+        print(f"Error initializing InferenceClient: {e}")
+        # Client remains None


 def respond(
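Review note: with this change the app no longer crashes at import time, but client silently stays None when the secret is absent or rejected. A startup sanity check against the Hub can make that failure visible earlier; a minimal sketch, assuming huggingface_hub is installed (token_is_valid is an illustrative helper, not part of this commit):

from huggingface_hub import whoami

def token_is_valid(token):
    # whoami() raises (e.g. an HTTPError) when the token is invalid or expired.
    if not token:
        return False
    try:
        whoami(token=token)
        return True
    except Exception:
        return False

if not token_is_valid(HF_TOKEN):
    print("Warning: HF_TOKEN is missing or invalid; chat requests will fail.")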
@@ -33,7 +37,10 @@ def respond(
     Generator function to stream responses from the HF Inference API.
     """
     if not client:
-        yield "Error: Inference Client not initialized. Check HF_TOKEN."
+        yield "Error: Inference Client not initialized. Check HF_TOKEN secret."
+        return
+    if not message or not message.strip():
+        yield "Please enter a message."
         return

     messages = [{"role": "system", "content": system_message}]
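Review note: because respond is a generator, these guards surface as the first streamed message rather than as an exception, so Gradio renders them like a normal reply. A hypothetical smoke test of that behavior (the argument order is assumed from the additional_inputs wiring further down, not verified against the full signature):

gen = respond("", [], "You are a friendly psychotherapy AI capable of thinking.", 512, 0.7, 0.95, 0)
print(next(gen))  # expected (with an initialized client): "Please enter a message."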
@@ -52,6 +59,14 @@ def respond(
     # Handle Top-K value (API often expects None to disable, not 0)
     top_k_val = top_k if top_k > 0 else None

+    # Debugging: Print parameters being sent
+    print(f"--- Sending Request ---")
+    print(f"Model: {MODEL_ID}")
+    print(f"Messages: {messages}")
+    print(f"Max Tokens: {max_tokens}, Temp: {temperature}, Top-P: {top_p}, Top-K: {top_k_val}")
+    print(f"-----------------------")
+
+
     try:
         stream = client.chat_completion(
             messages,
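Review note: depending on the installed huggingface_hub version, chat_completion (the OpenAI-compatible chat endpoint) may not accept a top_k keyword even though text_generation does, so passing top_k_val through can raise a TypeError. A defensive sketch, assuming the same client and variables as above:

gen_kwargs = dict(max_tokens=max_tokens, temperature=temperature, top_p=top_p, stream=True)
if top_k_val is not None:
    gen_kwargs["top_k"] = top_k_val  # may be unsupported by some client versions
try:
    stream = client.chat_completion(messages, **gen_kwargs)
except TypeError:
    gen_kwargs.pop("top_k", None)  # retry without the unsupported keyword
    stream = client.chat_completion(messages, **gen_kwargs)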
@@ -73,16 +88,50 @@ def respond(
             token = message_chunk.choices[0].delta.content
             if token:  # Ensure token is not None or empty
                 response += token
+                # print(token, end="")  # Debugging stream locally
             yield response
         # Optional: Add error checking within the loop if needed

     except Exception as e:
         print(f"Error during chat completion: {e}")
         yield f"Sorry, an error occurred: {str(e)}"
-    finally:
-        # Ensure the stream object is properly handled if it exists
-        # (Though InferenceClient might handle cleanup internally)
-        if stream is not None:
-            # Potential cleanup if required by the library, often not needed explicitly
-            pass
+    # No finally block needed unless specific cleanup is required
+
+
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
+demo = gr.ChatInterface(
+    respond,
+    chatbot=gr.Chatbot(height=500),  # Set chatbot height
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly psychotherapy AI capable of thinking.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),  # Adjusted max temp based on common usage
+        gr.Slider(
+            minimum=0.05,  # Min Top-P often > 0
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P (nucleus sampling)",
+        ),
+        # Added Top-K slider
+        gr.Slider(
+            minimum=0,  # 0 disables Top-K
+            maximum=100,  # Common range, adjust if needed
+            value=0,  # Default to disabled
+            step=1,
+            label="Top-K (0 = disabled)",
+        ),
+    ],
+    title="PsychoQwen Chat",
+    description=f"Chat with {MODEL_ID}. Adjust generation parameters below.",
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear Chat",
+)
+
+# --- Launch the app directly ---
+# The if __name__ == "__main__": block is removed or commented out
+demo.queue().launch(debug=True)  # debug=True is useful for seeing logs in the Space
 
 
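Review note: the streaming loop above can also be exercised outside Gradio, which helps when debugging the Space. A minimal sketch reusing client and messages as defined in app.py:

# Prints tokens as they arrive; mirrors the accumulation done in respond().
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()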
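Review note: the retry_btn, undo_btn, and clear_btn keyword arguments were deprecated and later removed from gr.ChatInterface in newer Gradio releases, so this commit implicitly pins older Gradio behavior. A version-tolerant construction might look like this sketch (a forward-compatibility assumption, not part of the commit):

import gradio as gr

base_kwargs = dict(chatbot=gr.Chatbot(height=500), title="PsychoQwen Chat")
try:
    # Older Gradio accepts custom button labels.
    demo = gr.ChatInterface(respond, retry_btn="Retry", undo_btn="Undo",
                            clear_btn="Clear Chat", **base_kwargs)
except TypeError:
    # Newer Gradio removed these kwargs; fall back to defaults.
    demo = gr.ChatInterface(respond, **base_kwargs)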