Maximofn committed
Commit fe46a24 · 1 Parent(s): 494b36f

refactor(app.py): :sparkles: Improve response handling and streamline configuration for model invocation

Files changed (1):
  1. app.py +23 -11
app.py CHANGED
@@ -78,13 +78,15 @@ def call_model(state: MessagesState, system_prompt: str):
         pad_token_id=tokenizer.eos_token_id
     )
 
-    # Decode and clean the response
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract only the assistant's response (after the last user message)
-    response = response.split("Assistant:")[-1].strip()
+    # Get just the new tokens (excluding the input prompt tokens)
+    input_length = inputs.shape[1]
+    generated_tokens = outputs[0][input_length:]
+
+    # Decode only the new tokens to get just the assistant's response
+    assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
 
     # Convert the response to LangChain format
-    ai_message = AIMessage(content=response)
+    ai_message = AIMessage(content=assistant_response)
     return {"messages": state["messages"] + [ai_message]}
 
 # Define the graph
@@ -92,7 +94,6 @@ workflow = StateGraph(state_schema=MessagesState)
 
 # Define the node in the graph
 workflow.add_edge(START, "model")
-workflow.add_node("model", call_model)
 
 # Add memory
 memory = MemorySaver()
@@ -149,10 +150,16 @@ async def generate(request: QueryRequest):
     input_messages = [HumanMessage(content=request.query)]
 
     # Invoke the graph with custom system prompt
+    # Combine config parameters into a single dictionary
+    combined_config = {
+        **config,
+        "model": {"system_prompt": request.system_prompt}
+    }
+
+    # Invoke the graph with proper argument count
     output = graph_app.invoke(
         {"messages": input_messages},
-        config,
-        {"model": {"system_prompt": request.system_prompt}}
+        combined_config
     )
 
     # Get the model response
@@ -189,11 +196,16 @@ async def summarize(request: SummaryRequest):
     # Create the input message
     input_messages = [HumanMessage(content=request.text)]
 
-    # Invoke the graph with summarization system prompt
+    # Combine config parameters into a single dictionary
+    combined_config = {
+        **config,
+        "model": {"system_prompt": summary_system_prompt}
+    }
+
+    # Invoke the graph with proper argument count
     output = graph_app.invoke(
         {"messages": input_messages},
-        config,
-        {"model": {"system_prompt": summary_system_prompt}}
+        combined_config
    )
 
     # Get the model response
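Note on the decoding change in call_model: generate() in Hugging Face transformers returns the prompt tokens concatenated with the newly generated ones, so slicing at the prompt length isolates the assistant's reply directly, instead of decoding the whole sequence and splitting on "Assistant:" (which breaks whenever that marker appears inside the generated text). A minimal standalone sketch of the pattern; the model name and prompt are illustrative, not taken from this repo:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative; any causal LM works
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# `inputs` is a (batch, prompt_length) tensor of token ids
inputs = tokenizer("User: Hi, who are you?\nAssistant:", return_tensors="pt").input_ids

# generate() returns the prompt tokens followed by the new tokens in one sequence
outputs = model.generate(inputs, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)

# Slice off the prompt so only the newly generated tokens are decoded
input_length = inputs.shape[1]
generated_tokens = outputs[0][input_length:]
response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
print(response)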
 
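Note on the invoke fix: a compiled LangGraph graph follows the LangChain Runnable interface, whose invoke() takes the input plus a single optional config mapping; any further positional argument raises a TypeError, hence the "proper argument count" comments above. Per-request parameters therefore get merged into that one mapping. A self-contained sketch of the pattern, with a stub node in place of the repo's real call_model and an assumed thread_id value:

from langchain_core.messages import AIMessage, HumanMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Stub node standing in for the repo's call_model (no real LLM needed here)
def call_model(state: MessagesState):
    return {"messages": state["messages"] + [AIMessage(content="ok")]}

workflow = StateGraph(state_schema=MessagesState)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")
graph_app = workflow.compile(checkpointer=MemorySaver())

# Base config carrying the checkpointer's thread id (value assumed)
config = {"configurable": {"thread_id": "1"}}

# Merge per-request parameters into the one config mapping invoke accepts;
# later keys override earlier ones on collision
combined_config = {
    **config,
    "model": {"system_prompt": "You are a helpful assistant."},  # illustrative
}

# invoke takes (input, config); a third positional argument, as in the
# old code, raises a TypeError
output = graph_app.invoke({"messages": [HumanMessage(content="Hi")]}, combined_config)
print(output["messages"][-1].content)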