looker01202 committed
Commit daecd7b · 1 Parent(s): 66ab75c

Gemini changes added 16

Files changed (1):
  1. app.py +39 -79
app.py CHANGED
@@ -220,94 +220,54 @@ def chat(message, history, hotel_id):
     print(input_text)
     print("-----------------------------")
 
-    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+    # --- Tokenize AND get input length/attention mask ---
+    inputs = tokenizer(input_text, return_tensors="pt").to(device)  # Use tokenizer()
+    input_length = inputs.input_ids.shape[1]  # Define input_length using input_ids
+    print(f"DEBUG: Input token length = {input_length}")  # Keep this debug print
 
+    # --- Generate using input_ids and attention_mask ---
     with torch.no_grad():
-        # Using do_sample=False for more deterministic RAG based on context
-        outputs = model.generate(inputs, max_new_tokens=1024, do_sample=False)
-
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-
-    print("--- Granite Raw Output ---")
-    print(decoded)
-    print("--------------------------")
-
-    # --- Start: UPDATED Response Parsing for Granite (v7 - Improved Regex & Fallback) ---
-    try:
-        assistant_marker = "<|start_of_role|>assistant"
-        end_role_marker_literal = "<|end_of_role|>"  # Use for fallback
-        end_text_marker_literal = "<|end_of_text|>"  # Use for fallback
-
-        start_index = decoded.rfind(assistant_marker)
-
-        if start_index != -1:
-            search_area = decoded[start_index:]
-
-            # Regex pattern v7: Focus on capturing group between role and text markers
-            # - .*?    : Match assistant start tag + controls non-greedily
-            # - <\|    : Match literal <|
-            # - [^>]*? : Match any non-> characters non-greedily (lenient end_of_role)
-            # - \|>    : Match literal |>
-            # - \s*    : Match whitespace
-            # - (.*?)  : Capture content non-greedily (GROUP 1)
-            # - <\|    : Match literal <|
-            # - [^>]*? : Match any non-> characters non-greedily (lenient end_of_text)
-            # - \|>    : Match literal |>
-            pattern = re.compile(r".*?<\|[^>]*?role[^>]*?\|>\s*(.*?)<\|[^>]*?text[^>]*?\|>", re.DOTALL)
-
-            match = pattern.search(search_area)
-
-            if match:
-                response = match.group(1).strip()
-                if not response:
-                    response = "Sorry, I encountered an issue generating a response (empty)."
-            else:
-                # If the pattern still didn't match - IMPROVED FALLBACK
-                print(f"❌ Error: Regex pattern v7 did not match structure. Search area started with: {repr(search_area[:150])}")
-                try:
-                    # Fallback: Find the first end_role marker *after* the start_index
-                    end_role_fb_index = decoded.find(end_role_marker_literal, start_index)
-                    if end_role_fb_index != -1:
-                        # Find the first end_text marker *after* the end_role marker
-                        content_start_fb_index = end_role_fb_index + len(end_role_marker_literal)
-                        end_text_fb_index = decoded.find(end_text_marker_literal, content_start_fb_index)
-
-                        if end_text_fb_index != -1:
-                            # Extract between them
-                            potential_response = decoded[content_start_fb_index:end_text_fb_index].strip()
-                            if potential_response:
-                                print("⚠️ WARNING: Using improved fallback parsing due to regex failure.")
-                                response = potential_response
-                            else:
-                                raise ValueError("Improved fallback parsing yielded empty string.")
-                        else:
-                            raise ValueError("Improved fallback parsing failed: end_text marker not found after end_role marker.")
-                    else:
-                        raise ValueError("Improved fallback parsing failed: end_role marker not found after assistant start.")
-
-                    # --- ADD THIS DEBUG LINE ---
-                    print(f"DEBUG: Final response variable before UI append = {repr(response)}")
-                    # --- END ADD THIS DEBUG LINE ---
-
-                except Exception as fallback_e:
-                    print(f"Improved fallback parsing also failed: {fallback_e}")
-                    response = "Sorry, I couldn't parse the model's response structure (regex & fallback failed)."
-
-        else:
-            print("❌ Error: Assistant start marker not found in decoded output.")
-            response = "Sorry, I couldn't find the start of the assistant's response."
-
-    except Exception as e:
-        print(f"❌ Unexpected Error during response parsing: {e}")
-        response = "Sorry, an unexpected error occurred while parsing the response."
-    # --- End: UPDATED Response Parsing for Granite (v7 - Improved Regex & Fallback) ---
-
-    # Add the final assistant reply to the UI history
-    ui_history.append({"role": "assistant", "content": response})
-
-    # Final yield with assistant reply
-    yield ui_history, ""  # Update chat, keep textbox cleared
+        outputs = model.generate(
+            inputs.input_ids,                      # Pass input_ids explicitly
+            attention_mask=inputs.attention_mask,  # Pass attention_mask
+            max_new_tokens=1024,
+            do_sample=False
+        )
+
+    # --- Raw output shape printing (keep) ---
+    print("--- Granite Raw Output Tokens (Shape) ---")
+    print(outputs.shape)
+    print("-----------------------------------------")
+
+    # --- Start: NEW Decoding Strategy (like IBM example) ---
+    try:
+        # Get only the newly generated token IDs
+        new_token_ids = outputs[0][input_length:]
+        print(f"DEBUG: Number of new tokens generated = {len(new_token_ids)}")  # Debug print
+
+        # Decode only the new tokens, skipping special tokens like <|end_of_text|>
+        response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
+        print(f"DEBUG: Decoded response (skip_special_tokens=True) = {repr(response)}")  # Debug print
+
+        if not response:
+            response = "Sorry, I encountered an issue generating a response (empty)."
+
+    except Exception as e:
+        print(f"❌ Unexpected Error during NEW decoding: {e}")
+        response = "Sorry, an unexpected error occurred during decoding."
+    # --- End: NEW Decoding Strategy ---
+
+    # --- ADD THIS DEBUG LINE (if not already present) ---
+    print(f"DEBUG: Final response variable before UI append = {repr(response)}")
+    # --- END ADD THIS DEBUG LINE ---
 
+    # Add the final assistant reply to the UI history
+    ui_history.append({"role": "assistant", "content": response})
+
+    # Final yield with assistant reply
+    yield ui_history, ""  # Update chat, keep textbox cleared
 
 
     # --- Start: Dynamic Hotel ID Detection ---
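
In short, this commit replaces marker-based regex parsing of the full decoded string with length-based slicing: record how many tokens the prompt occupied, generate, then decode only the tokens after that offset with skip_special_tokens=True, so Granite control tokens such as <|end_of_text|> never reach the UI. The following is a minimal, self-contained sketch of that pattern; the checkpoint id, prompt, and max_new_tokens are illustrative placeholders, not values taken from this repository.

# Sketch of the decoding strategy this commit adopts, assuming a
# Hugging Face transformers causal LM. Placeholder model id/prompt.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "ibm-granite/granite-3.0-2b-instruct"  # placeholder; substitute the model app.py loads
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

input_text = "What time is check-in?"  # placeholder prompt
inputs = tokenizer(input_text, return_tensors="pt").to(device)  # returns input_ids AND attention_mask
input_length = inputs.input_ids.shape[1]  # prompt length in tokens

with torch.no_grad():
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # avoids the missing-attention-mask warning
        max_new_tokens=64,
        do_sample=False,  # greedy decoding, as in the commit
    )

# Keep only the continuation: everything after the prompt tokens.
new_token_ids = outputs[0][input_length:]
# skip_special_tokens=True drops control tokens like <|end_of_text|>,
# so no regex over role markers is needed.
response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
print(response)

Because the slice happens in token space, this sidesteps the brittle v7 regex and its fallback entirely; the only assumption is that generate() returns the prompt tokens followed by the continuation, which is the default behavior for decoder-only models.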