MyPod_10

Running

App Files Community

siddhartharyaai committed on Jan 13

Commit

4df1c08

verified ·

1 Parent(s): 9456088

Update utils.py

Browse files

Files changed (1) hide show

utils.py +15 -3

utils.py CHANGED Viewed

@@ -268,13 +268,16 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
     """
     Sends the system_prompt plus input_text to the Groq LLM to generate a
     multi-speaker Dialogue in JSON. We parse and return it as a Dialogue object.
     """
     print("[LOG] Generating script with tone:", tone, "and length:", target_length)
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
     # Instead of a fixed mapping, parse the numeric minutes from target_length if possible
     # E.g. "3 Mins" -> 3 -> approximate word range
-    # We'll keep a baseline: ~150 words per minute as a rough estimate
     words_per_minute = 150
     numeric_minutes = 3
     match = re.search(r"(\d+)", target_length)
@@ -333,9 +336,19 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
         raise ValueError("Failed to parse dialogue: No JSON found.")
     json_str = raw_content[start_index:end_index+1].strip()
     try:
         data = json.loads(json_str)
         return Dialogue(**data)
     except Exception as e:
         print("[ERROR] JSON decoding failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
@@ -484,6 +497,7 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     # 3) Abbreviations (e.g., NASA -> N A S A)
     def expand_abbreviations(match):
         abbrev = match.group()
         if abbrev.endswith('s') and abbrev[:-1].isupper():
             singular = abbrev[:-1]
             expanded = " ".join(list(singular)) + "s"
@@ -498,8 +512,6 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
     text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
-    # 4) Removed ellipsis insertion after punctuation (no longer applying that transformation here)
     # 5) Intelligent filler insertion after specific keywords (skip for Jane)
     if speaker != "Jane":
         def insert_thinking_pause(m):

     """
     Sends the system_prompt plus input_text to the Groq LLM to generate a
     multi-speaker Dialogue in JSON. We parse and return it as a Dialogue object.
+    QUICK FIX ADDED:
+      - If the LLM returns speakers other than "Jane" or "John",
+        we force them to "Jane" to satisfy the Pydantic literal constraint.
     """
     print("[LOG] Generating script with tone:", tone, "and length:", target_length)
     groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
     # Instead of a fixed mapping, parse the numeric minutes from target_length if possible
     # E.g. "3 Mins" -> 3 -> approximate word range
     words_per_minute = 150
     numeric_minutes = 3
     match = re.search(r"(\d+)", target_length)
         raise ValueError("Failed to parse dialogue: No JSON found.")
     json_str = raw_content[start_index:end_index+1].strip()
+    # --- QUICK FIX: Post-process to ensure only "Jane"/"John" as speakers ---
     try:
         data = json.loads(json_str)
+        for d in data.get("dialogue", []):
+            if d.get("speaker") not in ["Jane", "John"]:
+                d["speaker"] = "Jane"  # Unknown speakers always default to "Jane" (could alternate with "John" if desired)
         return Dialogue(**data)
+    except json.JSONDecodeError as e:
+        print("[ERROR] JSON decoding (format) failed:", e)
+        raise ValueError(f"Failed to parse dialogue: {str(e)}")
     except Exception as e:
         print("[ERROR] JSON decoding failed:", e)
         raise ValueError(f"Failed to parse dialogue: {str(e)}")
     # 3) Abbreviations (e.g., NASA -> N A S A)
     def expand_abbreviations(match):
         abbrev = match.group()
+        # Check if it's plural
         if abbrev.endswith('s') and abbrev[:-1].isupper():
             singular = abbrev[:-1]
             expanded = " ".join(list(singular)) + "s"
     text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
     # 5) Intelligent filler insertion after specific keywords (skip for Jane)
     if speaker != "Jane":
         def insert_thinking_pause(m):