siddhartharyaai committed on
Commit
4df1c08
·
verified ·
1 Parent(s): 9456088

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +15 -3
utils.py CHANGED
@@ -268,13 +268,16 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
268
  """
269
  Sends the system_prompt plus input_text to the Groq LLM to generate a
270
  multi-speaker Dialogue in JSON. We parse and return it as a Dialogue object.
 
 
 
 
271
  """
272
  print("[LOG] Generating script with tone:", tone, "and length:", target_length)
273
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
274
 
275
  # Instead of a fixed mapping, parse the numeric minutes from target_length if possible
276
  # E.g. "3 Mins" -> 3 -> approximate word range
277
- # We'll keep a baseline: ~150 words per minute as a rough estimate
278
  words_per_minute = 150
279
  numeric_minutes = 3
280
  match = re.search(r"(\d+)", target_length)
@@ -333,9 +336,19 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
333
  raise ValueError("Failed to parse dialogue: No JSON found.")
334
 
335
  json_str = raw_content[start_index:end_index+1].strip()
 
 
336
  try:
337
  data = json.loads(json_str)
 
 
 
 
338
  return Dialogue(**data)
 
 
 
 
339
  except Exception as e:
340
  print("[ERROR] JSON decoding failed:", e)
341
  raise ValueError(f"Failed to parse dialogue: {str(e)}")
@@ -484,6 +497,7 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
484
  # 3) Abbreviations (e.g., NASA -> N A S A)
485
  def expand_abbreviations(match):
486
  abbrev = match.group()
 
487
  if abbrev.endswith('s') and abbrev[:-1].isupper():
488
  singular = abbrev[:-1]
489
  expanded = " ".join(list(singular)) + "s"
@@ -498,8 +512,6 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
498
 
499
  text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
500
 
501
- # 4) Removed ellipsis insertion after punctuation (no longer applying that transformation here)
502
-
503
  # 5) Intelligent filler insertion after specific keywords (skip for Jane)
504
  if speaker != "Jane":
505
  def insert_thinking_pause(m):
 
268
  """
269
  Sends the system_prompt plus input_text to the Groq LLM to generate a
270
  multi-speaker Dialogue in JSON. We parse and return it as a Dialogue object.
271
+
272
+ QUICK FIX ADDED:
273
+ - If the LLM returns speakers other than "Jane" or "John,"
274
+ we force them to "Jane" to satisfy the Pydantic literal constraint.
275
  """
276
  print("[LOG] Generating script with tone:", tone, "and length:", target_length)
277
  groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
278
 
279
  # Instead of a fixed mapping, parse the numeric minutes from target_length if possible
280
  # E.g. "3 Mins" -> 3 -> approximate word range
 
281
  words_per_minute = 150
282
  numeric_minutes = 3
283
  match = re.search(r"(\d+)", target_length)
 
336
  raise ValueError("Failed to parse dialogue: No JSON found.")
337
 
338
  json_str = raw_content[start_index:end_index+1].strip()
339
+
340
+ # --- QUICK FIX: Post-process to ensure only "Jane"/"John" as speakers ---
341
  try:
342
  data = json.loads(json_str)
343
+ for d in data.get("dialogue", []):
344
+ if d.get("speaker") not in ["Jane", "John"]:
345
+ d["speaker"] = "Jane" # Force to "Jane" or "John" (you could alternate if desired)
346
+
347
  return Dialogue(**data)
348
+
349
+ except json.JSONDecodeError as e:
350
+ print("[ERROR] JSON decoding (format) failed:", e)
351
+ raise ValueError(f"Failed to parse dialogue: {str(e)}")
352
  except Exception as e:
353
  print("[ERROR] JSON decoding failed:", e)
354
  raise ValueError(f"Failed to parse dialogue: {str(e)}")
 
497
  # 3) Abbreviations (e.g., NASA -> N A S A)
498
  def expand_abbreviations(match):
499
  abbrev = match.group()
500
+ # Check if it's plural
501
  if abbrev.endswith('s') and abbrev[:-1].isupper():
502
  singular = abbrev[:-1]
503
  expanded = " ".join(list(singular)) + "s"
 
512
 
513
  text = re.sub(r"\b[A-Z]{2,}s?\b", expand_abbreviations, text)
514
 
 
 
515
  # 5) Intelligent filler insertion after specific keywords (skip for Jane)
516
  if speaker != "Jane":
517
  def insert_thinking_pause(m):