Spaces:

Prof-Hunt
/

TECH_TALES

Runtime error

App Files Files Community

Prof-Hunt committed on Jan 30

Commit

01c7a6f

verified ·

1 Parent(s): a5abdd6

Update app.py

Browse files

Files changed (1) hide show

app.py +313 -260

app.py CHANGED Viewed

@@ -8,49 +8,68 @@ import textwrap
 import os
 import gc
 import re
 from datetime import datetime
 import spaces
 from kokoro import KPipeline
 import soundfile as sf
-# Initialize models at startup - outside of functions
 print("Loading models...")
 # Load SmolVLM for image analysis
 processor_vlm = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-500M-Instruct")
 model_vlm = AutoModelForVision2Seq.from_pretrained(
     "HuggingFaceTB/SmolVLM-500M-Instruct",
-    torch_dtype=torch.bfloat16,
-    use_safetensors=True
-)
 # Load SmolLM2 for story and prompt generation
 checkpoint = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 tokenizer_lm = AutoTokenizer.from_pretrained(checkpoint)
-model_lm = AutoModelForCausalLM.from_pretrained(
-    checkpoint,
-    use_safetensors=True
-)
-# Load Stable Diffusion pipeline
-pipe = StableDiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    torch_dtype=torch.float16,
-    use_safetensors=True
-)
-pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-# Move models to GPU if available
-if torch.cuda.is_available():
-    model_vlm = model_vlm.to("cuda")
-    model_lm = model_lm.to("cuda")
-    pipe = pipe.to("cuda")
 @torch.inference_mode()
 @spaces.GPU(duration=30)
 def generate_image():
     """Generate a random landscape image."""
-    torch.cuda.empty_cache()
     default_prompt = "a beautiful, professional landscape photograph"
     default_negative_prompt = "blurry, bad quality, distorted, deformed"
@@ -60,15 +79,25 @@ def generate_image():
     generator = torch.Generator("cuda").manual_seed(default_seed)
-    image = pipe(
-        prompt=default_prompt,
-        negative_prompt=default_negative_prompt,
-        num_inference_steps=default_steps,
-        guidance_scale=default_guidance,
-        generator=generator,
-    ).images[0]
-    return image
 @torch.inference_mode()
 @spaces.GPU(duration=30)
@@ -76,7 +105,7 @@ def analyze_image(image):
     if image is None:
         return "Please generate an image first."
-    torch.cuda.empty_cache()
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
@@ -86,38 +115,49 @@ def analyze_image(image):
             "role": "user",
             "content": [
                 {"type": "image"},
-                {"type": "text", "text": "Describe this image very briefly in five sentences or less. Short description."}
             ]
         }
     ]
-    prompt = processor_vlm.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor_vlm(
-        text=prompt,
-        images=[image],
-        return_tensors="pt"
-    ).to('cuda')
-    outputs = model_vlm.generate(
-        input_ids=inputs.input_ids,
-        pixel_values=inputs.pixel_values,
-        attention_mask=inputs.attention_mask,
-        num_return_sequences=1,
-        no_repeat_ngram_size=2,
-        max_new_tokens=500,
-        min_new_tokens=10
-    )
-    description = processor_vlm.decode(outputs[0], skip_special_tokens=True)
-    description = re.sub(r".*?Assistant:\s*", "", description, flags=re.DOTALL).strip()
-    return description
 @torch.inference_mode()
 @spaces.GPU(duration=30)
 def generate_story(image_description):
-    torch.cuda.empty_cache()
     story_prompt = f"""Write a short children's story (one chapter, about 500 words) based on this scene: {image_description}
@@ -128,74 +168,89 @@ def generate_story(image_description):
     4. Keep it simple and engaging for young children
     5. End with a simple moral lesson"""
-    messages = [{"role": "user", "content": story_prompt}]
-    input_text = tokenizer_lm.apply_chat_template(messages, tokenize=False)
-    inputs = tokenizer_lm.encode(input_text, return_tensors="pt").to("cuda")
-    outputs = model_lm.generate(
-        inputs,
-        max_new_tokens=750,
-        temperature=0.7,
-        top_p=0.9,
-        do_sample=True,
-        repetition_penalty=1.2
-    )
-    story = tokenizer_lm.decode(outputs[0])
-    story = clean_story_output(story)
-    return story
-@torch.inference_mode()
-@spaces.GPU(duration=30)
-def generate_image_prompts(story_text):
-    torch.cuda.empty_cache()
-    paragraphs = split_into_paragraphs(story_text)
-    all_prompts = []
-    prompt_instruction = '''Here is a story paragraph: {paragraph}
-    Start your response with "Watercolor bulldog" and describe what Champ is doing in this scene. Add where it takes place and one mood detail. Keep it short.'''
-    for i, paragraph in enumerate(paragraphs, 1):
-        messages = [{"role": "user", "content": prompt_instruction.format(paragraph=paragraph)}]
         input_text = tokenizer_lm.apply_chat_template(messages, tokenize=False)
         inputs = tokenizer_lm.encode(input_text, return_tensors="pt").to("cuda")
         outputs = model_lm.generate(
             inputs,
-            max_new_tokens=30,
-            temperature=0.5,
             top_p=0.9,
             do_sample=True,
             repetition_penalty=1.2
         )
-        prompt = process_generated_prompt(tokenizer_lm.decode(outputs[0]), paragraph)
-        section = f"Paragraph {i}:\n{paragraph}\n\nScenery Prompt {i}:\n{prompt}\n\n{'='*50}"
-        all_prompts.append(section)
-    return '\n'.join(all_prompts)
 @torch.inference_mode()
 @spaces.GPU(duration=60)
 def generate_story_image(prompt, seed=-1):
-    """Generate an image using Stable Diffusion with LoRA temporarily loaded."""
-    torch.cuda.empty_cache()
-    generator = torch.Generator("cuda")
-    if seed != -1:
-        generator.manual_seed(seed)
-    else:
-        generator.manual_seed(torch.randint(0, 2**32 - 1, (1,)).item())
-    enhanced_prompt = f"{prompt}, watercolor style, children's book illustration, soft colors"
     try:
-        # Load LoRA only for this function
         pipe.load_lora_weights("Prof-Hunt/lora-bulldog")
         image = pipe(
             prompt=enhanced_prompt,
@@ -205,19 +260,24 @@ def generate_story_image(prompt, seed=-1):
             generator=generator
         ).images[0]
-        # Unload LoRA properly
         pipe.unload_lora_weights()
-        torch.cuda.empty_cache()
     except Exception as e:
         print(f"Error generating image: {e}")
         return None
-    return image
 @torch.inference_mode()
 @spaces.GPU(duration=180)
 def generate_all_scenes(prompts_text):
     generated_images = []
     formatted_prompts = []
@@ -227,43 +287,168 @@ def generate_all_scenes(prompts_text):
         if not section.strip():
             continue
-        lines = [line.strip() for line in section.split('\n') if line.strip()]
         scene_prompt = None
-        for i, line in enumerate(lines):
             if 'Scenery Prompt' in line:
                 scene_num = line.split('Scenery Prompt')[1].split(':')[0].strip()
-                if i + 1 < len(lines):
-                    scene_prompt = lines[i + 1]
                     formatted_prompts.append(f"Scene {scene_num}: {scene_prompt}")
                 break
         if scene_prompt:
             try:
-                torch.cuda.empty_cache()
-                print(f"Generating image for scene: {scene_prompt}")  # Debugging
                 image = generate_story_image(scene_prompt)
                 if image is not None:
                     img_array = np.array(image)
-                    # Ensure the image is valid
-                    if img_array.shape[0] > 0:
-                        generated_images.append(img_array)
-                torch.cuda.empty_cache()
             except Exception as e:
                 print(f"Error generating image: {str(e)}")
                 continue
-    print(f"Generated {len(generated_images)} images.")
     return generated_images, "\n\n".join(formatted_prompts)
-# Helper functions without GPU usage
 def clean_story_output(story):
     story = story.replace("<|im_end|>", "")
     story_start = story.find("Once upon")
@@ -288,6 +473,7 @@ def clean_story_output(story):
     return '\n\n'.join(cleaned_lines).strip()
 def split_into_paragraphs(text):
     paragraphs = []
     current_paragraph = []
@@ -308,6 +494,7 @@ def split_into_paragraphs(text):
                         'keep it simple', 'end with', 'write a'])]
 def process_generated_prompt(prompt, paragraph):
     prompt = prompt.replace("<|im_start|>", "").replace("<|im_end|>", "")
     prompt = prompt.replace("assistant", "").replace("system", "").replace("user", "")
@@ -326,143 +513,9 @@ def process_generated_prompt(prompt, paragraph):
     return prompt
-def overlay_text_on_image(image, text):
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    img = image.convert('RGB')
-    draw = ImageDraw.Draw(img)
-    try:
-        font_size = int(img.width * 0.025)
-        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
-    except:
-        font = ImageFont.load_default()
-    y_position = int(img.height * 0.005)
-    x_margin = int(img.width * 0.005)
-    available_width = img.width - (2 * x_margin)
-    wrapped_text = textwrap.fill(text, width=int(available_width / (font_size * 0.6)))
-    outline_color = (255, 255, 255)
-    text_color = (0, 0, 0)
-    offsets = [-2, -1, 1, 2]
-    for dx in offsets:
-        for dy in offsets:
-            draw.multiline_text(
-                (x_margin + dx, y_position + dy),
-                wrapped_text,
-                font=font,
-                fill=outline_color
-            )
-    draw.multiline_text(
-        (x_margin, y_position),
-        wrapped_text,
-        font=font,
-        fill=text_color
-    )
-    return img
-# Initialize Kokoro TTS pipeline
-pipeline = KPipeline(lang_code='a')  # 'a' for American English
-def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
-    """Generate a single audio file for all paragraphs in the story."""
-    if not story_text:
-        return None
-    # Split story into paragraphs
-    paragraphs = []
-    current_paragraph = []
-    for line in story_text.split('\n'):
-        line = line.strip()
-        if not line:  # Empty line indicates paragraph break
-            if current_paragraph:
-                paragraphs.append(' '.join(current_paragraph))
-                current_paragraph = []
-        else:
-            current_paragraph.append(line)
-    if current_paragraph:
-        paragraphs.append(' '.join(current_paragraph))
-    # Combine audio for all paragraphs
-    combined_audio = []
-    for paragraph in paragraphs:
-        if not paragraph.strip():
-            continue  # Skip empty paragraphs
-        generator = pipeline(
-            paragraph,
-            voice=voice,
-            speed=speed,
-            split_pattern=r'\n+'  # Split on newlines
-        )
-        for _, _, audio in generator:
-            combined_audio.extend(audio)  # Append audio data
-    # Convert combined audio to NumPy array and save
-    combined_audio = np.array(combined_audio)
-    filename = "combined_story.wav"
-    sf.write(filename, combined_audio, 24000)  # Save audio as .wav
-    return filename
-def add_text_to_scenes(gallery_images, prompts_text):
-    if not isinstance(gallery_images, list):
-        return [], []
-    sections = prompts_text.split('='*50)
-    overlaid_images = []
-    output_files = []
-    temp_dir = "temp_book_pages"
-    os.makedirs(temp_dir, exist_ok=True)
-    for i, (image_data, section) in enumerate(zip(gallery_images, sections)):
-        if not section.strip():
-            continue
-        lines = [line.strip() for line in section.split('\n') if line.strip()]
-        paragraph = None
-        for j, line in enumerate(lines):
-            if line.startswith('Paragraph'):
-                if j + 1 < len(lines):
-                    paragraph = lines[j + 1]
-                    break
-        if paragraph and image_data is not None:
-            try:
-                overlaid_img = overlay_text_on_image(image_data, paragraph)
-                if overlaid_img is not None:
-                    overlaid_array = np.array(overlaid_img)
-                    overlaid_images.append(overlaid_array)
-                    output_path = os.path.join(temp_dir, f"panel_{i+1}.png")
-                    overlaid_img.save(output_path)
-                    output_files.append(output_path)
-            except Exception as e:
-                print(f"Error processing image: {str(e)}")
-                continue
-    return overlaid_images, output_files
 def create_interface():
-    theme = gr.themes.Soft().set(
-        body_background_fill="*primary_50",
-        button_primary_background_fill="rgb(173, 216, 230)",  # light blue
-        button_secondary_background_fill="rgb(255, 182, 193)",  # light red
-        button_primary_background_fill_hover="rgb(135, 206, 235)",  # slightly darker blue for hover
-        button_secondary_background_fill_hover="rgb(255, 160, 180)",  # slightly darker red for hover
-        block_title_text_color="*primary_500",
-        block_label_text_color="*secondary_500",
-    )
-    with gr.Blocks(theme=theme) as demo:
         gr.Markdown("# Tech Tales: Story Creation")
         with gr.Row():

 import os
 import gc
 import re
+import psutil
 from datetime import datetime
 import spaces
 from kokoro import KPipeline
 import soundfile as sf
+def clear_memory():
+    """Release unreferenced Python objects and cached CUDA memory, then log usage.
+
+    A full garbage collection runs first; when CUDA is available the CUDA
+    cache is emptied and the device is synchronized. Current GPU figures
+    (only when CUDA is available) and the process resident set size are
+    printed in megabytes.
+    """
+    # gc.collect() with no argument is already a full (generation 2) pass,
+    # so separate per-generation passes afterwards add nothing.
+    gc.collect()
+    cuda_available = torch.cuda.is_available()
+    if cuda_available:
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+        print(f"GPU Memory allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
+        print(f"GPU Memory cached: {torch.cuda.memory_reserved()/1024**2:.2f} MB")
+    process = psutil.Process(os.getpid())
+    print(f"CPU RAM used: {process.memory_info().rss/1024**2:.2f} MB")
+# Initialize models at startup - only the lightweight ones
 print("Loading models...")
 # Load SmolVLM for image analysis
 processor_vlm = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-500M-Instruct")
 model_vlm = AutoModelForVision2Seq.from_pretrained(
     "HuggingFaceTB/SmolVLM-500M-Instruct",
+    torch_dtype=torch.bfloat16
+).to("cuda")
 # Load SmolLM2 for story and prompt generation
 checkpoint = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 tokenizer_lm = AutoTokenizer.from_pretrained(checkpoint)
+model_lm = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")
+# Initialize Kokoro TTS pipeline
+pipeline = KPipeline(lang_code='a')  # 'a' for American English
+def load_sd_model():
+    """Build a fresh Stable Diffusion pipeline on the GPU.
+
+    The pipeline is loaded in float16, its scheduler is replaced by a
+    DPM-Solver multistep scheduler built from the existing scheduler config,
+    and it is moved to CUDA with attention slicing enabled.
+
+    Returns:
+        The configured pipeline object.
+    """
+    sd_pipeline = StableDiffusionPipeline.from_pretrained(
+        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
+    )
+    scheduler_config = sd_pipeline.scheduler.config
+    sd_pipeline.scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config)
+    sd_pipeline.to("cuda")
+    sd_pipeline.enable_attention_slicing()
+    return sd_pipeline
 @torch.inference_mode()
 @spaces.GPU(duration=30)
 def generate_image():
     """Generate a random landscape image."""
+    clear_memory()
+    pipe = load_sd_model()
     default_prompt = "a beautiful, professional landscape photograph"
     default_negative_prompt = "blurry, bad quality, distorted, deformed"
     generator = torch.Generator("cuda").manual_seed(default_seed)
+    try:
+        image = pipe(
+            prompt=default_prompt,
+            negative_prompt=default_negative_prompt,
+            num_inference_steps=default_steps,
+            guidance_scale=default_guidance,
+            generator=generator,
+        ).images[0]
+        del pipe
+        clear_memory()
+        return image
+    except Exception as e:
+        print(f"Error generating image: {e}")
+        if 'pipe' in locals():
+            del pipe
+        clear_memory()
+        return None
 @torch.inference_mode()
 @spaces.GPU(duration=30)
     if image is None:
         return "Please generate an image first."
+    clear_memory()
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
             "role": "user",
             "content": [
                 {"type": "image"},
+                {"type": "text", "text": "Describe this image and Be brief but descriptive."}
             ]
         }
     ]
+    try:
+        prompt = processor_vlm.apply_chat_template(messages, add_generation_prompt=True)
+        inputs = processor_vlm(
+            text=prompt,
+            images=[image],
+            return_tensors="pt"
+        ).to('cuda')
+        outputs = model_vlm.generate(
+            input_ids=inputs.input_ids,
+            pixel_values=inputs.pixel_values,
+            attention_mask=inputs.attention_mask,
+            num_return_sequences=1,
+            no_repeat_ngram_size=2,
+            max_new_tokens=500,
+            min_new_tokens=10
+        )
+        description = processor_vlm.decode(outputs[0], skip_special_tokens=True)
+        description = re.sub(r".*?Assistant:\s*", "", description, flags=re.DOTALL).strip()
+        # Split into sentences and take only the first three
+        sentences = re.split(r'(?<=[.!?])\s+', description)
+        description = ' '.join(sentences[:3])
+        clear_memory()
+        return description
+    except Exception as e:
+        print(f"Error analyzing image: {e}")
+        clear_memory()
+        return "Error analyzing image. Please try again."
 @torch.inference_mode()
 @spaces.GPU(duration=30)
 def generate_story(image_description):
+    clear_memory()
     story_prompt = f"""Write a short children's story (one chapter, about 500 words) based on this scene: {image_description}
     4. Keep it simple and engaging for young children
     5. End with a simple moral lesson"""
+    try:
+        messages = [{"role": "user", "content": story_prompt}]
         input_text = tokenizer_lm.apply_chat_template(messages, tokenize=False)
         inputs = tokenizer_lm.encode(input_text, return_tensors="pt").to("cuda")
         outputs = model_lm.generate(
             inputs,
+            max_new_tokens=750,
+            temperature=0.7,
             top_p=0.9,
             do_sample=True,
             repetition_penalty=1.2
         )
+        story = tokenizer_lm.decode(outputs[0])
+        story = clean_story_output(story)
+        clear_memory()
+        return story
+    except Exception as e:
+        print(f"Error generating story: {e}")
+        clear_memory()
+        return "Error generating story. Please try again."
+@torch.inference_mode()
+@spaces.GPU(duration=30)
+def generate_image_prompts(story_text):
+    """Create one illustration prompt for every paragraph of the story.
+
+    Each paragraph is sent to the language model with a fixed instruction;
+    the decoded output is cleaned by ``process_generated_prompt``. The result
+    is one text section per paragraph (paragraph, prompt, and a rule of fifty
+    ``=`` characters), joined with newlines.
+
+    Args:
+        story_text: The story to split into paragraphs.
+
+    Returns:
+        The joined sections, or a fixed error message string if anything in
+        the generation loop raises.
+    """
+    clear_memory()
+    story_paragraphs = split_into_paragraphs(story_text)
+    sections = []
+    instruction_template = '''Here is a story paragraph: {paragraph}
+    Start your response with "Watercolor bulldog" and describe what Champ is doing in this scene. Add where it takes place and one mood detail. Keep it short.'''
+    # Sampling settings shared by every paragraph.
+    sampling_options = dict(
+        max_new_tokens=30,
+        temperature=0.5,
+        top_p=0.9,
+        do_sample=True,
+        repetition_penalty=1.2,
+    )
+    try:
+        for number, paragraph in enumerate(story_paragraphs, 1):
+            chat = [{"role": "user", "content": instruction_template.format(paragraph=paragraph)}]
+            chat_text = tokenizer_lm.apply_chat_template(chat, tokenize=False)
+            input_ids = tokenizer_lm.encode(chat_text, return_tensors="pt").to("cuda")
+            generated = model_lm.generate(input_ids, **sampling_options)
+            raw_prompt = tokenizer_lm.decode(generated[0])
+            scene_prompt = process_generated_prompt(raw_prompt, paragraph)
+            sections.append(
+                f"Paragraph {number}:\n{paragraph}\n\nScenery Prompt {number}:\n{scene_prompt}\n\n{'='*50}"
+            )
+            # Free memory between paragraphs, as well as before and after the loop.
+            clear_memory()
+        return '\n'.join(sections)
+    except Exception as e:
+        print(f"Error generating prompts: {e}")
+        clear_memory()
+        return "Error generating prompts. Please try again."
 @torch.inference_mode()
 @spaces.GPU(duration=60)
 def generate_story_image(prompt, seed=-1):
+    clear_memory()
+    pipe = load_sd_model()
     try:
         pipe.load_lora_weights("Prof-Hunt/lora-bulldog")
+        generator = torch.Generator("cuda")
+        if seed != -1:
+            generator.manual_seed(seed)
+        else:
+            generator.manual_seed(torch.randint(0, 2**32 - 1, (1,)).item())
+        enhanced_prompt = f"{prompt}, watercolor style, children's book illustration, soft colors"
         image = pipe(
             prompt=enhanced_prompt,
             generator=generator
         ).images[0]
         pipe.unload_lora_weights()
+        del pipe
+        clear_memory()
+        return image
     except Exception as e:
         print(f"Error generating image: {e}")
+        if 'pipe' in locals():
+            pipe.unload_lora_weights()
+            del pipe
+        clear_memory()
         return None
 @torch.inference_mode()
 @spaces.GPU(duration=180)
 def generate_all_scenes(prompts_text):
+    clear_memory()
     generated_images = []
     formatted_prompts = []
         if not section.strip():
             continue
         scene_prompt = None
+        for line in section.split('\n'):
             if 'Scenery Prompt' in line:
                 scene_num = line.split('Scenery Prompt')[1].split(':')[0].strip()
+                next_line_index = section.split('\n').index(line) + 1
+                if next_line_index < len(section.split('\n')):
+                    scene_prompt = section.split('\n')[next_line_index].strip()
                     formatted_prompts.append(f"Scene {scene_num}: {scene_prompt}")
                 break
         if scene_prompt:
             try:
+                clear_memory()
+                print(f"Generating image for scene: {scene_prompt}")
                 image = generate_story_image(scene_prompt)
                 if image is not None:
                     img_array = np.array(image)
+                    generated_images.append(img_array)
+                clear_memory()
             except Exception as e:
                 print(f"Error generating image: {str(e)}")
+                clear_memory()
                 continue
     return generated_images, "\n\n".join(formatted_prompts)
+def overlay_text_on_image(image, text):
+    """Draw *text* in black with a white outline near the top-left of *image*.
+
+    Args:
+        image: An object exposing ``convert('RGB')`` (the caller in this file
+            passes a PIL image), or None.
+        text: Caption to draw; it is word-wrapped to the image width.
+
+    Returns:
+        The RGB image returned by ``convert`` with the text drawn on it, or
+        None when *image* is None or any drawing step raises.
+    """
+    if image is None:
+        return None
+    try:
+        img = image.convert('RGB')
+        draw = ImageDraw.Draw(img)
+        # Clamp to 1: images narrower than 40 px would otherwise give a font
+        # size of 0 and make the wrap-width division below fail.
+        font_size = max(1, int(img.width * 0.025))
+        try:
+            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
+        except (OSError, ImportError):
+            # Font file missing/unreadable or FreeType support unavailable:
+            # fall back to the built-in bitmap font.
+            font = ImageFont.load_default()
+        y_position = int(img.height * 0.005)
+        x_margin = int(img.width * 0.005)
+        available_width = img.width - (2 * x_margin)
+        # Roughly 0.6 * font_size pixels per character; textwrap needs width > 0.
+        wrap_width = max(1, int(available_width / (font_size * 0.6)))
+        wrapped_text = textwrap.fill(text, width=wrap_width)
+        outline_color = (255, 255, 255)
+        text_color = (0, 0, 0)
+        offsets = [-2, -1, 1, 2]
+        # Draw the text at every offset pair in white to form the outline.
+        for dx in offsets:
+            for dy in offsets:
+                draw.multiline_text(
+                    (x_margin + dx, y_position + dy),
+                    wrapped_text,
+                    font=font,
+                    fill=outline_color
+                )
+        # Then draw the text itself on top.
+        draw.multiline_text(
+            (x_margin, y_position),
+            wrapped_text,
+            font=font,
+            fill=text_color
+        )
+        return img
+    except Exception as e:
+        print(f"Error overlaying text: {e}")
+        return None
+def add_text_to_scenes(gallery_images, prompts_text):
+    """Overlay each story paragraph on its scene image and save the pages.
+
+    ``prompts_text`` is split on a rule of fifty ``=`` characters and the
+    sections are paired positionally with ``gallery_images``. For each pair,
+    the line following the first line that starts with ``Paragraph`` is drawn
+    onto the image, and the result is saved as ``panel_<n>.png`` inside
+    ``temp_book_pages``.
+
+    Args:
+        gallery_images: List of images (NumPy arrays or image objects);
+            any non-list value yields two empty lists.
+        prompts_text: Text holding the paragraph/prompt sections.
+
+    Returns:
+        A tuple ``(overlaid_images, output_files)``: the overlaid images as
+        NumPy arrays and the paths of the saved PNG files.
+    """
+    if not isinstance(gallery_images, list):
+        return [], []
+    clear_memory()
+    sections = prompts_text.split('=' * 50)
+    overlaid_images, output_files = [], []
+    temp_dir = "temp_book_pages"
+    os.makedirs(temp_dir, exist_ok=True)
+    for page_number, (image_data, section) in enumerate(zip(gallery_images, sections), start=1):
+        if not section.strip():
+            continue
+        lines = [stripped for stripped in map(str.strip, section.split('\n')) if stripped]
+        # Text of the line right after the first "Paragraph" header that has one.
+        paragraph = next(
+            (lines[pos + 1] for pos, line in enumerate(lines[:-1]) if line.startswith('Paragraph')),
+            None,
+        )
+        if not paragraph or image_data is None:
+            continue
+        try:
+            image = Image.fromarray(image_data) if isinstance(image_data, np.ndarray) else image_data
+            overlaid_img = overlay_text_on_image(image, paragraph)
+            if overlaid_img is None:
+                continue
+            overlaid_images.append(np.array(overlaid_img))
+            output_path = os.path.join(temp_dir, f"panel_{page_number}.png")
+            overlaid_img.save(output_path)
+            output_files.append(output_path)
+        except Exception as e:
+            print(f"Error processing image: {str(e)}")
+            continue
+    clear_memory()
+    return overlaid_images, output_files
+def generate_combined_audio_from_story(story_text, voice='af_heart', speed=1):
+    """Synthesize the whole story into a single WAV file.
+
+    The story is split with ``split_into_paragraphs``; every non-blank
+    paragraph is passed to the module-level TTS ``pipeline`` and the audio
+    chunks it yields are joined in order and written to
+    ``combined_story.wav`` at 24000 Hz.
+
+    Args:
+        story_text: Story to narrate; a falsy value returns None.
+        voice: Voice identifier forwarded to the TTS pipeline.
+        speed: Speed value forwarded to the TTS pipeline.
+
+    Returns:
+        The output file name, or None when there is no text, no audio was
+        produced, or synthesis/writing raised.
+    """
+    clear_memory()
+    if not story_text:
+        return None
+    paragraphs = split_into_paragraphs(story_text)
+    audio_chunks = []
+    try:
+        for paragraph in paragraphs:
+            if not paragraph.strip():
+                continue
+            generator = pipeline(
+                paragraph,
+                voice=voice,
+                speed=speed,
+                split_pattern=r'\n+'
+            )
+            for _, _, audio in generator:
+                # Keep each chunk whole instead of extending a Python list one
+                # sample at a time.
+                # NOTE(review): assumes each chunk is a 1-D array-like
+                # (array or CPU tensor) - confirm against the TTS library.
+                audio_chunks.append(np.asarray(audio))
+        if not audio_chunks:
+            # Nothing was synthesized; do not write an empty WAV file.
+            clear_memory()
+            return None
+        combined_audio = np.concatenate(audio_chunks)
+        filename = "combined_story.wav"
+        sf.write(filename, combined_audio, 24000)  # Save audio as .wav
+        clear_memory()
+        return filename
+    except Exception as e:
+        print(f"Error generating audio: {e}")
+        clear_memory()
+        return None
+# Helper functions
 def clean_story_output(story):
+    """Clean up the generated story text."""
     story = story.replace("<|im_end|>", "")
     story_start = story.find("Once upon")
     return '\n\n'.join(cleaned_lines).strip()
 def split_into_paragraphs(text):
+    """Split text into paragraphs."""
     paragraphs = []
     current_paragraph = []
                         'keep it simple', 'end with', 'write a'])]
 def process_generated_prompt(prompt, paragraph):
+    """Process and clean up generated image prompts."""
     prompt = prompt.replace("<|im_start|>", "").replace("<|im_end|>", "")
     prompt = prompt.replace("assistant", "").replace("system", "").replace("user", "")
     return prompt
+# Create the interface
 def create_interface():
+    with gr.Blocks() as demo:
         gr.Markdown("# Tech Tales: Story Creation")
         with gr.Row():