pawipa committed on
Commit e76c7d1 · 1 Parent(s): 249fdb2

first draft without AI

Files changed (2)
  1. Fraunhofer-IPA-Logo.jpg +0 -0
  2. app.py +136 -145
Fraunhofer-IPA-Logo.jpg ADDED
app.py CHANGED
@@ -1,154 +1,145 @@
  import gradio as gr
- import numpy as np
- import random
-
- # import spaces #[uncomment to use ZeroGPU]
- from diffusers import DiffusionPipeline
- import torch
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
-
- if torch.cuda.is_available():
-     torch_dtype = torch.float16
- else:
-     torch_dtype = torch.float32
-
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
- pipe = pipe.to(device)
-
- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024
-
-
- # @spaces.GPU #[uncomment to use ZeroGPU]
- def infer(
-     prompt,
-     negative_prompt,
-     seed,
-     randomize_seed,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-
-     generator = torch.Generator().manual_seed(seed)
-
-     image = pipe(
-         prompt=prompt,
-         negative_prompt=negative_prompt,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         generator=generator,
-     ).images[0]
-
-     return image, seed
-
-
- examples = [
-     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-     "An astronaut riding a green horse",
-     "A delicious ceviche cheesecake slice",
- ]
-
- css = """
- #col-container {
-     margin: 0 auto;
-     max-width: 640px;
- }
- """
-
- with gr.Blocks(css=css) as demo:
-     with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # Speech-to-Text Service")
-
-         with gr.Row():
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
-             )

-             run_button = gr.Button("Run", scale=0, variant="primary")

-         result = gr.Image(label="Result", show_label=False)

-         with gr.Accordion("Advanced Settings", open=False):
-             negative_prompt = gr.Text(
-                 label="Negative prompt",
-                 max_lines=1,
-                 placeholder="Enter a negative prompt",
-                 visible=False,
              )
-
-             seed = gr.Slider(
-                 label="Seed",
-                 minimum=0,
-                 maximum=MAX_SEED,
-                 step=1,
-                 value=0,
              )

-             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-             with gr.Row():
-                 width = gr.Slider(
-                     label="Width",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024,  # Replace with defaults that work for your model
-                 )
-
-                 height = gr.Slider(
-                     label="Height",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024,  # Replace with defaults that work for your model
-                 )
-
-             with gr.Row():
-                 guidance_scale = gr.Slider(
-                     label="Guidance scale",
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0,  # Replace with defaults that work for your model
-                 )
-
-                 num_inference_steps = gr.Slider(
-                     label="Number of inference steps",
-                     minimum=1,
-                     maximum=50,
-                     step=1,
-                     value=2,  # Replace with defaults that work for your model
-                 )
-
-         gr.Examples(examples=examples, inputs=[prompt])
-     gr.on(
-         triggers=[run_button.click, prompt.submit],
-         fn=infer,
-         inputs=[
-             prompt,
-             negative_prompt,
-             seed,
-             randomize_seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-         ],
-         outputs=[result, seed],
      )

- if __name__ == "__main__":
-     demo.launch()

  import gradio as gr
+ import time
+ import os
+ import zipfile
+ from typing import List, Tuple, Generator
+
+ # Initial status message
+ STANDARD_OUTPUT_TEXT = "**Status:**<br>"
+
+ def process_files_with_live_updates(
+     files: List[gr.File],
+     dropdown_option: str,
+     dropdown_option_2: str
+ ) -> Generator[Tuple[str, List[str]], None, None]:
+     """
+     Processes a list of uploaded files and provides live updates with progress.
+
+     Args:
+         files (List[gr.File]): List of files uploaded by the user.
+         dropdown_option (str): Selected option from the first dropdown.
+         dropdown_option_2 (str): Selected option from the second dropdown.
+
+     Yields:
+         Tuple[str, List[str]]: Updated status message and list of processed file paths.
+     """
+     file_details = []
+     total_files = len(files)
+     output_files = []
+
+     # Create a folder to temporarily store output files
+     output_dir = "output_files"
+     os.makedirs(output_dir, exist_ok=True)
+
+     for idx, file in enumerate(files):
+         # Simulate file processing
+         time.sleep(1)
+
+         # Add to file details
+         detail = (
+             f"**File Name**: {file.name} - {dropdown_option} - {dropdown_option_2}<br>"
+         )
+         file_details.append(detail)
+
+         # Generate a .txt file
+         txt_filename = os.path.join(output_dir, f"output_file_{idx + 1}.txt")
+         with open(txt_filename, "w") as txt_file:
+             txt_file.write(f"Original File Name: {file.name}")
+         output_files.append(txt_filename)
+
+         # Update progress bar and yield the updated Markdown
+         yield (
+             f"**Status: {int(((idx + 1) / total_files) * 100)}%**<br>" + "".join(file_details),
+             output_files,
+         )
+
+     # Create a zip archive
+     zip_filename = os.path.join(output_dir, "output_files.zip")
+     with zipfile.ZipFile(zip_filename, "w") as zipf:
+         for file_path in output_files:
+             zipf.write(file_path, os.path.basename(file_path))
+     output_files.append(zip_filename)
+
+     # Final yield
+     yield (
+         f"**Status: {int(((idx + 1) / total_files) * 100)}%**<br>" + "".join(file_details),
+         output_files,
+     )

+ # Gradio app layout
+ with gr.Blocks() as demo:

+     # Title and Description
+     gr.Markdown("# AI-Powered Speech-to-Text Batch Processor")
+     gr.Markdown(
+         """
+         Upload multiple audio files, select desired processing options, and view real-time updates as files are transcribed.
+         The application uses advanced AI models for sequential speech-to-text translation.
+         """
+     )

+     # Input section
+     with gr.Row():
+         with gr.Column():
+             file_input = gr.Files(file_types=[".wav", ".mp3"], label="Upload your audio files")
+         with gr.Column():
+             dropdown = gr.Dropdown(
+                 choices=["Language: English", "Language: German", "Language: French"],
+                 label="Select Language",
+                 value="Language: English",
              )
+             dropdown_2 = gr.Dropdown(
+                 choices=["Format: Plain Text", "Format: JSON", "Format: SRT"],
+                 label="Select Output Format",
+                 value="Format: Plain Text",
              )

+     # Buttons
+     with gr.Row():
+         submit_button = gr.Button("Start Transcription")
+         clear_button = gr.Button("Clear")
+
+     # Output section
+     output_md = gr.Markdown(label="Transcription Progress", value=STANDARD_OUTPUT_TEXT)
+     output_files = gr.Files(label="Generated Output Files")
+
+     # Button actions
+     submit_button.click(
+         process_files_with_live_updates,
+         inputs=[file_input, dropdown, dropdown_2],
+         outputs=[output_md, output_files],
+     )
+
+     clear_button.click(
+         lambda: (None, "Language: English", "Format: Plain Text", STANDARD_OUTPUT_TEXT, None),
+         inputs=[],  # No inputs
+         outputs=[file_input, dropdown, dropdown_2, output_md, output_files],
      )

+     gr.Textbox(os.getcwd(), label="Current Working Directory")
+
+     gr.Image("Fraunhofer-IPA-Logo.jpg", show_label=False)
+
+     # Centered Footer with Logo and Licensing Text
+     with gr.Row():
+         gr.Markdown(
+             """
+             **Fraunhofer IPA**
+             This application is provided under a basic licensing agreement for non-commercial use only.
+             For inquiries, visit [Fraunhofer IPA](https://www.ipa.fraunhofer.de).
+             """,
+             elem_id="footer-markdown",
+         )
+
+ # CSS to center the footer content
+ demo.css = """
+ #footer-markdown {
+     text-align: center;
+     margin-top: 20px;
+     padding-top: 10px;
+     border-top: 1px solid #ccc;
+ }
+ """
+
+ # Launch app
+ demo.launch()
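
The new `process_files_with_live_updates` generator still stands in for the actual transcription work with `time.sleep(1)`. A minimal sketch of how that placeholder could later be swapped for a real speech-to-text call is shown below; the `transformers` ASR pipeline and the `openai/whisper-small` checkpoint are illustrative assumptions and are not part of this commit.

```python
# Hypothetical follow-up (not part of this commit): replace the simulated
# time.sleep(1) step with an actual speech-to-text call. The checkpoint below
# is an assumption chosen for illustration only.
from transformers import pipeline

# Load the ASR model once at startup rather than once per file.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def transcribe_file(audio_path: str) -> str:
    """Return the transcript text for a single audio file."""
    result = asr(audio_path)  # the pipeline accepts a file path and returns {"text": ...}
    return result["text"]

# Inside the loop of process_files_with_live_updates, the generated .txt file
# would then hold the transcript instead of just the original file name:
#     transcript = transcribe_file(file.name)
#     txt_file.write(transcript)
```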