Spaces:

tangchao5355
/

tangchao

Sleeping

App Files Files Community

tangchao5355 committed about 1 month ago

Commit

925ae35

verified ·

1 Parent(s): b7205d1

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -41

app.py CHANGED Viewed

@@ -1,83 +1,126 @@
-# app.py
 import gradio as gr
 import torch
 from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
 from diffusers import StableDiffusionPipeline
 import speech_recognition as sr
-from io import BytesIO
-# ========== Step 1: Prompt Enhancement ==========
-prompt_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
-prompt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
 def enhance_prompt(raw_input, style_choice):
     template = f"Generate a detailed Stable Diffusion prompt about: {raw_input} in {style_choice} style."
     inputs = prompt_tokenizer(template, return_tensors="pt")
     outputs = prompt_model.generate(inputs.input_ids, max_length=100)
     return prompt_tokenizer.decode(outputs[0], skip_special_tokens=True)
-# ========== Step 2: Image Generation ==========
-sd_pipe = StableDiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5",
-    torch_dtype=torch.float32,
-    use_safetensors=True
-)
-sd_pipe.enable_attention_slicing()  # 降低内存消耗
 def generate_image(enhanced_prompt, steps=20, guidance=7.5):
-    return sd_pipe(
-        enhanced_prompt,
-        num_inference_steps=int(steps),
-        guidance_scale=guidance,
-        generator=torch.Generator().manual_seed(42)
-    ).images[0]
-# ========== Step 3: Voice Input ==========
 recognizer = sr.Recognizer()
 def audio_to_text(audio_file):
-    with sr.AudioFile(audio_file) as source:
-        audio = recognizer.record(source)
-        return recognizer.recognize_whisper(audio, model="tiny.en")
-# ========== Gradio Interface ==========
-with gr.Blocks(title="AI Art Studio") as app:
-    gr.Markdown("## 🎨 AI Art Generator (CPU Optimized)")
     with gr.Row():
         with gr.Column(scale=2):
-            # ===== 交互控件 =====
-            input_type = gr.Radio(["Text", "Voice"], label="输入方式")
-            voice_input = gr.Audio(source="upload", type="filepath", visible=False)
-            text_input = gr.Textbox(label="输入描述", placeholder="描述你想生成的画面...")
             style_choice = gr.Dropdown(
-                ["Digital Art", "Oil Painting", "Anime", "Photorealistic"],
-                value="Digital Art",
                 label="艺术风格"
             )
             generate_btn = gr.Button("生成作品", variant="primary")
             with gr.Accordion("高级设置", open=False):
                 steps_slider = gr.Slider(10, 30, value=20, step=1, label="生成步数")
                 guidance_slider = gr.Slider(5.0, 10.0, value=7.5, label="创意自由度")
         with gr.Column(scale=3):
-            # ===== 输出展示 =====
             prompt_output = gr.Textbox(label="优化后的Prompt", interactive=False)
-            image_output = gr.Image(label="生成结果", show_label=False)
-    # ===== 交互逻辑 =====
     input_type.change(
-        fn=lambda x: (gr.update(visible=x=="Voice"), gr.update(visible=x=="Text")),
         inputs=input_type,
-        outputs=[voice_input, text_input]
     )
     generate_btn.click(
-        fn=lambda x,t: audio_to_text(x) if t=="Voice" else t,
-        inputs=[voice_input, input_type],
         outputs=text_input
     ).success(
         fn=enhance_prompt,
@@ -89,6 +132,5 @@ with gr.Blocks(title="AI Art Studio") as app:
         outputs=image_output
     )
-# ========== Step 4: Huggingface Deployment ==========
 if __name__ == "__main__":
     app.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import torch
 from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
 from diffusers import StableDiffusionPipeline
 import speech_recognition as sr
+import gc
+from accelerate import init_empty_weights
+# ===== Model initialisation =====
+def load_models():
+    """Load the prompt model, its tokenizer and the Stable Diffusion pipeline.
+
+    All three objects are loaded for plain CPU execution, matching the
+    "CPU" label shown in the app's page heading.
+
+    Returns:
+        Tuple ``(prompt_model, prompt_tokenizer, sd_pipe)``.
+    """
+    # Prompt-enhancement model
+    prompt_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
+    prompt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+    # Stable Diffusion pipeline.
+    # Deliberately NOT passed: variant="fp16" (contradicts the float32 dtype
+    # requested here) and device_map="auto" (not one of the placement
+    # strategies the diffusers pipeline loader accepts).
+    sd_pipe = StableDiffusionPipeline.from_pretrained(
+        "runwayml/stable-diffusion-v1-5",
+        torch_dtype=torch.float32,
+        use_safetensors=True,
+    )
+    # Attention slicing lowers peak memory during inference.
+    # enable_sequential_cpu_offload() is intentionally not called: it is a
+    # GPU memory-saving hook and has no purpose on a CPU-only host.
+    sd_pipe.enable_attention_slicing()
+    return prompt_model, prompt_tokenizer, sd_pipe
+prompt_model, prompt_tokenizer, sd_pipe = load_models()
+# ===== Core functionality =====
 def enhance_prompt(raw_input, style_choice):
+    """Turn a short description into a Stable Diffusion prompt.
+
+    Args:
+        raw_input: The user's description; interpolated into the instruction.
+        style_choice: The selected art style; interpolated into the instruction.
+
+    Returns:
+        The decoded text of the first sequence generated by ``prompt_model``,
+        with special tokens removed.
+    """
+    instruction = f"Generate a detailed Stable Diffusion prompt about: {raw_input} in {style_choice} style."
+    encoded = prompt_tokenizer(instruction, return_tensors="pt")
+    generated = prompt_model.generate(encoded.input_ids, max_length=100)
+    first_sequence = generated[0]
+    return prompt_tokenizer.decode(first_sequence, skip_special_tokens=True)
 def generate_image(enhanced_prompt, steps=20, guidance=7.5):
+    """Render one image for ``enhanced_prompt`` with the module-level pipeline.
+
+    Args:
+        enhanced_prompt: Prompt text handed to ``sd_pipe``.
+        steps: Number of inference steps; converted with ``int()`` before use.
+        guidance: Value passed through as ``guidance_scale``.
+
+    Returns:
+        The first entry of the pipeline result's ``images``.
+    """
+    try:
+        result = sd_pipe(
+            enhanced_prompt,
+            num_inference_steps=int(steps),
+            guidance_scale=guidance,
+            # Seed is fixed at 42, so the generator starts from the same
+            # state on every call.
+            generator=torch.Generator().manual_seed(42),
+        )
+        return result.images[0]
+    finally:
+        # Free memory left over from the run. The pipeline itself is kept:
+        # reloading it after every request (and doing so under
+        # init_empty_weights(), which creates parameters without data)
+        # would throw away a working model on each call.
+        gc.collect()
+def reload_models():
+    """Replace the module-level ``sd_pipe`` with a freshly loaded pipeline.
+
+    The pipeline is loaded with the same CPU settings used at start-up
+    (float32, safetensors, attention slicing). Any exception raised by the
+    load propagates to the caller.
+    """
+    global sd_pipe
+    # Drop the old pipeline before loading so only one copy is held in
+    # memory, but keep the name bound: `del sd_pipe` would leave the global
+    # undefined if the load below failed.
+    sd_pipe = None
+    gc.collect()
+    fresh_pipe = StableDiffusionPipeline.from_pretrained(
+        "runwayml/stable-diffusion-v1-5",
+        torch_dtype=torch.float32,
+        use_safetensors=True,
+    )
+    fresh_pipe.enable_attention_slicing()
+    sd_pipe = fresh_pipe
+# ===== Speech handling =====
 recognizer = sr.Recognizer()
 def audio_to_text(audio_file):
+    """Transcribe an audio file with the Whisper ``tiny.en`` model.
+
+    Args:
+        audio_file: Path of the audio file; a falsy value means "no file".
+
+    Returns:
+        The recognised text, or ``""`` when no file was given or when any
+        exception occurred (the exception is printed, not raised).
+    """
+    transcript = ""
+    if audio_file:
+        try:
+            with sr.AudioFile(audio_file) as clip:
+                recording = recognizer.record(clip)
+                transcript = recognizer.recognize_whisper(recording, model="tiny.en")
+        except Exception as err:
+            print(f"语音识别错误: {err}")
+    return transcript
+# ===== Gradio interface =====
+# Builds the UI (inputs on the left, results on the right) and wires the
+# generate button to a chain of steps that starts by settling the description
+# text and then calls enhance_prompt.
+with gr.Blocks(title="AI Art Studio", css=".gradio-container {max-width: 800px !important}") as app:
+    gr.Markdown("## 🎨 AI 艺术生成器 (CPU优化版)")
     with gr.Row():
         with gr.Column(scale=2):
+            # Input controls
+            input_type = gr.Radio(["文字", "语音"], label="输入方式", value="文字")
+            voice_input = gr.Audio(
+                sources=["upload"],
+                type="filepath",
+                visible=False,
+                label="上传语音文件",
+                elem_classes="voice-input"
+            )
+            text_input = gr.Textbox(label="输入描述", placeholder="例：空中的魔法树屋...", lines=3)
+            # Style selection
             style_choice = gr.Dropdown(
+                ["数字艺术", "油画", "动漫", "照片写实"],
+                value="数字艺术",
                 label="艺术风格"
             )
+            # Generate button
             generate_btn = gr.Button("生成作品", variant="primary")
+            # Advanced settings
             with gr.Accordion("高级设置", open=False):
                 steps_slider = gr.Slider(10, 30, value=20, step=1, label="生成步数")
                 guidance_slider = gr.Slider(5.0, 10.0, value=7.5, label="创意自由度")
         with gr.Column(scale=3):
+            # Output display
             prompt_output = gr.Textbox(label="优化后的Prompt", interactive=False)
+            image_output = gr.Image(label="生成结果", show_label=False, elem_id="output-image")
+    # Interaction logic
+    # Show the audio upload only while the voice option is selected.
     input_type.change(
+        fn=lambda x: gr.update(visible=x == "语音"),
         inputs=input_type,
+        outputs=voice_input
     )
+    # First step: settle the description text. Only voice mode transcribes;
+    # in text mode the typed description is passed through unchanged, so it
+    # is no longer overwritten with the empty transcription of a missing file.
     generate_btn.click(
+        fn=lambda audio_file, mode, typed_text: (
+            audio_to_text(audio_file) if mode == "语音" else typed_text
+        ),
+        inputs=[voice_input, input_type, text_input],
         outputs=text_input
     ).success(
         fn=enhance_prompt,
+        # NOTE(review): the diff view breaks between two hunks at this point,
+        # so lines of this call chain appear to be elided here - confirm
+        # against the full app.py.
         outputs=image_output
     )
 if __name__ == "__main__":
     app.launch(server_name="0.0.0.0", server_port=7860)