tangchao5355 committed (verified)
Commit cd3f05b · Parent(s): 46842e6

Create app.py

Files changed (1)
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
+ # app.py
+ import gradio as gr
+ import torch
+ from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
+ from diffusers import StableDiffusionPipeline
+ import speech_recognition as sr
+ from io import BytesIO
+
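+ # Note: running this app requires at least gradio, torch, transformers, diffusers,
+ # SpeechRecognition, and openai-whisper (used by recognize_whisper below) to be installed,
+ # e.g. via the Space's requirements.txt.
+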
+ # ========== Step 1: Prompt Enhancement ==========
+ prompt_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
+ prompt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+
+ def enhance_prompt(raw_input, style_choice):
+     template = f"Generate a detailed Stable Diffusion prompt about: {raw_input} in {style_choice} style."
+     inputs = prompt_tokenizer(template, return_tensors="pt")
+     outputs = prompt_model.generate(inputs.input_ids, max_length=100)
+     return prompt_tokenizer.decode(outputs[0], skip_special_tokens=True)
+
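+ # Example (hypothetical inputs): enhance_prompt("a castle at sunset", "Anime") fills the
+ # instruction template above and returns flan-t5-small's decoded output as the image prompt.
+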
+ # ========== Step 2: Image Generation ==========
+ sd_pipe = StableDiffusionPipeline.from_pretrained(
+     "runwayml/stable-diffusion-v1-5",
+     torch_dtype=torch.float32,
+     use_safetensors=True
+ )
+ sd_pipe.enable_attention_slicing()  # reduce memory usage
+
+ def generate_image(enhanced_prompt, steps=20, guidance=7.5):
+     return sd_pipe(
+         enhanced_prompt,
+         num_inference_steps=int(steps),
+         guidance_scale=guidance,
+         generator=torch.Generator().manual_seed(42)
+     ).images[0]
+
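+ # Note: float32 weights are used because the Space runs on CPU, and the fixed seed (42)
+ # makes repeated generations of the same prompt reproducible.
+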
+ # ========== Step 3: Voice Input ==========
+ recognizer = sr.Recognizer()
+
+ def audio_to_text(audio_file):
+     with sr.AudioFile(audio_file) as source:
+         audio = recognizer.record(source)
+     return recognizer.recognize_whisper(audio, model="tiny.en")
+
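+ # recognize_whisper transcribes locally with the openai-whisper package; "tiny.en" is the
+ # smallest English-only model, which keeps CPU latency manageable.
+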
+ # ========== Gradio Interface ==========
+ with gr.Blocks(title="AI Art Studio") as app:
+     gr.Markdown("## 🎨 AI Art Generator (CPU Optimized)")
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             # ===== Input controls =====
+             input_type = gr.Radio(["Text", "Voice"], value="Text", label="Input mode")
+             # (on Gradio 4.x, the upload source is configured as sources=["upload"] instead)
+             voice_input = gr.Audio(source="upload", type="filepath", visible=False)
+             text_input = gr.Textbox(label="Description", placeholder="Describe the image you want to generate...")
+
+             style_choice = gr.Dropdown(
+                 ["Digital Art", "Oil Painting", "Anime", "Photorealistic"],
+                 value="Digital Art",
+                 label="Art style"
+             )
+
+             generate_btn = gr.Button("Generate artwork", variant="primary")
+
+             with gr.Accordion("Advanced settings", open=False):
+                 steps_slider = gr.Slider(10, 30, value=20, step=1, label="Inference steps")
+                 guidance_slider = gr.Slider(5.0, 10.0, value=7.5, label="Guidance scale (creative freedom)")
+
+         with gr.Column(scale=3):
+             # ===== Output display =====
+             prompt_output = gr.Textbox(label="Enhanced prompt", interactive=False)
+             image_output = gr.Image(label="Result", show_label=False)
+
+     # ===== Interaction logic =====
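+     # The click handler chains three steps with .success(): (1) resolve the prompt text from
+     # voice or textbox, (2) expand it with flan-t5-small, (3) render it with Stable Diffusion.
+     # Each step runs only if the previous one completed without raising an error.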
+     input_type.change(
+         fn=lambda x: (gr.update(visible=(x == "Voice")), gr.update(visible=(x == "Text"))),
+         inputs=input_type,
+         outputs=[voice_input, text_input]
+     )
+
+     generate_btn.click(
+         # Pass the textbox content through in Text mode; transcribe the uploaded audio in Voice mode.
+         fn=lambda audio, mode, text: audio_to_text(audio) if mode == "Voice" else text,
+         inputs=[voice_input, input_type, text_input],
+         outputs=text_input
+     ).success(
+         fn=enhance_prompt,
+         inputs=[text_input, style_choice],
+         outputs=prompt_output
+     ).success(
+         fn=generate_image,
+         inputs=[prompt_output, steps_slider, guidance_slider],
+         outputs=image_output
+     )
+
+ # ========== Step 4: Hugging Face Deployment ==========
+ if __name__ == "__main__":
+     app.launch(server_name="0.0.0.0", server_port=7860)
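+ # Illustrative smoke test (hypothetical, bypasses the UI): import enhance_prompt and
+ # generate_image from this module and run
+ #     generate_image(enhance_prompt("a koi pond", "Oil Painting")).save("out.png")
+ # to confirm both model pipelines load before deploying the Space.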