tangchao5355 committed (verified)
Commit cd3f05b · Parent(s): 46842e6

Create app.py

Files changed (1)
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
+ # app.py
+ import gradio as gr
+ import torch
+ from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
+ from diffusers import StableDiffusionPipeline
+ import speech_recognition as sr
+ from io import BytesIO
+
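+ # Note: running this app requires at least gradio, torch, transformers, diffusers,
+ # SpeechRecognition, and openai-whisper (used by recognize_whisper below) to be installed,
+ # e.g. via the Space's requirements.txt.
+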
+ # ========== Step 1: Prompt Enhancement ==========
+ prompt_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
+ prompt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+
+ def enhance_prompt(raw_input, style_choice):
+     template = f"Generate a detailed Stable Diffusion prompt about: {raw_input} in {style_choice} style."
+     inputs = prompt_tokenizer(template, return_tensors="pt")
+     outputs = prompt_model.generate(inputs.input_ids, max_length=100)
+     return prompt_tokenizer.decode(outputs[0], skip_special_tokens=True)
+
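+ # Example (hypothetical inputs): enhance_prompt("a castle at sunset", "Anime") fills the
+ # instruction template above and returns flan-t5-small's decoded output as the image prompt.
+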
+ # ========== Step 2: Image Generation ==========
+ sd_pipe = StableDiffusionPipeline.from_pretrained(
+     "runwayml/stable-diffusion-v1-5",
+     torch_dtype=torch.float32,
+     use_safetensors=True
+ )
+ sd_pipe.enable_attention_slicing()  # reduce memory usage
+
+ def generate_image(enhanced_prompt, steps=20, guidance=7.5):
+     return sd_pipe(
+         enhanced_prompt,
+         num_inference_steps=int(steps),
+         guidance_scale=guidance,
+         generator=torch.Generator().manual_seed(42)
+     ).images[0]
+
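+ # Note: float32 weights are used because the Space runs on CPU, and the fixed seed (42)
+ # makes repeated generations of the same prompt reproducible.
+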
+ # ========== Step 3: Voice Input ==========
+ recognizer = sr.Recognizer()
+
+ def audio_to_text(audio_file):
+     with sr.AudioFile(audio_file) as source:
+         audio = recognizer.record(source)
+     return recognizer.recognize_whisper(audio, model="tiny.en")
+
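+ # recognize_whisper transcribes locally with the openai-whisper package; "tiny.en" is the
+ # smallest English-only model, which keeps CPU latency manageable.
+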
+ # ========== Gradio Interface ==========
+ with gr.Blocks(title="AI Art Studio") as app:
+     gr.Markdown("## 🎨 AI Art Generator (CPU Optimized)")
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             # ===== Input controls =====
+             input_type = gr.Radio(["Text", "Voice"], value="Text", label="Input mode")
+             # (on Gradio 4.x, the upload source is configured as sources=["upload"] instead)
+             voice_input = gr.Audio(source="upload", type="filepath", visible=False)
+             text_input = gr.Textbox(label="Description", placeholder="Describe the image you want to generate...")
+
+             style_choice = gr.Dropdown(
+                 ["Digital Art", "Oil Painting", "Anime", "Photorealistic"],
+                 value="Digital Art",
+                 label="Art style"
+             )
+
+             generate_btn = gr.Button("Generate artwork", variant="primary")
+
+             with gr.Accordion("Advanced settings", open=False):
+                 steps_slider = gr.Slider(10, 30, value=20, step=1, label="Inference steps")
+                 guidance_slider = gr.Slider(5.0, 10.0, value=7.5, label="Guidance scale (creative freedom)")
+
+         with gr.Column(scale=3):
+             # ===== Output display =====
+             prompt_output = gr.Textbox(label="Enhanced prompt", interactive=False)
+             image_output = gr.Image(label="Result", show_label=False)
+
+     # ===== Interaction logic =====
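+     # The click handler chains three steps with .success(): (1) resolve the prompt text from
+     # voice or textbox, (2) expand it with flan-t5-small, (3) render it with Stable Diffusion.
+     # Each step runs only if the previous one completed without raising an error.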
+     input_type.change(
+         fn=lambda x: (gr.update(visible=(x == "Voice")), gr.update(visible=(x == "Text"))),
+         inputs=input_type,
+         outputs=[voice_input, text_input]
+     )
+
+     generate_btn.click(
+         # Pass the textbox content through in Text mode; transcribe the uploaded audio in Voice mode.
+         fn=lambda audio, mode, text: audio_to_text(audio) if mode == "Voice" else text,
+         inputs=[voice_input, input_type, text_input],
+         outputs=text_input
+     ).success(
+         fn=enhance_prompt,
+         inputs=[text_input, style_choice],
+         outputs=prompt_output
+     ).success(
+         fn=generate_image,
+         inputs=[prompt_output, steps_slider, guidance_slider],
+         outputs=image_output
+     )
+
+ # ========== Step 4: Hugging Face Deployment ==========
+ if __name__ == "__main__":
+     app.launch(server_name="0.0.0.0", server_port=7860)
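+ # Illustrative smoke test (hypothetical, bypasses the UI): import enhance_prompt and
+ # generate_image from this module and run
+ #     generate_image(enhance_prompt("a koi pond", "Oil Painting")).save("out.png")
+ # to confirm both model pipelines load before deploying the Space.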