Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,97 +1,50 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
asyncio.get_running_loop()
|
5 |
-
except RuntimeError:
|
6 |
-
asyncio.run(asyncio.sleep(0)) # Ensures an event loop is created before Streamlit starts
|
7 |
-
|
8 |
-
import streamlit as st
|
9 |
-
from main import main
|
10 |
import os
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
""
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
number_of_images = st.slider("🖼️ Nhập số ảnh", 1, 10, 3)
|
49 |
-
|
50 |
-
# Cấu hình đầu vào
|
51 |
-
gender = st.radio("🗣️ Select Voice Gender", options=["female", "male"])
|
52 |
-
|
53 |
-
# Nếu chọn giọng nam, vô hiệu hóa tốc độ (chỉ cho phép "normal")
|
54 |
-
if gender == "male":
|
55 |
-
speed = st.radio("⚡ Speech Speed (Male voice supports only normal)", options=["normal"], disabled=True)
|
56 |
-
else:
|
57 |
-
speed = st.radio("⚡ Speech Speed", options=["fast", "normal", "slow"])
|
58 |
-
|
59 |
-
analysis_level = st.radio("Analysis Level", options=["basic", "detailed"])
|
60 |
-
writing_style = st.radio("Writing Style", options=["academic", "popular", "creative", "humorous"])
|
61 |
-
|
62 |
-
# Tạo thanh trượt với giá trị từ 50 đến 250, bước nhảy 50
|
63 |
-
word_lower_limit, word_upper_limit = st.slider(
|
64 |
-
"Chọn khoảng độ dài văn bản:",
|
65 |
-
min_value=50,
|
66 |
-
max_value=250,
|
67 |
-
value=(50, 250), # Giá trị mặc định
|
68 |
-
step=50
|
69 |
-
)
|
70 |
-
|
71 |
-
st.write(f"Giới hạn độ dài văn bản từ **{word_lower_limit}** đến **{word_upper_limit}** ký tự.")
|
72 |
-
|
73 |
-
detail_level = st.radio("📖 Detail Level of Image Description", options=["short", "detailed"])
|
74 |
-
perspective = st.radio("🔎 Perspective", options=["subjective", "neutral"])
|
75 |
-
emotion = st.text_input("🎭 Emotion", placeholder="Example: mysterious, romantic,...")
|
76 |
-
time_setting = st.text_input("⏳ Time Setting", placeholder="Example: modern, medieval,...")
|
77 |
-
art_style = st.text_input("🖌️ Image Description Style", placeholder="Example: realistic, abstract,...")
|
78 |
-
style = st.text_input("🎨 Image Style", placeholder="Example: realistic, anime,...")
|
79 |
-
color_palette = st.text_input("🌈 Color Palette", placeholder="Example: vibrant, monochrome,...")
|
80 |
-
|
81 |
-
# Nút chạy pipeline
|
82 |
-
if st.button("🚀 Generate Video"):
|
83 |
-
if file_path and os.path.exists(file_path):
|
84 |
-
st.success("⏳ Processing started...")
|
85 |
-
main(file_path, analysis_level, writing_style, word_lower_limit, word_upper_limit, gender, speed, number_of_images, detail_level, perspective, emotion, time_setting, art_style, style, color_palette)
|
86 |
-
|
87 |
-
# Kiểm tra xem video đã được tạo chưa
|
88 |
-
if os.path.exists(OUTPUT_VIDEO_PATH):
|
89 |
-
st.success("🎉 Video generated successfully!")
|
90 |
-
|
91 |
-
# Tạo link tải về
|
92 |
-
with open(OUTPUT_VIDEO_PATH, "rb") as video_file:
|
93 |
-
st.download_button(label="📥 Download Video", data=video_file, file_name="final_output.mp4", mime="video/mp4")
|
94 |
-
else:
|
95 |
-
st.error("⚠️ Video generation failed. Please check the logs.")
|
96 |
else:
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import VitsModel, AutoTokenizer
|
3 |
+
import torchaudio
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import os
|
5 |
+
from gtts import gTTS
|
6 |
+
|
7 |
+
_MMS_TTS_MODEL_ID = "facebook/mms-tts-vie"
# Holds the loaded (model, tokenizer) pair so repeated calls do not reload it.
_mms_tts_cache = {}


def _load_mms_tts():
    """Return the MMS-TTS (model, tokenizer) pair, loading it on first use only."""
    if "pair" not in _mms_tts_cache:
        _mms_tts_cache["pair"] = (
            VitsModel.from_pretrained(_MMS_TTS_MODEL_ID),
            AutoTokenizer.from_pretrained(_MMS_TTS_MODEL_ID),
        )
    return _mms_tts_cache["pair"]


def generate_audio(text, filename="output.mp3", gender="female", speed="normal"):
    """
    Convert text to speech and save it as an audio file.

    Parameters:
        text (str): The text to convert.
        filename (str): The output file name.
        gender (str): "male" (use MMS-TTS) or "female" (use gTTS);
            compared case-insensitively.
        speed (str): "slow", "normal", or "fast" (only for gTTS). gTTS is
            only given a slow flag here, so "fast" and any unrecognised
            value behave like "normal".

    Returns:
        None. A confirmation is printed after saving; an unsupported
        gender prints a warning and nothing is written.
    """
    lang = "vi"
    voice = gender.lower()

    if voice == "female":
        # gTTS only provides a female voice; its single speed control is `slow`.
        slow = speed.lower() == "slow"

        tts = gTTS(text=text, lang=lang, slow=slow)
        tts.save(filename)
        print(f"✅ Audio saved as {filename}")

    elif voice == "male":
        # MMS-TTS is used for the male voice.
        model, tokenizer = _load_mms_tts()

        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            waveform = model(**inputs).waveform

        # Save at the rate the checkpoint declares instead of a hard-coded
        # 24000 Hz; a mismatched rate changes playback speed and pitch.
        sample_rate = model.config.sampling_rate
        torchaudio.save(filename, waveform, sample_rate, backend="sox_io")
        print(f"✅ Audio saved as {filename}")

    else:
        print("⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.")
|
43 |
+
def text_to_speech(gender, speed):
    """
    Generate one audio file per .txt file found in the current directory.

    Every file whose name ends with ".txt" is processed in sorted order,
    except "text.txt" and "requirements.txt". Each file is read as UTF-8 and
    passed to generate_audio, which writes a file with the same base name
    and an ".mp3" extension.

    Parameters:
        gender (str): Forwarded to generate_audio.
        speed (str): Forwarded to generate_audio.
    """
    text_folder = "./"
    excluded = {"text.txt", "requirements.txt"}
    text_files = sorted(
        name
        for name in os.listdir(text_folder)
        if name.endswith(".txt") and name not in excluded
    )
    for text_file in text_files:
        # text_folder is the working directory, so the bare name resolves.
        with open(text_file, "r", encoding="utf-8") as file:
            content = file.read()
        # Swap only the extension; str.replace("txt", "mp3") would also
        # rewrite any "txt" occurring inside the base name.
        stem, _ = os.path.splitext(text_file)
        audio_file = f"{stem}.mp3"
        generate_audio(content, audio_file, gender=gender, speed=speed)
|