Hieucyber2208 committed on
Commit
28d00b8
·
verified ·
1 Parent(s): 8d7b021

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -95
app.py CHANGED
@@ -1,97 +1,50 @@
1
- import asyncio
2
-
3
- try:
4
- asyncio.get_running_loop()
5
- except RuntimeError:
6
- asyncio.run(asyncio.sleep(0)) # Ensures an event loop is created before Streamlit starts
7
-
8
- import streamlit as st
9
- from main import main
10
  import os
11
-
12
- # Định nghĩa đường dẫn video đầu ra
13
- OUTPUT_VIDEO_PATH = "final_output.mp4"
14
-
15
- # Tiêu đề ứng dụng
16
- st.set_page_config(page_title="KnowFlow", page_icon="📖")
17
- st.markdown("<h1 style='text-align: center;'>📖 KnowFlow 🌊</h1>", unsafe_allow_html=True)
18
- st.markdown("<h4 style='text-align: center;'>Convert documents into videos with AI-powered storytelling</h4>", unsafe_allow_html=True)
19
-
20
- # Thông tin tác giả
21
- st.markdown("---")
22
- st.markdown("👨‍💻 **Author:** Nguyễn Trung Hiếu")
23
- st.markdown("🔗 [GitHub Repository](https://github.com/hieunguyen-cyber/KnowFlow.git)")
24
- st.markdown("---")
25
- st.markdown("""
26
- ## 🎯 Purpose
27
- KnowFlow automates the process of converting lecture documents (PDF, DOCX) into narrated videos with structured explanations. It extracts text, formulas, and images, generates explanations, converts text to speech, and assembles everything into a video.
28
-
29
- ## 🛠️ How to Use
30
- 1️⃣ **Upload a lecture file (PDF, DOCX)**.
31
- 2️⃣ **Select processing options** (text extraction, summarization, TTS).
32
- 3️⃣ **Generate the video** – the system will process and compile it.
33
- 4️⃣ **Download the final video** for review or sharing.
34
-
35
- 🚀 Fully open-source and free to use! \n
36
- If you find it's slow, then another person must be using the GPU. Please wait!!
37
- """)
38
- # Upload file PDF
39
- uploaded_file = st.file_uploader("📂 Upload your document (PDF)", type=["pdf", "docx"])
40
-
41
- # Nếu có file, lưu vào thư mục tạm và lấy đường dẫn
42
- file_path = None
43
- if uploaded_file:
44
- file_path = f"{uploaded_file.name}"
45
- with open(file_path, "wb") as f:
46
- f.write(uploaded_file.getbuffer()) # Lưu file thực tế
47
-
48
- number_of_images = st.slider("🖼️ Nhập số ảnh", 1, 10, 3)
49
-
50
- # Cấu hình đầu vào
51
- gender = st.radio("🗣️ Select Voice Gender", options=["female", "male"])
52
-
53
- # Nếu chọn giọng nam, vô hiệu hóa tốc độ (chỉ cho phép "normal")
54
- if gender == "male":
55
- speed = st.radio("⚡ Speech Speed (Male voice supports only normal)", options=["normal"], disabled=True)
56
- else:
57
- speed = st.radio("⚡ Speech Speed", options=["fast", "normal", "slow"])
58
-
59
- analysis_level = st.radio("Analysis Level", options=["basic", "detailed"])
60
- writing_style = st.radio("Writing Style", options=["academic", "popular", "creative", "humorous"])
61
-
62
- # Tạo thanh trượt với giá trị từ 50 đến 250, bước nhảy 50
63
- word_lower_limit, word_upper_limit = st.slider(
64
- "Chọn khoảng độ dài văn bản:",
65
- min_value=50,
66
- max_value=250,
67
- value=(50, 250), # Giá trị mặc định
68
- step=50
69
- )
70
-
71
- st.write(f"Giới hạn độ dài văn bản từ **{word_lower_limit}** đến **{word_upper_limit}** ký tự.")
72
-
73
- detail_level = st.radio("📖 Detail Level of Image Description", options=["short", "detailed"])
74
- perspective = st.radio("🔎 Perspective", options=["subjective", "neutral"])
75
- emotion = st.text_input("🎭 Emotion", placeholder="Example: mysterious, romantic,...")
76
- time_setting = st.text_input("⏳ Time Setting", placeholder="Example: modern, medieval,...")
77
- art_style = st.text_input("🖌️ Image Description Style", placeholder="Example: realistic, abstract,...")
78
- style = st.text_input("🎨 Image Style", placeholder="Example: realistic, anime,...")
79
- color_palette = st.text_input("🌈 Color Palette", placeholder="Example: vibrant, monochrome,...")
80
-
81
- # Nút chạy pipeline
82
- if st.button("🚀 Generate Video"):
83
- if file_path and os.path.exists(file_path):
84
- st.success("⏳ Processing started...")
85
- main(file_path, analysis_level, writing_style, word_lower_limit, word_upper_limit, gender, speed, number_of_images, detail_level, perspective, emotion, time_setting, art_style, style, color_palette)
86
-
87
- # Kiểm tra xem video đã được tạo chưa
88
- if os.path.exists(OUTPUT_VIDEO_PATH):
89
- st.success("🎉 Video generated successfully!")
90
-
91
- # Tạo link tải về
92
- with open(OUTPUT_VIDEO_PATH, "rb") as video_file:
93
- st.download_button(label="📥 Download Video", data=video_file, file_name="final_output.mp4", mime="video/mp4")
94
- else:
95
- st.error("⚠️ Video generation failed. Please check the logs.")
96
  else:
97
- st.error("⚠️ Please upload a valid PDF file.")
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import VitsModel, AutoTokenizer
3
+ import torchaudio
 
 
 
 
 
 
4
  import os
5
+ from gtts import gTTS
6
+
7
def generate_audio(text, filename="output.mp3", gender="female", speed="normal"):
    """
    Convert text to Vietnamese speech and save it as an audio file.

    Parameters:
        text (str): The text to convert. Empty or whitespace-only text is
            skipped with a warning and no file is written.
        filename (str): The output file name.
        gender (str): "female" (uses gTTS) or "male" (uses MMS-TTS);
            compared case-insensitively. Any other value only prints a
            warning.
        speed (str): "slow", "normal", or "fast". Only used by the gTTS
            branch, where "fast" maps to the same setting as "normal".

    Returns:
        None. The result is the audio file written to ``filename``.
    """
    # Neither backend is called with nothing to say; report and skip
    # instead of failing somewhere inside the TTS library.
    if not text or not text.strip():
        print("⚠️ Empty text, no audio generated.")
        return

    lang = "vi"
    voice = gender.lower()

    if voice == "female":
        # gTTS only provides a female voice; its only speed control is the
        # boolean ``slow`` flag, so unknown speeds fall back to normal.
        speed_mapping = {"slow": True, "normal": False, "fast": False}
        slow = speed_mapping.get(speed.lower(), False)

        tts = gTTS(text=text, lang=lang, slow=slow)
        tts.save(filename)
        print(f"✅ Audio saved as {filename}")

    elif voice == "male":
        # MMS-TTS is used for the male voice.
        # NOTE(review): model and tokenizer are loaded on every call —
        # consider caching them if this is called once per text file.
        model = VitsModel.from_pretrained("facebook/mms-tts-vie")
        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-vie")

        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            output = model(**inputs).waveform

        # Save with the sampling rate the model was trained at rather than
        # a hard-coded 24000; a mismatched rate changes playback speed and
        # pitch.
        # NOTE(review): backend="sox_io" and an .mp3 target depend on the
        # installed torchaudio version — confirm this combination works.
        torchaudio.save(
            filename, output, model.config.sampling_rate, backend="sox_io"
        )
        print(f" Audio saved as {filename}")

    else:
        print("⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.")
43
def text_to_speech(gender, speed):
    """
    Generate one audio file for every eligible ``.txt`` file in the
    current directory.

    Files named ``text.txt`` and ``requirements.txt`` are skipped. The
    remaining files are processed in sorted name order; each one is read as
    UTF-8 and passed to ``generate_audio``, which writes an ``.mp3`` file
    with the same base name next to it.

    Parameters:
        gender (str): Voice gender forwarded to ``generate_audio``.
        speed (str): Speech speed forwarded to ``generate_audio``.
    """
    text_folder = "./"
    excluded = {"text.txt", "requirements.txt"}
    text_files = sorted(
        name
        for name in os.listdir(text_folder)
        if name.endswith(".txt") and name not in excluded
    )
    for text_file in text_files:
        text_path = os.path.join(text_folder, text_file)
        with open(text_path, "r", encoding="utf-8") as file:
            content = file.read()
        # Swap only the extension; a plain str.replace("txt", "mp3") would
        # also rewrite "txt" occurring elsewhere in the file name.
        audio_file = os.path.splitext(text_path)[0] + ".mp3"
        generate_audio(content, audio_file, gender=gender, speed=speed)