Spaces:

Hieucyber2208
/

know-flow

Running

App Files Files Community

hieu-nguyen2208 committed on Mar 3

Commit

c9a39eb

1 Parent(s): 864775e

"LOL"

Browse files

Files changed (5) hide show

app.py +25 -2
main.py +3 -1
src/image_gen.py +13 -12
src/text_processing.py +19 -13
src/text_to_video.py +0 -1

app.py CHANGED Viewed

@@ -8,9 +8,11 @@ except RuntimeError:
 import streamlit as st
 from main import main
 import os
 # Định nghĩa đường dẫn video đầu ra
 OUTPUT_VIDEO_PATH = "final_output.mp4"
 # Tiêu đề ứng dụng
 st.set_page_config(page_title="KnowFlow", page_icon="📖")
@@ -74,6 +76,24 @@ art_style = st.text_input("🖌️ Image Description Style", placeholder="Exampl
 style = st.text_input("🎨 Image Style", placeholder="Example: realistic, anime,...")
 color_palette = st.text_input("🌈 Color Palette", placeholder="Example: vibrant, monochrome,...")
 # Nút chạy pipeline
 if st.button("🚀 Generate Video"):
     if file_path and os.path.exists(file_path):
@@ -83,11 +103,14 @@ if st.button("🚀 Generate Video"):
         # Kiểm tra xem video đã được tạo chưa
         if os.path.exists(OUTPUT_VIDEO_PATH):
             st.success("🎉 Video generated successfully!")
             # Tạo link tải về
-            with open(OUTPUT_VIDEO_PATH, "rb") as video_file:
                 st.download_button(label="📥 Download Video", data=video_file, file_name="final_output_fixed.mp4", mime="video/mp4")
         else:
             st.error("⚠️ Video generation failed. Please check the logs.")
     else:
-        st.error("⚠️ Please upload a valid PDF file.")

 import streamlit as st
 from main import main
 import os
+import subprocess
 # Định nghĩa đường dẫn video đầu ra
 OUTPUT_VIDEO_PATH = "final_output.mp4"
+OUTPUT_VIDEO_FIXED_PATH = "final_output_fixed.mp4"
 # Tiêu đề ứng dụng
 st.set_page_config(page_title="KnowFlow", page_icon="📖")
 style = st.text_input("🎨 Image Style", placeholder="Example: realistic, anime,...")
 color_palette = st.text_input("🌈 Color Palette", placeholder="Example: vibrant, monochrome,...")
+def convert_audio_format(video_input, video_output):
+    """Chuyển đổi định dạng âm thanh của video sang AAC."""
+    if not os.path.exists(video_input):
+        raise FileNotFoundError(f"File '{video_input}' không tồn tại!")
+    command = [
+        "ffmpeg", "-i", video_input,
+        "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
+        "-y",  # Ghi đè nếu file output đã tồn tại
+        video_output
+    ]
+    try:
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        print(f"✅ Chuyển đổi thành công: {video_output}")
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Lỗi khi chuyển đổi video: {e.stderr.decode()}")
 # Nút chạy pipeline
 if st.button("🚀 Generate Video"):
     if file_path and os.path.exists(file_path):
         # Kiểm tra xem video đã được tạo chưa
         if os.path.exists(OUTPUT_VIDEO_PATH):
             st.success("🎉 Video generated successfully!")
+            # Chuyển đổi định dạng âm thanh
+            convert_audio_format(OUTPUT_VIDEO_PATH, OUTPUT_VIDEO_FIXED_PATH)
             # Tạo link tải về
+            with open(OUTPUT_VIDEO_FIXED_PATH, "rb") as video_file:
                 st.download_button(label="📥 Download Video", data=video_file, file_name="final_output_fixed.mp4", mime="video/mp4")
         else:
             st.error("⚠️ Video generation failed. Please check the logs.")
     else:
+        st.error("⚠️ Please upload a valid PDF file.")

main.py CHANGED Viewed

@@ -17,4 +17,6 @@ def main(file_path, analysis_level='basic', writting_style='academic', word_lowe
     text_processing(file_path = file_path, analysis_level=analysis_level, writting_style=writting_style, word_lower_limit = word_lower_limit, word_upper_limit=word_upper_limit )
     text_to_speech(gender = gender, speed = speed)
     image_gen(number_of_images = number_of_images, detail_level=detail_level, perspective=perspective, emotion=emotion, time_setting=time_setting, art_style=art_style, style=style, color_palette=color_palette)
-    text_to_video()

     text_processing(file_path = file_path, analysis_level=analysis_level, writting_style=writting_style, word_lower_limit = word_lower_limit, word_upper_limit=word_upper_limit )
     text_to_speech(gender = gender, speed = speed)
     image_gen(number_of_images = number_of_images, detail_level=detail_level, perspective=perspective, emotion=emotion, time_setting=time_setting, art_style=art_style, style=style, color_palette=color_palette)
+    text_to_video()
+if __name__ == "__main__":  # Direct-run entry point: calls main() with only a hard-coded PDF path.
+    main('phan-tich-hinh-tuong-nguoi-lai-do-song-da-2.pdf')

src/image_gen.py CHANGED Viewed

@@ -8,13 +8,13 @@ from huggingface_hub.utils import HfHubHTTPError
 import random
 import time
 from dotenv import load_dotenv
-load_dotenv()
-HF_TOKEN = os.getenv("HF_TOKEN")
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-client_gemini = genai.Client(api_key = GOOGLE_API_KEY)
-client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN)
 def split_text_for_images(number_of_images):
     with open("text.txt", "r", encoding="utf-8") as file:
         text = file.read().strip()
@@ -39,7 +39,7 @@ def split_text_for_images(number_of_images):
         start = end  # Bắt đầu đoạn tiếp theo từ đây
     return chunks
-def describe_image(description, detail_level="short", perspective="neutral", emotion=None, time_setting=None, art_style=None):
     """
     Nhận một đoạn văn mô tả chi tiết và trả về một câu mô tả hình ảnh theo các tùy chỉnh.
@@ -71,13 +71,13 @@ def describe_image(description, detail_level="short", perspective="neutral", emo
     try:
         response = client_gemini.models.generate_content(
-            model = "gemini-2.0-flash", contents = prompt
         )
         return response.text.strip()
     except Exception as e:
         print(f"Lỗi khi gọi API Gemini: {e}")
         return ""
-def generate_image(prompt, output_path, style=None, color_palette=None):
     model="stabilityai/stable-diffusion-3.5-large"
     """
     Tạo hình ảnh từ mô tả văn bản với các tùy chỉnh linh hoạt.
@@ -99,11 +99,12 @@ def generate_image(prompt, output_path, style=None, color_palette=None):
     image.save(output_path)
     print(f"✅Image saved at {output_path}")
 def image_gen(number_of_images = 3,detail_level = "short", perspective="neutral", emotion=None, time_setting=None, art_style=None, style=None, color_palette=None):
     texts = split_text_for_images(number_of_images)
     index = 0
     for text in tqdm(texts, desc="Processing", unit="image"):
         output_path = f"{index}.png"
-        prompt = describe_image(text, detail_level, perspective, emotion, time_setting, art_style)
         print(prompt)
         # Cơ chế retry với backoff
@@ -112,7 +113,7 @@ def image_gen(number_of_images = 3,detail_level = "short", perspective="neutral"
         while retry_count < max_retries:
             try:
-                generate_image(prompt, output_path, style, color_palette)
                 time.sleep(60)  # Chờ sau khi tạo ảnh thành công
                 break  # Nếu thành công thì thoát khỏi vòng lặp retry
             except HfHubHTTPError as e:

 import random
 import time
 from dotenv import load_dotenv
+def set_up_api():
+    load_dotenv()
+    HF_TOKEN = os.getenv("HF_TOKEN")
+    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+    client_gemini = genai.Client(api_key = GOOGLE_API_KEY)
+    client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN)
+    return client_gemini, client
 def split_text_for_images(number_of_images):
     with open("text.txt", "r", encoding="utf-8") as file:
         text = file.read().strip()
         start = end  # Bắt đầu đoạn tiếp theo từ đây
     return chunks
+def describe_image(description, client_gemini, detail_level="short", perspective="neutral", emotion=None, time_setting=None, art_style=None):
     """
     Nhận một đoạn văn mô tả chi tiết và trả về một câu mô tả hình ảnh theo các tùy chỉnh.
     try:
         response = client_gemini.models.generate_content(
+            model = "gemini-2.0-flash", contents = [prompt]
         )
         return response.text.strip()
     except Exception as e:
         print(f"Lỗi khi gọi API Gemini: {e}")
         return ""
+def generate_image(prompt, client, output_path, style=None, color_palette=None):
     model="stabilityai/stable-diffusion-3.5-large"
     """
     Tạo hình ảnh từ mô tả văn bản với các tùy chỉnh linh hoạt.
     image.save(output_path)
     print(f"✅Image saved at {output_path}")
 def image_gen(number_of_images = 3,detail_level = "short", perspective="neutral", emotion=None, time_setting=None, art_style=None, style=None, color_palette=None):
+    client_gemini, client = set_up_api()
     texts = split_text_for_images(number_of_images)
     index = 0
     for text in tqdm(texts, desc="Processing", unit="image"):
         output_path = f"{index}.png"
+        prompt = describe_image(text, client_gemini, detail_level, perspective, emotion, time_setting, art_style)
         print(prompt)
         # Cơ chế retry với backoff
         while retry_count < max_retries:
             try:
+                generate_image(prompt, client, output_path, style, color_palette)
                 time.sleep(60)  # Chờ sau khi tạo ảnh thành công
                 break  # Nếu thành công thì thoát khỏi vòng lặp retry
             except HfHubHTTPError as e:

src/text_processing.py CHANGED Viewed

@@ -4,9 +4,12 @@ from docx import Document
 from google import genai
 from dotenv import load_dotenv
-load_dotenv()
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-client = genai.Client(api_key=GOOGLE_API_KEY)
 ####################### - TEXT EXTRACTION - #######################
 def extract_text_from_pdf(pdf_path):
@@ -37,7 +40,7 @@ def extract_text_from_file(file_path):
     else:
         raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")
 ####################### - SEMANTIC CHUNKING - #######################
-def split_text_by_semantics(text):
     prompt = f"""
     Bạn là một chuyên gia xử lý văn bản. Hãy chia văn bản sau thành một số đoạn có ý nghĩa sao cho mỗi đoạn vừa đủ để giải thích trong khoảng 3 đến 5 câu.
@@ -52,7 +55,7 @@ def split_text_by_semantics(text):
     try:
         response = client.models.generate_content(
-            model="gemini-2.0-flash", contents=prompt
         )
         result_text = response.text.strip()
         print(result_text)
@@ -65,7 +68,7 @@ def split_text_by_semantics(text):
         return []
 ####################### - CONTENT GENERATION - #######################
-def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_style='academic', word_lower_limit=100, word_upper_limit=150):
     """
     Phân tích nội dung của văn bản theo mức độ và phong cách mong muốn.
@@ -99,9 +102,8 @@ def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_st
     try:
         response = client.models.generate_content(
-            model="gemini-2.0-flash", contents=overview_prompt
         )
-        print(response)
         explanations = []
         for idx, chunk in enumerate(chunks, start=1):
@@ -114,9 +116,10 @@ def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_st
             Hãy đảm bảo phần tóm tắt không vượt quá {word_upper_limit} từ và không ít hơn {word_lower_limit}.
             """
-            part_response = response = client.models.generate_content(
-                    model="gemini-2.0-flash", contents=part_prompt
                 )
             explanations.append(part_response.text.strip())
         return explanations
@@ -125,15 +128,16 @@ def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_st
         print(f"Lỗi khi gọi API Gemini: {e}")
         return []
 def text_processing(file_path, analysis_level='basic', writting_style='academic', word_lower_limit = 100, word_upper_limit = 150):
     # Trích xuất văn bản từ file PDF
     text = extract_text_from_file(file_path=file_path)
     with open("./text.txt", "w", encoding="utf-8") as f:
         f.write(text)
     # Tách văn bản theo ngữ nghĩa
-    semantic_chunks = split_text_by_semantics(text)
     # Tạo thuyết minh cho từng phần semantic chunk
-    explanations = generate_explaination_for_chunks(semantic_chunks, analysis_level=analysis_level, writting_style = writting_style, word_lower_limit = word_lower_limit, word_upper_limit=word_upper_limit)
     # Tạo thư mục nếu chưa tồn tại
     output_dir = "./"
@@ -150,4 +154,6 @@ def text_processing(file_path, analysis_level='basic', writting_style='academic'
                 output_file = os.path.join(output_dir, f"{chunk_idx}_{sentence_idx}.txt")  # Tên file dạng "chunkID_sentenceID.txt"
                 with open(output_file, "w", encoding="utf-8") as f:
                     f.write(sentence.replace("*","") + ".")  # Giữ dấu chấm cuối câu
-                print(f"Đã lưu: {output_file}")

 from google import genai
 from dotenv import load_dotenv
+def set_up_api():
+    load_dotenv()
+    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+    print(GOOGLE_API_KEY)
+    client = genai.Client(api_key=GOOGLE_API_KEY)
+    return client
 ####################### - TEXT EXTRACTION - #######################
 def extract_text_from_pdf(pdf_path):
     else:
         raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")
 ####################### - SEMANTIC CHUNKING - #######################
+def split_text_by_semantics(text, client):
     prompt = f"""
     Bạn là một chuyên gia xử lý văn bản. Hãy chia văn bản sau thành một số đoạn có ý nghĩa sao cho mỗi đoạn vừa đủ để giải thích trong khoảng 3 đến 5 câu.
     try:
         response = client.models.generate_content(
+            model="gemini-2.0-flash", contents=[prompt]
         )
         result_text = response.text.strip()
         print(result_text)
         return []
 ####################### - CONTENT GENERATION - #######################
+def generate_explaination_for_chunks(chunks, client, analysis_level='basic', writting_style='academic', word_lower_limit=100, word_upper_limit=150):
     """
     Phân tích nội dung của văn bản theo mức độ và phong cách mong muốn.
     try:
         response = client.models.generate_content(
+            model="gemini-2.0-flash", contents=[overview_prompt]
         )
         explanations = []
         for idx, chunk in enumerate(chunks, start=1):
             Hãy đảm bảo phần tóm tắt không vượt quá {word_upper_limit} từ và không ít hơn {word_lower_limit}.
             """
+            part_response = client.models.generate_content(
+                    model="gemini-2.0-flash", contents=[part_prompt]
                 )
+            print(part_response.text.strip())
             explanations.append(part_response.text.strip())
         return explanations
         print(f"Lỗi khi gọi API Gemini: {e}")
         return []
 def text_processing(file_path, analysis_level='basic', writting_style='academic', word_lower_limit = 100, word_upper_limit = 150):
+    client = set_up_api()
     # Trích xuất văn bản từ file PDF
     text = extract_text_from_file(file_path=file_path)
     with open("./text.txt", "w", encoding="utf-8") as f:
         f.write(text)
     # Tách văn bản theo ngữ nghĩa
+    semantic_chunks = split_text_by_semantics(text, client)
     # Tạo thuyết minh cho từng phần semantic chunk
+    explanations = generate_explaination_for_chunks(semantic_chunks, client, analysis_level=analysis_level, writting_style = writting_style, word_lower_limit = word_lower_limit, word_upper_limit=word_upper_limit)
     # Tạo thư mục nếu chưa tồn tại
     output_dir = "./"
                 output_file = os.path.join(output_dir, f"{chunk_idx}_{sentence_idx}.txt")  # Tên file dạng "chunkID_sentenceID.txt"
                 with open(output_file, "w", encoding="utf-8") as f:
                     f.write(sentence.replace("*","") + ".")  # Giữ dấu chấm cuối câu
+                print(f"Đã lưu: {output_file}")
+if __name__ == "__main__":  # Direct-run entry point: calls text_processing() with only a hard-coded PDF path.
+    text_processing("phan-tich-hinh-tuong-nguoi-lai-do-song-da-2.pdf")

src/text_to_video.py CHANGED Viewed

@@ -8,7 +8,6 @@ import os
 from itertools import accumulate
 import pysrt
 def format_time(seconds):
     """Chuyển đổi thời gian (giây) thành định dạng SRT hh:mm:ss,ms"""
     mins, sec = divmod(seconds, 60)

 from itertools import accumulate
 import pysrt
 def format_time(seconds):
     """Chuyển đổi thời gian (giây) thành định dạng SRT hh:mm:ss,ms"""
     mins, sec = divmod(seconds, 60)