# Source page header: Michael Hu - "add more logging" - commit c72d839 - 5.93 kB
"""
Main entry point for the Audio Translation Web Application
Handles file upload, processing pipeline, and UI rendering
"""
# Configure logging first, ahead of the remaining imports.
import logging

# Timestamp, logger name, severity, then the message itself.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Every record goes both to a file in the working directory and to the
# default stream of StreamHandler.
_LOG_HANDLERS = [
    logging.FileHandler("app.log"),
    logging.StreamHandler(),
]

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=_LOG_HANDLERS,
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
import streamlit as st
import os
import time
import subprocess
from utils.stt import transcribe_audio
from utils.translation import translate_text
from utils.tts_dummy import generate_speech
# Hugging Face Spaces Setup Automation
def setup_huggingface_space():
    """Check that the Space provides espeak-ng and the Kokoro model files.

    Two checks run in order:

    1. ``espeak-ng --version`` must be runnable and exit successfully.
    2. The model checkpoint and voice file must exist under ``./kokoro``.

    When a check fails, the problem is logged, setup instructions are shown
    in the sidebar, and the current script run is halted with ``st.stop()``.
    """
    logger.debug("Running Hugging Face space setup")
    st.sidebar.header("Space Configuration")

    # --- System dependency check -------------------------------------
    espeak_probe = ["espeak-ng", "--version"]
    try:
        subprocess.run(espeak_probe, check=True, capture_output=True)
        logger.debug("espeak-ng verification successful")
    except (FileNotFoundError, subprocess.CalledProcessError):
        # Either the binary is absent or it exited with a non-zero status.
        logger.error("Missing espeak-ng dependency")
        dependency_help = (
            "\n**Missing System Dependencies!** Add this to your Space settings:\n"
            "```txt\n"
            "apt-get update && apt-get install -y espeak-ng\n"
            "```\n"
        )
        st.sidebar.error(dependency_help)
        st.stop()

    # --- Model file check --------------------------------------------
    model_dir = "./kokoro"
    required_files = [
        model_dir + "/kokoro-v0_19.pth",
        model_dir + "/voices/af_bella.pt",
    ]
    some_file_missing = any(not os.path.exists(path) for path in required_files)
    if some_file_missing:
        logger.error("Missing model files in %s", model_dir)
        model_help = (
            "\n**Missing Model Files!** Add this to your Space settings:\n"
            "```txt\n"
            "git clone https://huggingface.co/hexgrad/Kokoro-82M ./kokoro\n"
            "```\n"
        )
        st.sidebar.warning(model_help)
        st.stop()
# Initialize environment configurations: make sure the upload and output
# directories exist before anything tries to write into them.
for _subdir in ("uploads", "outputs"):
    os.makedirs("temp/" + _subdir, exist_ok=True)
def configure_page():
    """Apply the page-level Streamlit settings and inject custom CSS.

    Sets the page title, icon, wide layout and expanded sidebar, then adds a
    ``<style>`` block through ``st.markdown`` with ``unsafe_allow_html=True``.
    """
    logger.debug("Configuring Streamlit page")

    page_options = {
        "page_title": "Audio Translator",
        "page_icon": "🎧",
        "layout": "wide",
        "initial_sidebar_state": "expanded",
    }
    st.set_page_config(**page_options)

    # CSS overrides: top margin, main menu and deploy button visibility,
    # and alert padding.
    custom_css = (
        "\n<style>\n"
        ".reportview-container {margin-top: -2em;}\n"
        "#MainMenu {visibility: hidden;}\n"
        ".stDeployButton {display:none;}\n"
        ".stAlert {padding: 20px !important;}\n"
        "</style>\n"
    )
    st.markdown(custom_css, unsafe_allow_html=True)
def handle_file_processing(upload_path):
    """Run the complete processing pipeline on an uploaded audio file.

    The pipeline has three stages, each reflected in a progress bar and a
    status line in the Streamlit UI:

    1. Speech-to-Text (STT) via ``transcribe_audio``
    2. Machine translation via ``translate_text``
    3. Text-to-Speech (TTS) via ``generate_speech`` with ``language="zh"``

    Args:
        upload_path: Path of the audio file to process.

    Returns:
        A ``(english_text, chinese_text, output_path)`` tuple.

    Raises:
        Exception: Any error raised by a stage is logged with its traceback,
            shown in the UI, and then re-raised unchanged.
    """
    logger.info("Starting processing for: %s", upload_path)
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # STT phase
        logger.debug("Beginning STT processing")
        status_text.markdown("🔍 **Performing Speech Recognition...**")
        with st.spinner("Initializing Whisper model..."):
            english_text = transcribe_audio(upload_path)
        progress_bar.progress(30)
        logger.info(
            "STT completed. Text length: %d characters", len(english_text)
        )

        # Translation phase
        logger.debug("Beginning translation")
        status_text.markdown("🌐 **Translating Content...**")
        with st.spinner("Loading translation model..."):
            chinese_text = translate_text(english_text)
        progress_bar.progress(60)
        logger.info(
            "Translation completed. Translated length: %d characters",
            len(chinese_text),
        )

        # TTS phase
        logger.debug("Beginning TTS generation")
        status_text.markdown("🎵 **Generating Chinese Speech...**")
        with st.spinner("Initializing TTS engine..."):
            output_path = generate_speech(chinese_text, language="zh")
        progress_bar.progress(100)
        logger.info("TTS completed. Output file: %s", output_path)

        # Report success; the caller is responsible for rendering results.
        status_text.success("✅ Processing Complete!")
        return english_text, chinese_text, output_path
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Processing failed: %s", e)
        status_text.error(f"❌ Processing Failed: {str(e)}")
        st.exception(e)
        raise
def render_results(english_text, chinese_text, output_path):
    """Show the pipeline output: both texts on the left, audio on the right.

    Args:
        english_text: Text shown under "Recognition Results".
        chinese_text: Text shown under "Translation Results".
        output_path: Path of the audio file to play and offer for download.
    """
    logger.debug("Rendering results")
    st.divider()

    # The text column is twice as wide as the audio column.
    text_column, audio_column = st.columns([2, 1])

    with text_column:
        text_sections = (
            ("Recognition Results", english_text),
            ("Translation Results", chinese_text),
        )
        for heading, content in text_sections:
            st.subheader(heading)
            st.code(content, language="text")

    with audio_column:
        st.subheader("Audio Output")
        st.audio(output_path)
        # The open file handle is passed straight to the download button.
        with open(output_path, "rb") as audio_file:
            st.download_button(
                label="Download Audio",
                data=audio_file,
                file_name="translated_audio.wav",
                mime="audio/wav",
            )
def main():
    """Run the application workflow.

    Configures the page, shows the uploader, saves an uploaded MP3/WAV file
    under ``temp/uploads``, runs the processing pipeline on it and renders
    the results.
    """
    logger.info("Starting application")
    # setup_huggingface_space()  # First-run configuration checks (disabled)
    configure_page()

    st.title("🎧 High-Quality Audio Translation System")
    st.markdown("Upload English Audio → Get Chinese Speech Output")

    uploaded_file = st.file_uploader(
        "Select Audio File (MP3/WAV)",
        type=["mp3", "wav"],
        accept_multiple_files=False
    )

    if uploaded_file:
        logger.info("File uploaded: %s", uploaded_file.name)
        # The file name is supplied by the client; keep only its final path
        # component so the write below cannot land outside temp/uploads.
        safe_name = os.path.basename(uploaded_file.name) or "upload"
        upload_path = os.path.join("temp/uploads", safe_name)
        with open(upload_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        results = handle_file_processing(upload_path)
        if results:
            render_results(*results)


# Run the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()