# f5-tts_Polish_English_German / inference_cli.py
# Source: Hugging Face repository file view (uploader: Gregniuki).
# Commit caa0b3d (verified): "Update inference_cli.py"; file size 6.25 kB.
# --- START OF FILE inference_cli.py ---
import argparse
import shutil
import soundfile as sf
import os # For path manipulation if needed
import sys # To potentially add app.py directory to path
# Load app.py, which is expected to live in the same directory or elsewhere on
# the Python path. If it cannot be found, its directory may have to be added to
# sys.path first, e.g.:
#     sys.path.append(os.path.dirname(os.path.abspath(__file__)))
try:
    import app
    from app import infer  # main inference function, called from main()
except ImportError as import_error:
    # app.py (or something it imports) could not be located.
    print("Error: Could not import 'app.py'. Make sure it's in the Python path.")
    print(f"Details: {import_error}")
    sys.exit(1)
except Exception as unexpected_error:
    # Any other failure raised while app.py's top-level code was executing.
    print(f"An unexpected error occurred during 'app.py' import: {unexpected_error}")
    sys.exit(1)
def _build_parser():
    """Create the argument parser describing every command-line option."""
    parser = argparse.ArgumentParser(description="F5 TTS - Simplified CLI Interface using app.py")

    # --- Input Arguments ---
    parser.add_argument("--ref_audio", required=True, help="Path to the reference audio file (wav, mp3, etc.)")
    parser.add_argument("--ref_text", default="", help="Reference text. If empty, audio transcription will be performed by app.py's infer function.")
    parser.add_argument("--gen_text", required=True, help="Text to generate")

    # --- Model & Generation Parameters ---
    # NOTE(review): app.py seems hardcoded to load the "Multi" model at the top
    # level, so --exp_name may not change the loaded model unless app.py's
    # infer logic uses it internally.
    parser.add_argument("--exp_name", default="Multi", help="Experiment name / model selection (default: Multi - effectiveness depends on app.py)")
    parser.add_argument("--language", default="en-us", help="Synthesized language code (e.g., en-us, pl, de) (default: en-us)")
    parser.add_argument("--ref_language", default="en-us", help="Reference language code (e.g., en-us, pl, de) (default: en-us)")
    parser.add_argument("--speed", type=float, default=1.0, help="Audio speed factor (default: 1.0)")

    # --- Postprocessing ---
    parser.add_argument("--remove_silence", action="store_true", help="Remove silence from the output audio (uses app.py logic)")
    parser.add_argument("--cross_fade_duration", type=float, default=0.15, help="Cross-fade duration between batches (s)")

    # --- Output Arguments ---
    parser.add_argument("--output_audio", default="output.wav", help="Path to save the output WAV file")
    parser.add_argument("--output_spectrogram", default="spectrogram.png", help="Path to save the spectrogram image (PNG)")
    return parser


def _preview(text, limit=100):
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _print_configuration(args):
    """Print the parsed options so the user can verify them before inference."""
    print("--- Configuration ---")
    print(f"Reference Audio: {args.ref_audio}")
    print(f"Reference Text: '{args.ref_text if args.ref_text else '<Automatic Transcription>'}'")
    print(f"Generation Text: '{_preview(args.gen_text)}'")
    print(f"Model (exp_name): {args.exp_name}")
    print(f"Synth Language: {args.language}")
    print(f"Ref Language: {args.ref_language}")
    print(f"Speed: {args.speed}")
    print(f"Remove Silence: {args.remove_silence}")
    print(f"Cross-Fade: {args.cross_fade_duration}s")
    print(f"Output Audio: {args.output_audio}")
    print(f"Output Spectrogram: {args.output_spectrogram}")
    print("--------------------")


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist yet."""
    os.makedirs(os.path.dirname(os.path.abspath(path)) or '.', exist_ok=True)


def _save_outputs(args, sr, audio_data, temp_spectrogram_path):
    """Write the audio to args.output_audio and copy the spectrogram image.

    Any exception raised while saving is allowed to propagate to the caller.
    """
    print(f"Saving audio to: {args.output_audio}")
    _ensure_parent_dir(args.output_audio)
    # The audio is converted to float32 before being handed to soundfile.
    # Presumably audio_data is a NumPy array (it exposes .dtype/.astype) - confirm.
    if audio_data.dtype != "float32":
        audio_data = audio_data.astype("float32")
    sf.write(args.output_audio, audio_data, sr)

    # infer returns the path of a spectrogram image it already wrote; copy it
    # to the location requested by the user.
    print(f"Copying spectrogram from {temp_spectrogram_path} to: {args.output_spectrogram}")
    _ensure_parent_dir(args.output_spectrogram)
    shutil.copy(temp_spectrogram_path, args.output_spectrogram)
    # NOTE(review): the temporary spectrogram file is not removed here; whether
    # it is cleaned up automatically depends on how app.py creates it - confirm.


def main(argv=None):
    """Run the command-line interface: parse options, run inference, save results.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from sys.argv.

    The function does not return a value. It terminates the process through
    sys.exit(1) when setting the app.py globals, running inference, or saving
    the outputs fails; argparse itself exits on invalid arguments.
    """
    args = _build_parser().parse_args(argv)
    _print_configuration(args)

    # --- Set Global Variables in app.py ---
    # The 'infer' function in app.py relies on these globals being set.
    # Assigning an attribute on a module normally always succeeds, so the
    # AttributeError handler below is only a safety net.
    try:
        for name in ("language", "ref_language", "speed"):
            value = getattr(args, name)
            print(f"Setting {name} in app module to: {value}")
            setattr(app, name, value)
    except AttributeError as e:
        print(f"Error: Could not set global variable in 'app.py'. Does it exist? Details: {e}")
        sys.exit(1)

    # --- Run Inference ---
    print("\nStarting inference process (will load models if not already loaded)...")
    try:
        # language, ref_language and speed are not passed here: app.py's
        # functions read them from the module globals set above.
        (sr, audio_data), temp_spectrogram_path = infer(
            ref_audio_orig=args.ref_audio,
            ref_text=args.ref_text,
            gen_text=args.gen_text,
            exp_name=args.exp_name,
            remove_silence=args.remove_silence,
            cross_fade_duration=args.cross_fade_duration,
        )
    except Exception as e:
        print(f"\nError during inference: {e}")
        import traceback
        traceback.print_exc()  # Print detailed traceback
        sys.exit(1)
    print("Inference completed.")

    # --- Save Outputs ---
    try:
        _save_outputs(args, sr, audio_data, temp_spectrogram_path)
    except Exception as e:
        print(f"\nError saving output files: {e}")
        sys.exit(1)

    print("\n--- Success ---")
    print(f"Audio saved in: {args.output_audio}")
    print(f"Spectrogram saved in: {args.output_spectrogram}")
    print("---------------")
# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
# --- END OF FILE inference_cli.py ---