# --- START OF FILE inference_cli.py ---
"""Simplified command-line front end for F5 TTS inference via ``app.py``.

The script parses the command-line options, pushes the language/speed
settings into module-level globals of ``app`` (its ``infer`` function is
expected to read them from there), runs ``app.infer`` and writes the
resulting audio and spectrogram to the requested output paths.
"""

import argparse
import os
import shutil
import sys
import traceback

import soundfile as sf

# Try to import app.py - assumes it's in the same directory or on the Python
# path. If it is not directly importable, add its directory to sys.path first,
# e.g.: sys.path.append(os.path.dirname(os.path.abspath(__file__)))
try:
    import app
    from app import infer  # The main inference function
except ImportError as e:
    print(
        "Error: Could not import 'app.py'. Make sure it's in the Python path.",
        file=sys.stderr,
    )
    print(f"Details: {e}", file=sys.stderr)
    sys.exit(1)
except Exception as e:
    print(
        f"An unexpected error occurred during 'app.py' import: {e}",
        file=sys.stderr,
    )
    sys.exit(1)

# Maximum number of characters of the generation text echoed in the
# configuration summary.
_GEN_TEXT_PREVIEW_LEN = 100

# Names of the CLI options that are mirrored into same-named globals of the
# ``app`` module before inference.
_APP_GLOBALS = ("language", "ref_language", "speed")


def _build_parser():
    """Return the argument parser describing every CLI option."""
    parser = argparse.ArgumentParser(
        description="F5 TTS - Simplified CLI Interface using app.py"
    )

    # --- Input Arguments ---
    parser.add_argument(
        "--ref_audio",
        required=True,
        help="Path to the reference audio file (wav, mp3, etc.)",
    )
    parser.add_argument(
        "--ref_text",
        default="",
        help="Reference text. If empty, audio transcription will be performed by app.py's infer function.",
    )
    parser.add_argument("--gen_text", required=True, help="Text to generate")

    # --- Model & Generation Parameters ---
    # Note: app.py seems hardcoded to load the "Multi" model at the top level.
    # This argument might not change the loaded model unless app.py's infer
    # logic uses it internally.
    parser.add_argument(
        "--exp_name",
        default="Multi",
        help="Experiment name / model selection (default: Multi - effectiveness depends on app.py)",
    )
    parser.add_argument(
        "--language",
        default="en-us",
        help="Synthesized language code (e.g., en-us, pl, de) (default: en-us)",
    )
    parser.add_argument(
        "--ref_language",
        default="en-us",
        help="Reference language code (e.g., en-us, pl, de) (default: en-us)",
    )
    parser.add_argument(
        "--speed",
        type=float,
        default=1.0,
        help="Audio speed factor (default: 1.0)",
    )

    # --- Postprocessing ---
    parser.add_argument(
        "--remove_silence",
        action="store_true",
        help="Remove silence from the output audio (uses app.py logic)",
    )
    parser.add_argument(
        "--cross_fade_duration",
        type=float,
        default=0.15,
        help="Cross-fade duration between batches (s)",
    )

    # --- Output Arguments ---
    parser.add_argument(
        "--output_audio",
        default="output.wav",
        help="Path to save the output WAV file",
    )
    parser.add_argument(
        "--output_spectrogram",
        default="spectrogram.png",
        help="Path to save the spectrogram image (PNG)",
    )
    return parser


def _print_configuration(args):
    """Echo the effective settings so the run can be audited from the log."""
    gen_preview = args.gen_text[:_GEN_TEXT_PREVIEW_LEN]
    if len(args.gen_text) > _GEN_TEXT_PREVIEW_LEN:
        # Only mark the preview as truncated when text was actually cut off.
        gen_preview += "..."

    print("--- Configuration ---")
    print(f"Reference Audio: {args.ref_audio}")
    print(f"Reference Text: '{args.ref_text}'")
    print(f"Generation Text: '{gen_preview}'")
    print(f"Model (exp_name): {args.exp_name}")
    print(f"Synth Language: {args.language}")
    print(f"Ref Language: {args.ref_language}")
    print(f"Speed: {args.speed}")
    print(f"Remove Silence: {args.remove_silence}")
    print(f"Cross-Fade: {args.cross_fade_duration}s")
    print(f"Output Audio: {args.output_audio}")
    print(f"Output Spectrogram: {args.output_spectrogram}")
    print("--------------------")


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it is missing."""
    os.makedirs(os.path.dirname(os.path.abspath(path)) or ".", exist_ok=True)


def _save_outputs(args, sr, audio_data, temp_spectrogram_path):
    """Write the audio to ``args.output_audio`` and copy the spectrogram.

    *audio_data* is converted to float32 before being handed to soundfile
    when its dtype differs. The spectrogram is copied from the temporary
    path returned by ``infer`` to ``args.output_spectrogram``.
    """
    print(f"Saving audio to: {args.output_audio}")
    _ensure_parent_dir(args.output_audio)
    if audio_data.dtype != "float32":
        audio_data = audio_data.astype("float32")
    sf.write(args.output_audio, audio_data, sr)

    print(
        f"Copying spectrogram from {temp_spectrogram_path} to: {args.output_spectrogram}"
    )
    _ensure_parent_dir(args.output_spectrogram)
    shutil.copy(temp_spectrogram_path, args.output_spectrogram)

    print("\n--- Success ---")
    print(f"Audio saved in: {args.output_audio}")
    print(f"Spectrogram saved in: {args.output_spectrogram}")
    print("---------------")


def main():
    """Parse the CLI arguments, run inference and save the results.

    Exits the process with status 1 (after reporting the problem on stderr)
    if the ``app`` globals cannot be set, if inference raises, or if the
    output files cannot be written.
    """
    args = _build_parser().parse_args()
    _print_configuration(args)

    # --- Set Global Variables in app.py ---
    # The 'infer' function in app.py relies on these globals being set.
    # Assigning an attribute on a plain module does not normally raise; the
    # handler is kept as a safeguard in case 'app' restricts attribute
    # assignment.
    try:
        for name in _APP_GLOBALS:
            value = getattr(args, name)
            print(f"Setting {name} in app module to: {value}")
            setattr(app, name, value)
    except AttributeError as e:
        print(
            f"Error: Could not set global variable in 'app.py'. Does it exist? Details: {e}",
            file=sys.stderr,
        )
        sys.exit(1)

    # --- Run Inference ---
    print("\nStarting inference process (will load models if not already loaded)...")
    try:
        # language, ref_language and speed are not passed here: they are
        # consumed through the app module globals set above.
        (sr, audio_data), temp_spectrogram_path = infer(
            ref_audio_orig=args.ref_audio,
            ref_text=args.ref_text,
            gen_text=args.gen_text,
            exp_name=args.exp_name,
            remove_silence=args.remove_silence,
            cross_fade_duration=args.cross_fade_duration,
        )
        print("Inference completed.")
    except Exception as e:
        # Top-level boundary: report the failure with a full traceback, both
        # on stderr, then exit with a non-zero status.
        print(f"\nError during inference: {e}", file=sys.stderr)
        traceback.print_exc()
        sys.exit(1)

    # --- Save Outputs ---
    try:
        _save_outputs(args, sr, audio_data, temp_spectrogram_path)
    except Exception as e:
        print(f"\nError saving output files: {e}", file=sys.stderr)
        sys.exit(1)

    # NOTE(review): the temporary spectrogram file returned by infer() is left
    # in place. Whether it is removed automatically depends on how app.py
    # creates it - confirm before adding an explicit os.remove() here.


if __name__ == "__main__":
    main()
# --- END OF FILE inference_cli.py ---