Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

App Files Files Community

f5-tts_Polish_English_German / inference_cli.py

Gregniuki

Update inference_cli.py

caa0b3d verified 28 days ago

raw

history blame contribute delete

6.25 kB

	# --- START OF FILE inference_cli.py ---

	import argparse
	import shutil
	import soundfile as sf
	import os # For path manipulation if needed
	import sys # To potentially add app.py directory to path

	# Import app.py, which supplies the `infer` function and the module-level
	# settings (language, ref_language, speed) that main() assigns before calling it.
	# app.py must be importable from the current directory or the Python path.
	try:
	    # If app.py is not directly importable, add its directory to the path first,
	    # e.g.: sys.path.append(os.path.dirname(os.path.abspath(__file__)))
	    import app
	    from app import infer  # Main inference function
	except ImportError as e:
	    # Diagnostics go to stderr so they never mix with regular program output.
	    print("Error: Could not import 'app.py'. Make sure it's in the Python path.", file=sys.stderr)
	    print(f"Details: {e}", file=sys.stderr)
	    sys.exit(1)
	except Exception as e:
	    # Importing a module executes its top-level code, which may fail for reasons
	    # unrelated to the import path; show the traceback so the cause is visible.
	    import traceback

	    print(f"An unexpected error occurred during 'app.py' import: {e}", file=sys.stderr)
	    traceback.print_exc()
	    sys.exit(1)


	def _build_parser():
	    """Build the argument parser that defines every command-line option."""
	    parser = argparse.ArgumentParser(description="F5 TTS - Simplified CLI Interface using app.py")

	    # --- Input Arguments ---
	    parser.add_argument("--ref_audio", required=True, help="Path to the reference audio file (wav, mp3, etc.)")
	    parser.add_argument("--ref_text", default="", help="Reference text. If empty, audio transcription will be performed by app.py's infer function.")
	    parser.add_argument("--gen_text", required=True, help="Text to generate")

	    # --- Model & Generation Parameters ---
	    # Note: app.py seems hardcoded to load the "Multi" model at the top level.
	    # This argument might not change the loaded model unless app.py's infer logic uses it internally.
	    parser.add_argument("--exp_name", default="Multi", help="Experiment name / model selection (default: Multi - effectiveness depends on app.py)")
	    parser.add_argument("--language", default="en-us", help="Synthesized language code (e.g., en-us, pl, de) (default: en-us)")
	    parser.add_argument("--ref_language", default="en-us", help="Reference language code (e.g., en-us, pl, de) (default: en-us)")
	    parser.add_argument("--speed", type=float, default=1.0, help="Audio speed factor (default: 1.0)")

	    # --- Postprocessing ---
	    parser.add_argument("--remove_silence", action="store_true", help="Remove silence from the output audio (uses app.py logic)")
	    parser.add_argument("--cross_fade_duration", type=float, default=0.15, help="Cross-fade duration between batches (s)")

	    # --- Output Arguments ---
	    parser.add_argument("--output_audio", default="output.wav", help="Path to save the output WAV file")
	    parser.add_argument("--output_spectrogram", default="spectrogram.png", help="Path to save the spectrogram image (PNG)")
	    return parser


	def _preview(text, limit=100):
	    """Return text unchanged if it fits in limit characters, else truncated with '...'."""
	    if len(text) <= limit:
	        return text
	    return f"{text[:limit]}..."


	def _print_configuration(args):
	    """Print the effective settings so the user can verify them before inference."""
	    print("--- Configuration ---")
	    print(f"Reference Audio: {args.ref_audio}")
	    print(f"Reference Text: '{args.ref_text if args.ref_text else '<Automatic Transcription>'}'")
	    print(f"Generation Text: '{_preview(args.gen_text)}'")
	    print(f"Model (exp_name): {args.exp_name}")
	    print(f"Synth Language: {args.language}")
	    print(f"Ref Language: {args.ref_language}")
	    print(f"Speed: {args.speed}")
	    print(f"Remove Silence: {args.remove_silence}")
	    print(f"Cross-Fade: {args.cross_fade_duration}s")
	    print(f"Output Audio: {args.output_audio}")
	    print(f"Output Spectrogram: {args.output_spectrogram}")
	    print("--------------------")


	def _ensure_parent_dir(path):
	    """Create the directory that will contain path if it does not exist yet."""
	    # abspath() always yields a non-empty directory component, even for a bare filename.
	    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)


	def main(argv=None):
	    """Run the command-line interface: parse options, synthesize speech, save results.

	    Args:
	        argv: Optional list of command-line arguments (without the program
	            name). When None, argparse reads them from sys.argv.

	    Raises:
	        SystemExit: With code 2 when the arguments are invalid, or code 1 when
	            inference or saving the output files fails.
	    """
	    parser = _build_parser()
	    args = parser.parse_args(argv)

	    # Reject unusable input up front, before inference is started.
	    if not os.path.isfile(args.ref_audio):
	        parser.error(f"reference audio file not found: {args.ref_audio}")
	    if args.speed <= 0:
	        parser.error(f"--speed must be greater than 0 (got {args.speed})")
	    if not args.gen_text.strip():
	        parser.error("--gen_text must not be empty")

	    _print_configuration(args)

	    # --- Set Global Variables in app.py ---
	    # The 'infer' function in app.py relies on these globals being set.
	    print(f"Setting language in app module to: {args.language}")
	    app.language = args.language
	    print(f"Setting ref_language in app module to: {args.ref_language}")
	    app.ref_language = args.ref_language
	    print(f"Setting speed in app module to: {args.speed}")
	    app.speed = args.speed

	    # --- Run Inference ---
	    print("\nStarting inference process (will load models if not already loaded)...")
	    try:
	        # language, ref_language and speed are not passed here; they were set as
	        # attributes on the app module above.
	        (sr, audio_data), temp_spectrogram_path = infer(
	            ref_audio_orig=args.ref_audio,
	            ref_text=args.ref_text,
	            gen_text=args.gen_text,
	            exp_name=args.exp_name,
	            remove_silence=args.remove_silence,
	            cross_fade_duration=args.cross_fade_duration,
	        )
	    except Exception as e:
	        # Top-level boundary: report the failure with a full traceback and stop.
	        import traceback

	        print(f"\nError during inference: {e}", file=sys.stderr)
	        traceback.print_exc()
	        sys.exit(1)
	    print("Inference completed.")

	    # --- Save Outputs ---
	    try:
	        print(f"Saving audio to: {args.output_audio}")
	        _ensure_parent_dir(args.output_audio)
	        # Ensure data is float32 for soundfile
	        if audio_data.dtype != "float32":
	            audio_data = audio_data.astype("float32")
	        sf.write(args.output_audio, audio_data, sr)

	        # Copy spectrogram from the temporary path returned by infer
	        print(f"Copying spectrogram from {temp_spectrogram_path} to: {args.output_spectrogram}")
	        _ensure_parent_dir(args.output_spectrogram)
	        shutil.copy(temp_spectrogram_path, args.output_spectrogram)
	    except Exception as e:
	        print(f"\nError saving output files: {e}", file=sys.stderr)
	        sys.exit(1)

	    print("\n--- Success ---")
	    print(f"Audio saved in: {args.output_audio}")
	    print(f"Spectrogram saved in: {args.output_spectrogram}")
	    print("---------------")

	    # NOTE(review): the temporary spectrogram file returned by infer() is not
	    # removed here. Whether it is cleaned up automatically depends on how app.py
	    # created it (e.g. NamedTemporaryFile with delete=True) - confirm in app.py.

	# Run the CLI only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	    main()

	# --- END OF FILE inference_cli.py ---