Spaces:
Running
on
Zero
Running
on
Zero
# --- START OF FILE inference_cli.py ---
import argparse | |
import shutil | |
import soundfile as sf | |
import os # For path manipulation if needed | |
import sys # To potentially add app.py directory to path | |
# app.py supplies the inference entry point, so it has to be importable: either
# it sits next to this script or its directory is on the Python path.  If it is
# not directly importable, extend the path before this import, for example:
#     sys.path.append(os.path.dirname(os.path.abspath(__file__)))
try:
    import app
    from app import infer  # the main inference function
except ImportError as import_error:
    # The module (or the name `infer` inside it) could not be found.
    print(
        "Error: Could not import 'app.py'. Make sure it's in the Python path.",
        f"Details: {import_error}",
        sep="\n",
    )
    sys.exit(1)
except Exception as unexpected_error:
    # Anything else raised while app.py executes its own top-level code.
    print(f"An unexpected error occurred during 'app.py' import: {unexpected_error}")
    sys.exit(1)
def _build_parser():
    """Return the argument parser describing every option of the CLI."""
    parser = argparse.ArgumentParser(description="F5 TTS - Simplified CLI Interface using app.py")

    # --- Input Arguments ---
    parser.add_argument("--ref_audio", required=True, help="Path to the reference audio file (wav, mp3, etc.)")
    parser.add_argument("--ref_text", default="", help="Reference text. If empty, audio transcription will be performed by app.py's infer function.")
    parser.add_argument("--gen_text", required=True, help="Text to generate")

    # --- Model & Generation Parameters ---
    # Note: app.py seems hardcoded to load the "Multi" model at the top level.
    # This argument might not change the loaded model unless app.py's infer
    # logic uses it internally.
    parser.add_argument("--exp_name", default="Multi", help="Experiment name / model selection (default: Multi - effectiveness depends on app.py)")
    parser.add_argument("--language", default="en-us", help="Synthesized language code (e.g., en-us, pl, de) (default: en-us)")
    parser.add_argument("--ref_language", default="en-us", help="Reference language code (e.g., en-us, pl, de) (default: en-us)")
    parser.add_argument("--speed", type=float, default=1.0, help="Audio speed factor (default: 1.0)")

    # --- Postprocessing ---
    parser.add_argument("--remove_silence", action="store_true", help="Remove silence from the output audio (uses app.py logic)")
    parser.add_argument("--cross_fade_duration", type=float, default=0.15, help="Cross-fade duration between batches (s)")

    # --- Output Arguments ---
    parser.add_argument("--output_audio", default="output.wav", help="Path to save the output WAV file")
    parser.add_argument("--output_spectrogram", default="spectrogram.png", help="Path to save the spectrogram image (PNG)")
    return parser


def _print_configuration(args):
    """Echo the parsed options so the user can see what is about to run."""
    shown_ref_text = args.ref_text if args.ref_text else "<Automatic Transcription>"
    # Only mark the preview as truncated when text was actually cut off.
    preview_limit = 100
    ellipsis = "..." if len(args.gen_text) > preview_limit else ""

    print("--- Configuration ---")
    print(f"Reference Audio: {args.ref_audio}")
    print(f"Reference Text: '{shown_ref_text}'")
    print(f"Generation Text: '{args.gen_text[:preview_limit]}{ellipsis}'")
    print(f"Model (exp_name): {args.exp_name}")
    print(f"Synth Language: {args.language}")
    print(f"Ref Language: {args.ref_language}")
    print(f"Speed: {args.speed}")
    print(f"Remove Silence: {args.remove_silence}")
    print(f"Cross-Fade: {args.cross_fade_duration}s")
    print(f"Output Audio: {args.output_audio}")
    print(f"Output Spectrogram: {args.output_spectrogram}")
    print("--------------------")


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist."""
    # abspath() always yields a non-empty directory part, so no fallback is needed.
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)


def main(argv=None):
    """Run the F5 TTS command-line interface.

    Parses the options, pushes the language/speed settings into the ``app``
    module as globals, calls ``infer`` and writes the resulting audio and
    spectrogram to the requested paths.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line.  ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the original behaviour.

    Raises:
        SystemExit: with status 1 when setting the globals, running inference
            or saving the outputs fails; argparse itself exits with status 2
            on invalid arguments and 0 for ``--help``.
    """
    args = _build_parser().parse_args(argv)
    _print_configuration(args)

    # --- Set Global Variables in app.py ---
    # The 'infer' function in app.py relies on these globals being set.
    try:
        for global_name in ("language", "ref_language", "speed"):
            value = getattr(args, global_name)
            print(f"Setting {global_name} in app module to: {value}")
            setattr(app, global_name, value)
    except AttributeError as e:
        print(f"Error: Could not set global variable in 'app.py'. Does it exist? Details: {e}")
        sys.exit(1)

    # --- Run Inference ---
    print("\nStarting inference process (will load models if not already loaded)...")
    try:
        # language, ref_language and speed are not passed here: app.py reads
        # them from the module globals assigned above.
        (sr, audio_data), temp_spectrogram_path = infer(
            ref_audio_orig=args.ref_audio,
            ref_text=args.ref_text,
            gen_text=args.gen_text,
            exp_name=args.exp_name,
            remove_silence=args.remove_silence,
            cross_fade_duration=args.cross_fade_duration,
        )
        print("Inference completed.")
    except Exception as e:
        print(f"\nError during inference: {e}")
        import traceback

        traceback.print_exc()  # Print detailed traceback
        sys.exit(1)

    # --- Save Outputs ---
    try:
        print(f"Saving audio to: {args.output_audio}")
        _ensure_parent_dir(args.output_audio)
        # Ensure data is float32 for soundfile
        if audio_data.dtype != "float32":
            audio_data = audio_data.astype("float32")
        sf.write(args.output_audio, audio_data, sr)

        # Copy spectrogram from the temporary path returned by infer
        print(f"Copying spectrogram from {temp_spectrogram_path} to: {args.output_spectrogram}")
        _ensure_parent_dir(args.output_spectrogram)
        shutil.copy(temp_spectrogram_path, args.output_spectrogram)

        print("\n--- Success ---")
        print(f"Audio saved in: {args.output_audio}")
        print(f"Spectrogram saved in: {args.output_spectrogram}")
        print("---------------")
    except Exception as e:
        print(f"\nError saving output files: {e}")
        sys.exit(1)

    # The temporary spectrogram returned by infer is deliberately left in
    # place; whether it gets cleaned up depends on how app.py created it.
# Run the CLI only when this file is executed as a script, so importing the
# module (e.g. to reuse main) has no side effects beyond the app.py import.
if __name__ == "__main__":
    main()
# --- END OF FILE inference_cli.py ---