{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "xiaRzuzPOP4H"
      },
      "outputs": [],
      "source": [
        "# Install TangoFlux straight from its GitHub repository.\n",
        "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
        "%pip install git+https://github.com/declare-lab/TangoFlux.git"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "Hfu3zXTDOP4J"
      },
      "outputs": [],
      "source": [
        "# Setup cell: run this once before any of the generation cells below.\n",
        "import IPython\n",
        "import torchaudio\n",
        "from tangoflux import TangoFluxInference\n",
        "from IPython.display import Audio\n",
        "\n",
        "# Sample rate (Hz) used for both inline playback and the saved .wav file.\n",
        "SAMPLE_RATE = 44100\n",
        "\n",
        "# Build the inference model once; every generation cell reuses `model`.\n",
        "model = TangoFluxInference(name='declare-lab/TangoFlux')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "oFiak5QIOP4K"
      },
      "outputs": [],
      "source": [
        "# @title Generate Audio\n",
        "\n",
        "prompt = 'a futuristic space craft with unique engine sound' # @param {type:\"string\"}\n",
        "duration = 10 # @param {type:\"number\"}\n",
        "steps = 50 # @param {type:\"integer\"}\n",
        "\n",
        "# Requires `model` and `SAMPLE_RATE` from the setup cell above.\n",
        "audio = model.generate(prompt, steps=steps, duration=duration)\n",
        "\n",
        "# Last expression of the cell: rendered as an inline audio player.\n",
        "Audio(data=audio, rate=SAMPLE_RATE)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_Z8elHyOHOQ1"
      },
      "outputs": [],
      "source": [
        "# @title Generate and Save Audio\n",
        "\n",
        "prompt = 'Melodic human whistling harmonizing with natural birdsong' # @param {type:\"string\"}\n",
        "duration = 10 # @param {type:\"number\"}\n",
        "steps = 50 # @param {type:\"integer\"}\n",
        "output_path = 'generated_audio.wav' # @param {type:\"string\"}\n",
        "\n",
        "# Reuses `model` and `SAMPLE_RATE` from the setup cell instead of re-importing and\n",
        "# re-instantiating the model, so running this cell does not build the model twice.\n",
        "audio = model.generate(prompt, steps=steps, duration=duration)\n",
        "\n",
        "# The saved tensor must be 2D, shaped [channels, samples].\n",
        "if audio.ndim == 1:  # 1D (mono): add a leading channel axis to get [1, samples]\n",
        "    audio_tensor = audio.unsqueeze(0)\n",
        "elif audio.ndim == 2:  # already 2D, use as-is\n",
        "    audio_tensor = audio\n",
        "else:\n",
        "    raise ValueError(f\"Unexpected audio tensor shape: {audio.shape}\")\n",
        "\n",
        "# Write the result to disk as a .wav file.\n",
        "torchaudio.save(output_path, audio_tensor, sample_rate=SAMPLE_RATE)\n",
        "\n",
        "# Last expression of the cell: rendered as an inline audio player.\n",
        "Audio(data=audio.numpy(), rate=SAMPLE_RATE)\n"
      ]
    }
  ],
  "metadata": {
    "language_info": {
      "name": "python"
    },
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "private_outputs": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}