Spaces:

matdmiller
/

tts-openai

Runtime error

File size: 10,945 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bedf0dc-8d8e-4ede-a9e6-b8f35136aa00",
   "metadata": {},
   "outputs": [],
   "source": [
    "#|default_exp app"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "667802a7-0f36-4136-a381-e66210b20462",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "#tts_openai_secrets.py content:\n",
    "#import os\n",
    "#os.environ['OPENAI_API_KEY'] = 'sk-XXXXXXXXXXXXXXXXXXXXXX'\n",
    "import os\n",
    "\n",
    "# Tracks whether any required secret is absent from the environment.\n",
    "secret_import_failed = False\n",
    "\n",
    "# A missing variable raises KeyError; catching only that keeps unrelated errors\n",
    "# (and KeyboardInterrupt/SystemExit) from being reported as a missing secret.\n",
    "try:\n",
    "    _ = os.environ['OPENAI_API_KEY']\n",
    "    print('OPENAI_API_KEY environment variable was found.')\n",
    "except KeyError:\n",
    "    print('OPENAI_API_KEY environment variable was not found.')\n",
    "    secret_import_failed = True\n",
    "try:\n",
    "    GRADIO_PASSWORD = os.environ['GRADIO_PASSWORD']\n",
    "    print('GRADIO_PASSWORD environment variable was found.')\n",
    "except KeyError:\n",
    "    print('GRADIO_PASSWORD environment variable was not found.')\n",
    "    secret_import_failed = True\n",
    "\n",
    "if secret_import_failed:\n",
    "    # Fallback for local runs: importing tts_openai_secrets is expected to set\n",
    "    # the variables in os.environ (see the template at the top of this cell).\n",
    "    import tts_openai_secrets\n",
    "    GRADIO_PASSWORD = os.environ['GRADIO_PASSWORD']\n",
    "    print('import tts_openai_secrets succeeded')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d9863fc-969e-409b-8e20-b9c3cd2cc3e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "try:\n",
    "    import nbdev\n",
    "except ImportError:\n",
    "    # nbdev is only used by the export cell at the end of the notebook, so a\n",
    "    # missing install is reported here instead of failing this cell.\n",
    "    print('to convert this notebook to app.py you need to pip install nbdev')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f486d3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "import gradio as gr\n",
    "import openai\n",
    "from pydub import AudioSegment\n",
    "import io"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ffd33b4-cb9b-4c01-bff6-4c3102854ab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Ask the API which models have 'tts' in their id; if the lookup fails for any\n",
    "# reason, fall back to the default model so the UI can still be built.\n",
    "try:\n",
    "    tts_models = [o.id for o in openai.models.list().data if 'tts' in o.id]\n",
    "    print('successfully got tts model list:', tts_models)\n",
    "except Exception as e:\n",
    "    # Report why the lookup failed instead of discarding the error silently.\n",
    "    print('could not get tts model list, falling back to tts-1:', repr(e))\n",
    "    tts_models = ['tts-1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ddbca5d-4b04-43ab-afaf-430802980e78",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Voice identifiers offered as choices in the UI's 'Voice' dropdown.\n",
    "tts_voices = [\n",
    "    'alloy',\n",
    "    'echo',\n",
    "    'fable',\n",
    "    'onyx',\n",
    "    'nova',\n",
    "    'shimmer',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24674094-4d47-4e48-b591-55faabcff8df",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "def split_text(input_text, max_length=4000, lookback=1000):\n",
    "    \"\"\"Split text into chunks of at most `max_length` characters.\n",
    "\n",
    "    Each cut is moved back to a natural boundary when one exists within the\n",
    "    last `lookback` characters of the window: preferably just after the last\n",
    "    newline, otherwise just after the last '. '. If neither is found the text\n",
    "    is cut at exactly `max_length`. Joining the chunks reproduces the input.\n",
    "\n",
    "    Args:\n",
    "        input_text: The text to split.\n",
    "        max_length: Maximum number of characters per chunk; must be >= 1.\n",
    "        lookback: How many characters before `max_length` to search for a\n",
    "            boundary. Values outside 0..max_length are clamped to that range.\n",
    "\n",
    "    Returns:\n",
    "        A list of chunk strings. Text no longer than `max_length` (including\n",
    "        the empty string) is returned as a single-element list.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If `max_length` is less than 1.\n",
    "    \"\"\"\n",
    "    if max_length < 1:\n",
    "        # A zero or negative window would never consume any text.\n",
    "        raise ValueError('max_length must be at least 1')\n",
    "\n",
    "    # Clamp so the search window is always inside [0, max_length); a negative\n",
    "    # start index would otherwise be counted from the end of the string.\n",
    "    window_start = max_length - min(max(lookback, 0), max_length)\n",
    "\n",
    "    chunks = []\n",
    "    remaining = input_text\n",
    "    while len(remaining) > max_length:\n",
    "        # Prefer the last newline in the window and keep it in this chunk.\n",
    "        newline_index = remaining.rfind('\\n', window_start, max_length)\n",
    "        if newline_index != -1:\n",
    "            split_point = newline_index + 1\n",
    "        else:\n",
    "            # Otherwise cut after the last '. ' in the window, if there is one.\n",
    "            period_index = remaining.rfind('. ', window_start, max_length)\n",
    "            split_point = period_index + 2 if period_index != -1 else max_length\n",
    "\n",
    "        chunks.append(remaining[:split_point])\n",
    "        remaining = remaining[split_point:]\n",
    "\n",
    "    # Whatever is left fits in a single chunk.\n",
    "    chunks.append(remaining)\n",
    "    return chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6224ae5-3792-42b2-8392-3abd42998a50",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "def concatenate_mp3(mp3_files):\n",
    "    \"\"\"Join a list of MP3 byte strings into one MP3 byte string.\n",
    "\n",
    "    A single-item list is returned as-is without re-encoding. Otherwise each\n",
    "    item is decoded with pydub, appended in order, and the result is exported\n",
    "    back to MP3 in memory.\n",
    "    \"\"\"\n",
    "    if len(mp3_files) != 1:\n",
    "        # Debug aid: dump each audio response to its own file\n",
    "        # for idx, mp3_data in enumerate(mp3_files):\n",
    "        #     with open(f'./{idx}.mp3', 'wb') as f:\n",
    "        #         f.write(mp3_data)\n",
    "\n",
    "        # Decode every blob and append it to the running segment\n",
    "        merged = AudioSegment.empty()\n",
    "        for blob in mp3_files:\n",
    "            merged += AudioSegment.from_file(io.BytesIO(blob), format=\"mp3\")\n",
    "\n",
    "        # Encode the merged audio into an in-memory buffer\n",
    "        out_buffer = io.BytesIO()\n",
    "        merged.export(out_buffer, format=\"mp3\")\n",
    "        out_buffer.seek(0)\n",
    "        return out_buffer.getvalue()\n",
    "\n",
    "    # Only one item: hand it back untouched\n",
    "    return mp3_files[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5388e860",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "def create_speech(input_text, model='tts-1', voice='alloy', progress=gr.Progress()):\n",
    "    \"\"\"Turn text into speech with the OpenAI speech API and return the audio bytes.\n",
    "\n",
    "    The text is split with split_text, each chunk is synthesised in turn, and\n",
    "    the per-chunk audio is merged with concatenate_mp3.\n",
    "\n",
    "    Args:\n",
    "        input_text: The text to convert.\n",
    "        model: Model id passed to the speech endpoint.\n",
    "        voice: Voice name passed to the speech endpoint.\n",
    "        progress: Gradio progress tracker, called with a 0..1 fraction and a\n",
    "            description as chunks complete.\n",
    "\n",
    "    Returns:\n",
    "        The combined audio as returned by concatenate_mp3.\n",
    "\n",
    "    Raises:\n",
    "        gr.Error: If input_text is empty or contains only whitespace.\n",
    "    \"\"\"\n",
    "    # Reject blank input up front instead of sending an empty request to the API\n",
    "    if not input_text or not input_text.strip():\n",
    "        raise gr.Error('Please enter some text to convert to speech.')\n",
    "\n",
    "    # Split the input text into chunks\n",
    "    chunks = split_text(input_text)\n",
    "\n",
    "    # Initialize the progress bar\n",
    "    progress(0, desc=\"Starting TTS processing...\")\n",
    "\n",
    "    # Initialize a list to hold the audio data of each chunk\n",
    "    audio_data = []\n",
    "\n",
    "    # Calculate the progress increment for each chunk\n",
    "    progress_increment = 1.0 / len(chunks)\n",
    "\n",
    "    # Create a client instance for OpenAI\n",
    "    client = openai.OpenAI()\n",
    "    try:\n",
    "        # Process each chunk\n",
    "        for i, chunk in enumerate(chunks):\n",
    "            response = client.audio.speech.create(\n",
    "                model=model,\n",
    "                voice=voice,\n",
    "                input=chunk,\n",
    "                speed=1.0\n",
    "            )\n",
    "            # Append the audio content of the response to the list\n",
    "            audio_data.append(response.content)\n",
    "\n",
    "            # Update the progress bar\n",
    "            progress((i + 1) * progress_increment, desc=f\"Processing chunk {i + 1} of {len(chunks)}\")\n",
    "    finally:\n",
    "        # Close the client connection even when a request fails part-way through\n",
    "        client.close()\n",
    "\n",
    "    # Concatenate the audio data from all chunks\n",
    "    combined_audio = concatenate_mp3(audio_data)\n",
    "\n",
    "    # Final update to the progress bar\n",
    "    progress(1, desc=\"Processing completed\")\n",
    "\n",
    "    return combined_audio\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "236dd8d3-4364-4731-af93-7dcdec6f18a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "def get_input_text_len(input_text):\n",
    "    \"\"\"Return the number of characters in `input_text`.\"\"\"\n",
    "    char_count = len(input_text)\n",
    "    return char_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4fb3159-579b-4271-bc96-4cd1e2816eca",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Build the Gradio UI: a text input, model/voice dropdowns, a character-count\n",
    "# label, the audio output, and the Go / Clear buttons.\n",
    "# NOTE(review): `head` is given plain text here; confirm against the Gradio\n",
    "# Blocks documentation whether it expects HTML markup instead.\n",
    "with gr.Blocks(title='OpenAI TTS', head='OpenAI TTS') as app:\n",
    "    gr.Markdown(\"# OpenAI TTS\")\n",
    "    # NOTE(review): create_speech splits longer text into chunks via split_text,\n",
    "    # so confirm whether the 4,000-character limit stated below is still accurate.\n",
    "    gr.Markdown(\"Start typing below and then click **Go** to create the speech from your text. The current limit is 4,000 characters.\")\n",
    "    with gr.Row():\n",
    "        input_text = gr.Textbox(max_lines=100, label=\"Enter text here\")\n",
    "    with gr.Row():\n",
    "        # Choices come from the module-level tts_models / tts_voices lists\n",
    "        tts_model_dropdown = gr.Dropdown(value='tts-1',choices=tts_models, label='Model')\n",
    "        tts_voice_dropdown = gr.Dropdown(value='alloy',choices=tts_voices,label='Voice')\n",
    "        input_text_length = gr.Label(label=\"Number of characters\")\n",
    "        output_audio = gr.Audio()\n",
    "    # Update the character-count label from the textbox's input event\n",
    "    input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)\n",
    "    go_btn = gr.Button(\"Go\")\n",
    "    # Go: run create_speech on the text, model and voice; result goes to the audio component\n",
    "    go_btn.click(fn=create_speech, inputs=[input_text, tts_model_dropdown, tts_voice_dropdown], outputs=[output_audio])\n",
    "    clear_btn = gr.Button('Clear')\n",
    "    # Clear: set the textbox to an empty string\n",
    "    clear_btn.click(fn=lambda: '', outputs=input_text)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a00648a1-891b-470b-9959-f5d502055713",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# Keyword arguments shared by the app.launch() calls: a fixed login pair\n",
    "# ('username', GRADIO_PASSWORD) and the accompanying auth_message text.\n",
    "launch_kwargs = dict(\n",
    "    auth=('username', GRADIO_PASSWORD),\n",
    "    auth_message='Please log in to Mat\\'s TTS App with username: username and password.',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b534fe7-4337-423e-846a-1bdb7cccc4ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "# Notebook launch: start the app from inside the notebook with the shared launch_kwargs.\n",
    "# This cell is tagged `#| hide` rather than `#| export`; presumably it is left out of the exported module.\n",
    "app.launch(**launch_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb886d45",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "# .py launch: start the app only when the exported module is run as a script,\n",
    "# not when it is imported.\n",
    "if __name__ == \"__main__\":\n",
    "    app.launch(**launch_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28e8d888-e790-46fa-bbac-4511b9ab796c",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "# Close the app that the notebook launch cell above started.\n",
    "app.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afbc9699-4d16-4060-88f4-cd1251754cbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "# Presumably closes any Gradio apps still running in this kernel - confirm against the Gradio docs.\n",
    "gr.close_all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0420310d-930b-4904-8bd4-3458ad8bdbd3",
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "# Export this notebook (app.ipynb) with nbdev, writing the module under the current directory.\n",
    "# Needs the `import nbdev` in the earlier `#| hide` cell to have succeeded.\n",
    "nbdev.export.nb_export('app.ipynb',lib_path='.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9869749d-bc7c-4e24-9dbc-403f665d6200",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}