Moustafa1111111111 committed
Commit 7651129 · 1 Parent(s): 0086768

Add all application files and TTS submodule

Files changed (15)
  1. .gitattributes +1 -0
  2. .gitignore +8 -0
  3. Dockerfile +85 -0
  4. README.md +5 -4
  5. index.html +19 -0
  6. local_server_new.py +136 -0
  7. requirements.txt +178 -0
  8. runtime.txt +1 -0
  9. script.js +38 -0
  10. speaker_reference.wav +3 -0
  11. start.sh +13 -0
  12. style.css +76 -0
  13. web/index.html +19 -0
  14. web/script.js +38 -0
  15. web/style.css +76 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ speaker_reference.wav filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
+ TTS/**/*.wav
+ TTS/**/*.png
+ TTS/**/*.gif
+ TTS/**/*.exe
+ TTS/new_venv1/
+ __pycache__/
+ *.pyc
+ *.log
Dockerfile ADDED
@@ -0,0 +1,85 @@
+ FROM python:3.9-slim-buster
+
+ # Install Git (if not already included)
+ RUN apt-get update && apt-get install -y git
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy your entire repository content (including .git, .gitmodules, TTS as a submodule pointer)
+ COPY . /app/
+
+ # Install build tools and other dependencies BEFORE submodule init
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential \
+     cmake \
+     pkg-config \
+     libblis-dev \
+     python3-venv \
+     python3-dev \
+     wget \
+     libopenblas-dev
+
+ # Initialize and update submodules
+ RUN git submodule init
+ RUN git submodule update
+
+ # Set the working directory to the TTS directory AFTER updating submodules
+ WORKDIR /app/TTS
+
+ # Set a generic architecture flag
+ ENV BLIS_ARCH="generic"
+
+ # Try to agree to the Coqui TTS license via environment variable
+ ENV COQUI_TTS_AGREED=1
+
+ # Create a virtual environment
+ RUN python3 -m venv venv
+ RUN . /app/venv/bin/activate
+
+ # Install Coqui TTS requirements
+ RUN pip install -r requirements.txt --timeout=300
+
+ # Explicitly install the TTS package itself in editable mode
+ RUN pip install -e . --timeout=300
+
+ # Change working directory back to /app
+ WORKDIR /app
+
+ # Create the model directory
+ RUN mkdir -p /app/models/xtts_v2
+
+ # Download XTTS v2 model files
+ RUN wget -O /app/models/xtts_v2/config.json https://huggingface.co/coqui/XTTS-v2/resolve/main/config.json?download=true
+ RUN wget -O /app/models/xtts_v2/model.pth https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth?download=true
+ RUN wget -O /app/models/xtts_v2/vocab.json https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json?download=true
+ RUN wget -O /app/models/xtts_v2/dvae.pth https://huggingface.co/coqui/XTTS-v2/resolve/main/dvae.pth?download=true
+ RUN wget -O /app/models/xtts_v2/speakers_xtts.pth https://huggingface.co/coqui/XTTS-v2/resolve/main/speakers_xtts.pth?download=true
+
+ # Create the audio directory if it doesn't exist
+ RUN mkdir -p /app/audio
+
+ # Copy the speaker_reference.wav file if it exists at the root
+ COPY speaker_reference.wav /app/audio/speaker_reference.wav
+
+ # Copy the web page files
+ COPY web /app/web
+
+ # Copy the application code
+ COPY local_server_new.py /app/
+
+ # Install your other dependencies (fastapi, uvicorn, bangla, etc.)
+ COPY requirements.txt /app/
+ RUN . /app/venv/bin/activate && pip install -r /app/requirements.txt --no-cache-dir --timeout=300
+
+ # Create start.sh script
+ RUN echo "#!/bin/bash" > start.sh && \
+     echo "source /app/venv/bin/activate" >> start.sh && \
+     echo "/app/venv/bin/python -m uvicorn local_server_new:app --host 0.0.0.0 --port 80" >> start.sh && \
+     chmod +x start.sh
+
+ # Expose port
+ EXPOSE 80
+
+ # Run the app using the script
+ CMD ["./start.sh"]
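Note: the Dockerfile above fetches the five XTTS-v2 model files one by one with wget. A minimal alternative sketch, assuming the huggingface_hub package is installed in the image (it is not added by this Dockerfile), would download the same files in a single call; the file names and target directory mirror the Dockerfile, everything else is illustrative, not part of this commit:

# Sketch: fetch the XTTS-v2 files used above via huggingface_hub instead of wget.
# Assumes `pip install huggingface_hub`; target path mirrors the Dockerfile.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="coqui/XTTS-v2",
    local_dir="/app/models/xtts_v2",
    allow_patterns=["config.json", "model.pth", "vocab.json", "dvae.pth", "speakers_xtts.pth"],
)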
README.md CHANGED
@@ -1,11 +1,12 @@
  ---
- title: TTS2
- emoji: 💻
- colorFrom: yellow
- colorTo: indigo
+ title: TTS
+ emoji: 😻
+ colorFrom: pink
+ colorTo: purple
  sdk: docker
  pinned: false
  license: apache-2.0
+ short_description: XTTS Large Language Model
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html ADDED
@@ -0,0 +1,19 @@
+ <!DOCTYPE html>
+ <html>
+ <head>
+     <title>Text to Speech</title>
+     <link rel="stylesheet" href="style.css">
+ </head>
+ <body>
+     <h1>Text to Speech</h1>
+     <textarea id="inputText" rows="5" cols="50"></textarea><br><br>
+     <button id="convertButton">Convert to Speech</button>
+     <div id="status"></div>
+     <div id="audioOutput" style="margin-top: 20px;">
+         <a id="downloadLink" href="#" download="output.wav" style="display: none;">Download Audio</a>
+         <audio id="audioPlayer" controls style="display: none;"></audio>
+     </div>
+
+     <script src="script.js"></script>
+ </body>
+ </html>
local_server_new.py ADDED
@@ -0,0 +1,136 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from fastapi.responses import FileResponse
+ from fastapi.staticfiles import StaticFiles
+ import logging
+ import torch
+ import os
+ from TTS.api import TTS
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+ from langdetect import detect
+
+ # Allowlist XttsConfig so torch.load doesn't raise UnpicklingError
+ from torch.serialization import add_safe_globals
+ from TTS.tts.configs.xtts_config import XttsConfig
+ add_safe_globals([XttsConfig])
+
+ # ✅ Monkey-patch torch.load to always use weights_only=False
+ _original_torch_load = torch.load
+ def patched_torch_load(*args, **kwargs):
+     kwargs["weights_only"] = False
+     return _original_torch_load(*args, **kwargs)
+ torch.load = patched_torch_load
+
+ logging.basicConfig(level=logging.DEBUG)
+
+ # Initialize FastAPI
+ app = FastAPI()
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Load TTS model from local files
+ try:
+     model_dir = "/app/models/xtts_v2"
+     config_path = os.path.join(model_dir, "config.json")
+     # When providing config_path, TTS might expect the directory for model_path
+     tts = TTS(model_path=model_dir, config_path=config_path).to("cuda" if torch.cuda.is_available() else "cpu")
+     print("XTTS v2 model loaded successfully from local files.")
+ except Exception as e:
+     print(f"Error loading XTTS v2 model from local files: {e}")
+     print("Falling back to loading by model name (license might be required).")
+     tts = TTS("tts_models/multilingual/multi-dataset-xtts_v2").to("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Load sentiment models
+ arabic_model_name = "aubmindlab/bert-base-arabertv02-twitter"
+ sentiment_tokenizer = AutoTokenizer.from_pretrained(arabic_model_name)
+ sentiment_model = AutoModelForSequenceClassification.from_pretrained("UBC-NLP/MARBERT")
+ sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+
+ # Input class for POST body
+ class Message(BaseModel):
+     text: str
+
+ # Language detection
+ def detect_language_safely(text):
+     try:
+         if any('\u0600' <= c <= '\u06FF' for c in text):
+             return "ar"
+         return detect(text)
+     except:
+         return "ar" if any('\u0600' <= c <= '\u06FF' for c in text) else "en"
+
+ # Sentiment to emotion mapping
+ def map_sentiment_to_emotion(sentiment, language="en"):
+     if language == "ar":
+         return "happy" if sentiment == "positive" else "sad" if sentiment == "negative" else "neutral"
+     return "happy" if "positive" in sentiment.lower() else "sad" if "negative" in sentiment.lower() else "neutral"
+
+ # Simple Arabic sentiment analysis
+ def arabic_sentiment_analysis(text):
+     pos_words = ["سعيد", "فرح", "ممتاز", "رائع", "جيد", "حب", "جميل", "نجاح", "أحسنت", "شكرا"]
+     neg_words = ["حزين", "غاضب", "سيء", "فشل", "خطأ", "مشكلة", "صعب", "لا أحب", "سخيف", "مؤسف"]
+     pos_count = sum(1 for word in pos_words if word in text.lower())
+     neg_count = sum(1 for word in neg_words if word in text.lower())
+
+     if pos_count > neg_count:
+         return "positive"
+     elif neg_count > pos_count:
+         return "negative"
+     else:
+         try:
+             inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
+             outputs = sentiment_model(**inputs)
+             sentiment_class = torch.argmax(outputs.logits).item()
+             return ["negative", "neutral", "positive"][sentiment_class]
+         except:
+             return "neutral"
+
+ # Main TTS endpoint
+ @app.post("/text-to-speech/")
+ def text_to_speech(msg: Message):
+     text = msg.text
+     language = detect_language_safely(text)
+     emotion = "neutral"
+
+     if language == "en":
+         try:
+             sentiment_result = sentiment_analyzer(text)[0]
+             emotion = map_sentiment_to_emotion(sentiment_result["label"])
+         except:
+             pass
+     else:
+         try:
+             sentiment_result = arabic_sentiment_analysis(text)
+             emotion = map_sentiment_to_emotion(sentiment_result, language="ar")
+         except:
+             pass
+
+     output_filename = "output.wav"
+     try:
+         tts.tts_to_file(
+             text=text,
+             file_path=output_filename,
+             emotion=emotion,
+             speaker_wav="/app/audio/speaker_reference.wav",  # Updated path
+             language=language
+         )
+         return {
+             "status": "success",
+             "audio_file": output_filename,
+             "url": "/audio"
+         }
+     except Exception as e:
+         return {"status": "error", "message": str(e)}
+
+ # ✅ Serve the audio file
+ @app.get("/audio")
+ def get_audio():
+     return FileResponse("output.wav", media_type="audio/wav", filename="output.wav")
+
+ # Serve static files (your web page) from the 'web' directory
+ app.mount("/", StaticFiles(directory="web", html=True), name="static")
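For reference, a minimal client sketch for the /text-to-speech/ endpoint defined above, assuming the container is reachable at http://localhost:80 (the port exposed by the Dockerfile; adjust host and port for your deployment). The request body and response fields follow the handler's Message model and return values:

# Sketch: call the endpoint above and save the generated audio locally.
# Assumes `pip install requests` and a server reachable at BASE.
import requests

BASE = "http://localhost:80"

resp = requests.post(f"{BASE}/text-to-speech/", json={"text": "Hello from XTTS."})
data = resp.json()

if data.get("status") == "success":
    # The handler returns "url": "/audio", which serves the freshly written output.wav
    audio = requests.get(BASE + data["url"])
    with open("output.wav", "wb") as f:
        f.write(audio.content)
else:
    print("TTS error:", data.get("message"))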
requirements.txt ADDED
@@ -0,0 +1,178 @@
+ # core deps
+ numpy==1.22.0;python_version<="3.10"
+ numpy>=1.24.3;python_version>"3.10"
+ cython>=0.29.30
+ scipy>=1.11.2
+ langdetect
+ torch>=2.1
+ torchaudio==2.6.0
+ soundfile>=0.12.0
+ librosa>=0.10.0
+ scikit-learn>=1.3.0
+ numba==0.55.1;python_version<"3.9"
+ numba>=0.57.0;python_version>="3.9"
+ inflect>=7.5.0
+ tqdm>=4.67.1
+ anyascii>=0.3.2
+ pyyaml>=6.0.2
+ fsspec>=2025.3.2
+ aiohttp>=3.8.1
+ packaging>=24.2
+ mutagen==1.47.0
+ # deps for examples
+ flask>=3.1.0
+ # deps for inference
+ pysbd>=0.3.4
+ # deps for notebooks
+ umap-learn>=0.5.7
+ pandas>=1.4,<2.0
+ # deps for training
+ matplotlib>=3.8.4
+ # coqui stack
+ trainer>=0.0.36
+ # config management
+ coqpit>=0.0.17
+ # chinese g2p deps
+ jieba==0.42.1
+ pypinyin==0.54.0
+ # korean
+ hangul-romanize==0.1.0
+ # gruut+supported langs
+ gruut[de,es,fr]==2.2.3
+ gruut-ipa==0.13.0
+ gruut_lang_de==2.0.1
+ gruut_lang_en==2.0.1
+ gruut_lang_es==2.0.1
+ gruut_lang_fr==2.0.2
+ # deps for korean
+ jamo==0.4.1
+ nltk==3.9.1
+ g2pkk>=0.1.2
+ # deps for bangla
+ bangla==0.0.1
+ bnnumerizer==0.0.2
+ bnunicodenormalizer==0.1.7
+ # deps for tortoise
+ einops==0.8.1
+ transformers==4.51.2
+ # deps for bark
+ encodec==0.1.1
+ # deps for XTTS
+ unidecode>=1.3.8
+ num2words==0.5.14
+ spacy[ja]>=3
+ # Additional dependencies from TTS requirements
+ absl-py==2.2.2
+ aiohappyeyeballs==2.6.1
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.9.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ audioread==3.0.1
+ babel==2.17.0
+ blinker==1.9.0
+ catalogue==2.0.10
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ click==8.1.8
+ cloudpathlib==0.21.0
+ colorama==0.4.6
+ comtypes==1.4.10
+ confection==0.1.5
+ contourpy==1.2.1
+ cycler==0.12.1
+ cymem==2.0.11
+ Cython==3.0.12
+ dateparser==1.1.8
+ decorator==5.2.1
+ docopt==0.6.2
+ fastapi==0.109.2
+ filelock==3.18.0
+ fonttools==4.57.0
+ frozenlist==1.5.0
+ grpcio==1.71.0
+ h11==0.14.0
+ huggingface-hub==0.30.2
+ idna==3.10
+ importlib_metadata==8.6.1
+ importlib_resources==6.5.2
+ itsdangerous==2.2.0
+ Jinja2==3.1.6
+ joblib==1.4.2
+ jsonlines==1.2.0
+ kiwisolver==1.4.7
+ langcodes==3.5.0
+ language_data==1.3.0
+ lazy_loader==0.4
+ llvmlite==0.43.0
+ marisa-trie==1.2.1
+ Markdown==3.8
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ mdurl==0.1.2
+ more-itertools==10.6.0
+ mpmath==1.3.0
+ msgpack==1.1.0
+ multidict==6.4.3
+ murmurhash==1.0.12
+ networkx==2.8.8
+ num2words==0.5.14
+ packaging
+ pandas>=1.4,<2.0
+ pillow==11.1.0
+ platformdirs==4.3.7
+ pooch==1.8.2
+ preshed==3.0.9
+ propcache==0.3.1
+ protobuf==6.30.2
+ psutil==7.0.0
+ pycparser==2.22
+ pydantic==1.10.21
+ pydantic_core==2.33.1
+ Pygments==2.19.1
+ pynndescent==0.5.13
+ pyparsing==3.2.3
+ python-crfsuite==0.9.11
+ python-dateutil==2.9.0.post0
+ pyttsx3==2.98
+ pytz==2025.2
+ regex==2024.11.6
+ requests==2.32.3
+ rich==14.0.0
+ safetensors==0.5.3
+ shellingham==1.5.4
+ six==1.17.0
+ smart-open==7.1.0
+ sniffio==1.3.1
+ soxr==0.5.0.post1
+ spacy-legacy==3.0.12
+ spacy-loggers==1.0.5
+ SpeechRecognition==3.14.2
+ srsly==2.5.1
+ starlette==0.36.3
+ SudachiDict-core==20250129
+ SudachiPy==0.6.10
+ sympy==1.13.1
+ tensorboard==2.19.0
+ tensorboard-data-server==0.7.2
+ thinc==8.3.4
+ threadpoolctl==3.6.0
+ tokenizers==0.21.1
+ typeguard==4.4.2
+ typer==0.15.2
+ typing-inspection==0.4.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ tzlocal==5.3.1
+ Unidecode==1.3.8
+ urllib3==2.4.0
+ uvicorn==0.34.0
+ wasabi==1.1.3
+ weasel==0.4.1
+ Werkzeug==3.1.3
+ wrapt==1.17.2
+ yarl==1.19.0
+ zipp==3.21.0
+ # Force rebuild
runtime.txt ADDED
@@ -0,0 +1 @@
+ python-3.9
script.js ADDED
@@ -0,0 +1,38 @@
+ document.addEventListener('DOMContentLoaded', () => {
+     const convertButton = document.getElementById('convertButton');
+     const inputText = document.getElementById('inputText');
+     const statusDiv = document.getElementById('status');
+     const downloadLink = document.getElementById('downloadLink');
+     const audioPlayer = document.getElementById('audioPlayer');
+
+     convertButton.addEventListener('click', async () => {
+         const text = inputText.value;
+         statusDiv.textContent = 'Processing...';
+         downloadLink.style.display = 'none';
+         audioPlayer.style.display = 'none';
+
+         try {
+             const response = await fetch('http://localhost:5000/text-to-speech/', {
+                 method: 'POST',
+                 headers: {
+                     'Content-Type': 'application/json',
+                 },
+                 body: JSON.stringify({ text: text }),
+             });
+
+             const data = await response.json();
+
+             if (data.status === 'success') {
+                 statusDiv.textContent = 'Speech generated successfully!';
+                 downloadLink.href = 'http://localhost:5000' + data.url;
+                 downloadLink.style.display = 'block';
+                 audioPlayer.src = 'http://localhost:5000' + data.url;
+                 audioPlayer.style.display = 'block';
+             } else {
+                 statusDiv.textContent = `Error: ${data.message}`;
+             }
+         } catch (error) {
+             statusDiv.textContent = `Network error: ${error}`;
+         }
+     });
+ });
speaker_reference.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7fa2e3b58516ba6057ab93fa819eca83097c31a90d24e53cc6593ef384ce1f1
+ size 188476
start.sh ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ # 1. Navigate to the application directory (where local_server_new.py is)
+ cd /app
+
+ # 2. Activate the Python virtual environment
+ source venv/bin/activate
+
+ # 3. Run the FastAPI application using Uvicorn
+ #    - Bind to all interfaces (0.0.0.0)
+ #    - Listen on port 80 (required by Hugging Face Spaces for HTTP)
+ #    - Specify your FastAPI application module and app instance (local_server_new:app)
+ uvicorn local_server_new:app --host 0.0.0.0 --port 80
style.css ADDED
@@ -0,0 +1,76 @@
+ body {
+     font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+     background-color: #f4f7f6;
+     margin: 40px;
+     display: flex;
+     flex-direction: column;
+     align-items: center;
+     color: #333;
+ }
+
+ h1 {
+     color: #2c3e50;
+     margin-bottom: 30px;
+     text-align: center;
+     font-size: 2.5em;
+ }
+
+ textarea {
+     padding: 15px;
+     border: 1px solid #ccc;
+     border-radius: 8px;
+     font-size: 1em;
+     margin-bottom: 20px;
+     width: 80%;
+     max-width: 600px;
+     box-sizing: border-box;
+     resize: vertical; /* Allows vertical resizing */
+     box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
+ }
+
+ button {
+     background-color: #3498db;
+     color: white;
+     padding: 12px 25px;
+     border: none;
+     border-radius: 8px;
+     cursor: pointer;
+     font-size: 1.1em;
+     transition: background-color 0.3s ease;
+     box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
+ }
+
+ button:hover {
+     background-color: #2980b9;
+ }
+
+ #status {
+     margin-top: 20px;
+     font-weight: bold;
+     color: #27ae60; /* Green for success, you can change for errors */
+ }
+
+ #audioOutput {
+     margin-top: 30px;
+     text-align: center;
+ }
+
+ #downloadLink {
+     display: inline-block;
+     background-color: #2ecc71;
+     color: white;
+     padding: 10px 20px;
+     border-radius: 5px;
+     text-decoration: none;
+     font-size: 1em;
+     transition: background-color 0.3s ease;
+     box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
+ }
+
+ #downloadLink:hover {
+     background-color: #27ae60;
+ }
+
+ #audioPlayer {
+     margin-top: 10px;
+ }
web/index.html ADDED
@@ -0,0 +1,19 @@
+ <!DOCTYPE html>
+ <html>
+ <head>
+     <title>Text to Speech</title>
+     <link rel="stylesheet" href="style.css">
+ </head>
+ <body>
+     <h1>Text to Speech</h1>
+     <textarea id="inputText" rows="5" cols="50"></textarea><br><br>
+     <button id="convertButton">Convert to Speech</button>
+     <div id="status"></div>
+     <div id="audioOutput" style="margin-top: 20px;">
+         <a id="downloadLink" href="#" download="output.wav" style="display: none;">Download Audio</a>
+         <audio id="audioPlayer" controls style="display: none;"></audio>
+     </div>
+
+     <script src="script.js"></script>
+ </body>
+ </html>
web/script.js ADDED
@@ -0,0 +1,38 @@
+ document.addEventListener('DOMContentLoaded', () => {
+     const convertButton = document.getElementById('convertButton');
+     const inputText = document.getElementById('inputText');
+     const statusDiv = document.getElementById('status');
+     const downloadLink = document.getElementById('downloadLink');
+     const audioPlayer = document.getElementById('audioPlayer');
+
+     convertButton.addEventListener('click', async () => {
+         const text = inputText.value;
+         statusDiv.textContent = 'Processing...';
+         downloadLink.style.display = 'none';
+         audioPlayer.style.display = 'none';
+
+         try {
+             const response = await fetch('http://localhost:5000/text-to-speech/', {
+                 method: 'POST',
+                 headers: {
+                     'Content-Type': 'application/json',
+                 },
+                 body: JSON.stringify({ text: text }),
+             });
+
+             const data = await response.json();
+
+             if (data.status === 'success') {
+                 statusDiv.textContent = 'Speech generated successfully!';
+                 downloadLink.href = 'http://localhost:5000' + data.url;
+                 downloadLink.style.display = 'block';
+                 audioPlayer.src = 'http://localhost:5000' + data.url;
+                 audioPlayer.style.display = 'block';
+             } else {
+                 statusDiv.textContent = `Error: ${data.message}`;
+             }
+         } catch (error) {
+             statusDiv.textContent = `Network error: ${error}`;
+         }
+     });
+ });
web/style.css ADDED
@@ -0,0 +1,76 @@
+ body {
+     font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+     background-color: #f4f7f6;
+     margin: 40px;
+     display: flex;
+     flex-direction: column;
+     align-items: center;
+     color: #333;
+ }
+
+ h1 {
+     color: #2c3e50;
+     margin-bottom: 30px;
+     text-align: center;
+     font-size: 2.5em;
+ }
+
+ textarea {
+     padding: 15px;
+     border: 1px solid #ccc;
+     border-radius: 8px;
+     font-size: 1em;
+     margin-bottom: 20px;
+     width: 80%;
+     max-width: 600px;
+     box-sizing: border-box;
+     resize: vertical; /* Allows vertical resizing */
+     box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
+ }
+
+ button {
+     background-color: #3498db;
+     color: white;
+     padding: 12px 25px;
+     border: none;
+     border-radius: 8px;
+     cursor: pointer;
+     font-size: 1.1em;
+     transition: background-color 0.3s ease;
+     box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
+ }
+
+ button:hover {
+     background-color: #2980b9;
+ }
+
+ #status {
+     margin-top: 20px;
+     font-weight: bold;
+     color: #27ae60; /* Green for success, you can change for errors */
+ }
+
+ #audioOutput {
+     margin-top: 30px;
+     text-align: center;
+ }
+
+ #downloadLink {
+     display: inline-block;
+     background-color: #2ecc71;
+     color: white;
+     padding: 10px 20px;
+     border-radius: 5px;
+     text-decoration: none;
+     font-size: 1em;
+     transition: background-color 0.3s ease;
+     box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
+ }
+
+ #downloadLink:hover {
+     background-color: #27ae60;
+ }
+
+ #audioPlayer {
+     margin-top: 10px;
+ }