sachin committed on
Commit
8ca8b71
·
1 Parent(s): d2e6815

a-i server

Browse files
Files changed (1) hide show
  1. src/server/main.py +104 -417
src/server/main.py CHANGED
@@ -1,49 +1,28 @@
1
  import argparse
2
- import io
3
- from time import time
4
- from typing import List, Optional
5
  from abc import ABC, abstractmethod
6
-
7
  import uvicorn
8
- from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Header, Form
9
  from fastapi.middleware.cors import CORSMiddleware
10
- from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
11
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
12
- from pydantic import BaseModel, field_validator, Field
13
- from slowapi import Limiter
14
- from slowapi.util import get_remote_address
15
  import requests
16
- from PIL import Image
17
- import base64
18
- from Crypto.Cipher import AES
19
-
20
- # Import from auth.py
21
- from utils.auth import get_current_user, get_current_user_with_admin, login, refresh_token, register, app_register, TokenResponse, Settings, LoginRequest, RegisterRequest, bearer_scheme
22
-
23
- # Import decryption utility
24
- from utils.crypto import decrypt_data
25
 
26
  # Assuming these are in your project structure
27
  from config.tts_config import SPEED, ResponseFormat, config as tts_config
28
  from config.logging_config import logger
29
 
30
- settings = Settings()
31
-
32
  # FastAPI app setup with enhanced docs
33
  app = FastAPI(
34
  title="Dhwani API",
35
- description="A multilingual AI-powered API supporting Indian languages for chat, text-to-speech, audio processing, and transcription. "
36
- "**Authentication Guide:** \n"
37
- "1. Obtain an access token by sending a POST request to `/v1/token` with `username` and `password`. \n"
38
- "2. Click the 'Authorize' button (top-right), enter your access token (e.g., `your_access_token`) in the 'bearerAuth' field, and click 'Authorize'. \n"
39
- "All protected endpoints require this token for access. \n",
40
  version="1.0.0",
41
  redirect_slashes=False,
42
  openapi_tags=[
43
  {"name": "Chat", "description": "Chat-related endpoints"},
44
  {"name": "Audio", "description": "Audio processing and TTS endpoints"},
45
  {"name": "Translation", "description": "Text translation endpoints"},
46
- {"name": "Authentication", "description": "User authentication and registration"},
47
  {"name": "Utility", "description": "General utility endpoints"},
48
  ],
49
  )
@@ -56,17 +35,6 @@ app.add_middleware(
56
  allow_headers=["*"],
57
  )
58
 
59
- # Rate limiting based on user_id with fallback to IP
60
- async def get_user_id_for_rate_limit(request: Request):
61
- try:
62
- credentials = bearer_scheme(request)
63
- user_id = await get_current_user(credentials)
64
- return user_id
65
- except Exception:
66
- return get_remote_address(request)
67
-
68
- limiter = Limiter(key_func=get_user_id_for_rate_limit)
69
-
70
  # Request/Response Models
71
  class TranscriptionResponse(BaseModel):
72
  text: str = Field(..., description="Transcribed text from the audio")
@@ -87,24 +55,16 @@ class AudioProcessingResponse(BaseModel):
87
  schema_extra = {"example": {"result": "Processed audio output"}}
88
 
89
  class ChatRequest(BaseModel):
90
- prompt: str = Field(..., description="Base64-encoded encrypted prompt (max 1000 characters after decryption)")
91
- src_lang: str = Field(..., description="Base64-encoded encrypted source language code")
92
- tgt_lang: str = Field(..., description="Base64-encoded encrypted target language code")
93
-
94
- @field_validator("prompt", "src_lang", "tgt_lang")
95
- def must_be_valid_base64(cls, v):
96
- try:
97
- base64.b64decode(v)
98
- except Exception:
99
- raise ValueError("Field must be valid base64-encoded data")
100
- return v
101
 
102
  class Config:
103
  schema_extra = {
104
  "example": {
105
- "prompt": "base64_encoded_encrypted_prompt",
106
- "src_lang": "base64_encoded_encrypted_kan_Knda",
107
- "tgt_lang": "base64_encoded_encrypted_kan_Knda"
108
  }
109
  }
110
 
@@ -115,31 +75,16 @@ class ChatResponse(BaseModel):
115
  schema_extra = {"example": {"response": "Hi there, I'm doing great!"}}
116
 
117
  class TranslationRequest(BaseModel):
118
- sentences: List[str] = Field(..., description="List of base64-encoded encrypted sentences")
119
- src_lang: str = Field(..., description="Base64-encoded encrypted source language code")
120
- tgt_lang: str = Field(..., description="Base64-encoded encrypted target language code")
121
-
122
- @field_validator("sentences", "src_lang", "tgt_lang")
123
- def must_be_valid_base64(cls, v):
124
- if isinstance(v, list):
125
- for item in v:
126
- try:
127
- base64.b64decode(item)
128
- except Exception:
129
- raise ValueError("Each sentence must be valid base64-encoded data")
130
- else:
131
- try:
132
- base64.b64decode(v)
133
- except Exception:
134
- raise ValueError("Field must be valid base64-encoded data")
135
- return v
136
 
137
  class Config:
138
  schema_extra = {
139
  "example": {
140
- "sentences": ["base64_encoded_encrypted_hello", "base64_encoded_encrypted_how_are_you"],
141
- "src_lang": "base64_encoded_encrypted_en",
142
- "tgt_lang": "base64_encoded_encrypted_kan_Knda"
143
  }
144
  }
145
 
@@ -150,24 +95,16 @@ class TranslationResponse(BaseModel):
150
  schema_extra = {"example": {"translations": ["ನಮಸ್ಕಾರ", "ನೀವು ಹೇಗಿದ್ದೀರಿ?"]}}
151
 
152
  class VisualQueryRequest(BaseModel):
153
- query: str = Field(..., description="Base64-encoded encrypted text query")
154
- src_lang: str = Field(..., description="Base64-encoded encrypted source language code")
155
- tgt_lang: str = Field(..., description="Base64-encoded encrypted target language code")
156
-
157
- @field_validator("query", "src_lang", "tgt_lang")
158
- def must_be_valid_base64(cls, v):
159
- try:
160
- base64.b64decode(v)
161
- except Exception:
162
- raise ValueError("Field must be valid base64-encoded data")
163
- return v
164
 
165
  class Config:
166
  schema_extra = {
167
  "example": {
168
- "query": "base64_encoded_encrypted_describe_image",
169
- "src_lang": "base64_encoded_encrypted_kan_Knda",
170
- "tgt_lang": "base64_encoded_encrypted_kan_Knda"
171
  }
172
  }
173
 
@@ -183,8 +120,7 @@ class TTSService(ABC):
183
  class ExternalTTSService(TTSService):
184
  async def generate_speech(self, payload: dict) -> requests.Response:
185
  try:
186
-
187
- base_url = f"{settings.external_api_base_url}/v1/audio/speech"
188
  return requests.post(
189
  base_url,
190
  json=payload,
@@ -209,7 +145,7 @@ def get_tts_service() -> TTSService:
209
  tags=["Utility"],
210
  response_model=dict)
211
  async def health_check():
212
- return {"status": "healthy", "model": settings.llm_model_name}
213
 
214
  @app.get("/",
215
  summary="Redirect to Docs",
@@ -218,118 +154,34 @@ async def health_check():
218
  async def home():
219
  return RedirectResponse(url="/docs")
220
 
221
- @app.post("/v1/token",
222
- response_model=TokenResponse,
223
- summary="User Login",
224
- description="Authenticate a user with encrypted email and device token to obtain an access token and refresh token. Requires X-Session-Key header.",
225
- tags=["Authentication"],
226
- responses={
227
- 200: {"description": "Successful login", "model": TokenResponse},
228
- 400: {"description": "Invalid encrypted data"},
229
- 401: {"description": "Invalid email or device token"}
230
- })
231
- async def token(
232
- login_request: LoginRequest,
233
- x_session_key: str = Header(..., alias="X-Session-Key")
234
- ):
235
- return await login(login_request, x_session_key)
236
-
237
- @app.post("/v1/refresh",
238
- response_model=TokenResponse,
239
- summary="Refresh Access Token",
240
- description="Generate a new access token and refresh token using an existing valid refresh token.",
241
- tags=["Authentication"],
242
- responses={
243
- 200: {"description": "New tokens issued", "model": TokenResponse},
244
- 401: {"description": "Invalid or expired refresh token"}
245
- })
246
- async def refresh(credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)):
247
- return await refresh_token(credentials)
248
-
249
- @app.post("/v1/register",
250
- response_model=TokenResponse,
251
- summary="Register New User (Admin Only)",
252
- description="Create a new user account in the `users` table. Only admin accounts can register new users (use 'admin' user with password 'admin54321' initially). Non-admin users are forbidden from modifying the users table.",
253
- tags=["Authentication"],
254
- responses={
255
- 200: {"description": "User registered successfully", "model": TokenResponse},
256
- 400: {"description": "Username already exists"},
257
- 401: {"description": "Unauthorized - Valid admin token required"},
258
- 403: {"description": "Forbidden - Admin access required"},
259
- 500: {"description": "Registration failed due to server error"}
260
- })
261
- async def register_user(
262
- register_request: RegisterRequest,
263
- current_user: str = Depends(get_current_user_with_admin)
264
- ):
265
- return await register(register_request, current_user)
266
-
267
- @app.post("/v1/app/register",
268
- response_model=TokenResponse,
269
- summary="Register New App User",
270
- description="Create a new user account for the mobile app in the `app_users` table using an encrypted email and device token. Returns an access token and refresh token. Rate limited to 5 requests per minute per IP. Requires X-Session-Key header.",
271
- tags=["Authentication"],
272
- responses={
273
- 200: {"description": "User registered successfully", "model": TokenResponse},
274
- 400: {"description": "Email already registered or invalid encrypted data"},
275
- 429: {"description": "Rate limit exceeded"}
276
- })
277
- @limiter.limit(settings.speech_rate_limit)
278
- async def app_register_user(
279
- request: Request,
280
- register_request: RegisterRequest,
281
- x_session_key: str = Header(..., alias="X-Session-Key")
282
- ):
283
- logger.info(f"App registration attempt")
284
- return await app_register(register_request, x_session_key)
285
-
286
  @app.post("/v1/audio/speech",
287
  summary="Generate Speech from Text",
288
- description="Convert encrypted text to speech using an external TTS service. Rate limited to 5 requests per minute per user. Requires authentication and X-Session-Key header.",
289
  tags=["Audio"],
290
  responses={
291
  200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
292
  400: {"description": "Invalid or empty input"},
293
- 401: {"description": "Unauthorized - Token required"},
294
- 429: {"description": "Rate limit exceeded"},
295
  502: {"description": "External TTS service unavailable"},
296
  504: {"description": "TTS service timeout"}
297
  })
298
- @limiter.limit(settings.speech_rate_limit)
299
  async def generate_audio(
300
  request: Request,
301
- input: str = Query(..., description="Base64-encoded encrypted text to convert to speech (max 1000 characters after decryption)"),
302
  response_format: str = Query("mp3", description="Audio format (ignored, defaults to mp3 for external API)"),
303
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
304
- x_session_key: str = Header(..., alias="X-Session-Key"),
305
  tts_service: TTSService = Depends(get_tts_service)
306
  ):
307
- user_id = await get_current_user(credentials)
308
- session_key = base64.b64decode(x_session_key)
309
-
310
- # Decrypt input
311
- try:
312
- encrypted_input = base64.b64decode(input)
313
- decrypted_input = decrypt_data(encrypted_input, session_key).decode("utf-8")
314
- except Exception as e:
315
- logger.error(f"Input decryption failed: {str(e)}")
316
- raise HTTPException(status_code=400, detail="Invalid encrypted input")
317
-
318
- if not decrypted_input.strip():
319
  raise HTTPException(status_code=400, detail="Input cannot be empty")
320
- if len(decrypted_input) > 1000:
321
- raise HTTPException(status_code=400, detail="Decrypted input cannot exceed 1000 characters")
322
 
323
  logger.info("Processing speech request", extra={
324
  "endpoint": "/v1/audio/speech",
325
- "input_length": len(decrypted_input),
326
- "client_ip": get_remote_address(request),
327
- "user_id": user_id
328
  })
329
 
330
- payload = {
331
- "text": decrypted_input
332
- }
333
 
334
  try:
335
  response = await tts_service.generate_speech(payload)
@@ -353,62 +205,30 @@ async def generate_audio(
353
  @app.post("/v1/chat",
354
  response_model=ChatResponse,
355
  summary="Chat with AI",
356
- description="Generate a chat response from an encrypted prompt and encrypted language code. Rate limited to 100 requests per minute per user. Requires authentication and X-Session-Key header.",
357
  tags=["Chat"],
358
  responses={
359
  200: {"description": "Chat response", "model": ChatResponse},
360
- 400: {"description": "Invalid prompt, encrypted data, or language code"},
361
- 401: {"description": "Unauthorized - Token required"},
362
- 429: {"description": "Rate limit exceeded"},
363
  504: {"description": "Chat service timeout"}
364
  })
365
- @limiter.limit(settings.chat_rate_limit)
366
  async def chat(
367
  request: Request,
368
- chat_request: ChatRequest,
369
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
370
- x_session_key: str = Header(..., alias="X-Session-Key")
371
  ):
372
- user_id = await get_current_user(credentials)
373
- session_key = base64.b64decode(x_session_key)
374
-
375
- # Decrypt the prompt
376
- try:
377
- encrypted_prompt = base64.b64decode(chat_request.prompt)
378
- decrypted_prompt = decrypt_data(encrypted_prompt, session_key).decode("utf-8")
379
- except Exception as e:
380
- logger.error(f"Prompt decryption failed: {str(e)}")
381
- raise HTTPException(status_code=400, detail="Invalid encrypted prompt")
382
-
383
- # Decrypt the source language
384
- try:
385
- encrypted_src_lang = base64.b64decode(chat_request.src_lang)
386
- decrypted_src_lang = decrypt_data(encrypted_src_lang, session_key).decode("utf-8")
387
- except Exception as e:
388
- logger.error(f"Source language decryption failed: {str(e)}")
389
- raise HTTPException(status_code=400, detail="Invalid encrypted source language")
390
-
391
- # Decrypt the target language
392
- try:
393
- encrypted_tgt_lang = base64.b64decode(chat_request.tgt_lang)
394
- decrypted_tgt_lang = decrypt_data(encrypted_tgt_lang, session_key).decode("utf-8")
395
- except Exception as e:
396
- logger.error(f"Target language decryption failed: {str(e)}")
397
- raise HTTPException(status_code=400, detail="Invalid encrypted target language")
398
-
399
- if not decrypted_prompt:
400
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
401
- if len(decrypted_prompt) > 1000:
402
- raise HTTPException(status_code=400, detail="Decrypted prompt cannot exceed 1000 characters")
403
 
404
- logger.info(f"Received prompt: {decrypted_prompt}, src_lang: {decrypted_src_lang}, user_id: {user_id}")
405
 
406
  try:
407
- external_url = f"{settings.external_api_base_url}/v1/chat"
408
  payload = {
409
- "prompt": decrypted_prompt,
410
- "src_lang": decrypted_src_lang,
411
- "tgt_lang": decrypted_tgt_lang
412
  }
413
 
414
  response = requests.post(
@@ -440,43 +260,28 @@ async def chat(
440
  @app.post("/v1/transcribe/",
441
  response_model=TranscriptionResponse,
442
  summary="Transcribe Audio File",
443
- description="Transcribe an encrypted audio file into text in the specified encrypted language. Requires authentication and X-Session-Key header.",
444
  tags=["Audio"],
445
  responses={
446
  200: {"description": "Transcription result", "model": TranscriptionResponse},
447
- 400: {"description": "Invalid encrypted audio or language"},
448
- 401: {"description": "Unauthorized - Token required"},
449
  504: {"description": "Transcription service timeout"}
450
  })
451
  async def transcribe_audio(
452
- file: UploadFile = File(..., description="Encrypted audio file to transcribe"),
453
- language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
454
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
455
- x_session_key: str = Header(..., alias="X-Session-Key")
456
  ):
457
- user_id = await get_current_user(credentials)
458
- session_key = base64.b64decode(x_session_key)
459
-
460
- # Decrypt the language
461
- try:
462
- encrypted_language = base64.b64decode(language)
463
- decrypted_language = decrypt_data(encrypted_language, session_key).decode("utf-8")
464
- except Exception as e:
465
- logger.error(f"Language decryption failed: {str(e)}")
466
- raise HTTPException(status_code=400, detail="Invalid encrypted language")
467
-
468
  # Validate language
469
  allowed_languages = ["kannada", "hindi", "tamil"]
470
- if decrypted_language not in allowed_languages:
471
  raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
472
 
473
  start_time = time()
474
  try:
475
- encrypted_content = await file.read()
476
- file_content = decrypt_data(encrypted_content, session_key)
477
  files = {"file": (file.filename, file_content, file.content_type)}
478
 
479
- external_url = f"{settings.external_api_base_url}/v1/transcribe/?language={decrypted_language}"
480
  response = requests.post(
481
  external_url,
482
  files=files,
@@ -489,8 +294,6 @@ async def transcribe_audio(
489
  logger.info(f"Transcription completed in {time() - start_time:.2f} seconds")
490
  return TranscriptionResponse(text=transcription)
491
 
492
- except HTTPException:
493
- raise
494
  except requests.Timeout:
495
  logger.error("Transcription service timed out")
496
  raise HTTPException(status_code=504, detail="Transcription service timeout")
@@ -501,78 +304,38 @@ async def transcribe_audio(
501
  @app.post("/v1/translate",
502
  response_model=TranslationResponse,
503
  summary="Translate Text",
504
- description="Translate a list of base64-encoded encrypted sentences from an encrypted source to an encrypted target language. Requires authentication and X-Session-Key header.",
505
  tags=["Translation"],
506
  responses={
507
  200: {"description": "Translation result", "model": TranslationResponse},
508
- 400: {"description": "Invalid encrypted sentences or languages"},
509
- 401: {"description": "Unauthorized - Token required"},
510
  500: {"description": "Translation service error"},
511
  504: {"description": "Translation service timeout"}
512
  })
513
  async def translate(
514
- request: TranslationRequest,
515
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
516
- x_session_key: str = Header(..., alias="X-Session-Key")
517
  ):
518
- user_id = await get_current_user(credentials)
519
- try:
520
- session_key = base64.b64decode(x_session_key)
521
- except Exception as e:
522
- logger.error(f"Invalid X-Session-Key: {str(e)}")
523
- raise HTTPException(status_code=400, detail="Invalid session key")
524
-
525
- # Decrypt sentences
526
- decrypted_sentences = []
527
- for sentence in request.sentences:
528
- try:
529
- encrypted_sentence = base64.b64decode(sentence)
530
- decrypted_sentence = decrypt_data(encrypted_sentence, session_key).decode("utf-8")
531
- if not decrypted_sentence.strip():
532
- raise ValueError("Decrypted sentence is empty")
533
- decrypted_sentences.append(decrypted_sentence)
534
- except Exception as e:
535
- logger.error(f"Sentence decryption failed: {str(e)}")
536
- raise HTTPException(status_code=400, detail=f"Invalid encrypted sentence: {str(e)}")
537
-
538
- # Decrypt source language
539
- try:
540
- encrypted_src_lang = base64.b64decode(request.src_lang)
541
- decrypted_src_lang = decrypt_data(encrypted_src_lang, session_key).decode("utf-8")
542
- if not decrypted_src_lang.strip():
543
- raise ValueError("Decrypted source language is empty")
544
- except Exception as e:
545
- logger.error(f"Source language decryption failed: {str(e)}")
546
- raise HTTPException(status_code=400, detail=f"Invalid encrypted source language: {str(e)}")
547
-
548
- # Decrypt target language
549
- try:
550
- encrypted_tgt_lang = base64.b64decode(request.tgt_lang)
551
- decrypted_tgt_lang = decrypt_data(encrypted_tgt_lang, session_key).decode("utf-8")
552
- if not decrypted_tgt_lang.strip():
553
- raise ValueError("Decrypted target language is empty")
554
- except Exception as e:
555
- logger.error(f"Target language decryption failed: {str(e)}")
556
- raise HTTPException(status_code=400, detail=f"Invalid encrypted target language: {str(e)}")
557
-
558
  # Validate language codes
559
  supported_languages = [
560
  "eng_Latn", "hin_Deva", "kan_Knda", "tam_Taml", "mal_Mlym", "tel_Telu",
561
  "deu_Latn", "fra_Latn", "nld_Latn", "spa_Latn", "ita_Latn", "por_Latn",
562
  "rus_Cyrl", "pol_Latn"
563
  ]
564
- if decrypted_src_lang not in supported_languages or decrypted_tgt_lang not in supported_languages:
565
- logger.error(f"Unsupported language codes: src={decrypted_src_lang}, tgt={decrypted_tgt_lang}")
566
- raise HTTPException(status_code=400, detail=f"Unsupported language codes: src={decrypted_src_lang}, tgt={decrypted_tgt_lang}")
567
 
568
- logger.info(f"Received translation request: {len(decrypted_sentences)} sentences, src_lang: {decrypted_src_lang}, tgt_lang: {decrypted_tgt_lang}, user_id: {user_id}")
569
 
570
- external_url = f"{settings.external_api_base_url}/v1/translate"
571
 
572
  payload = {
573
- "sentences": decrypted_sentences,
574
- "src_lang": decrypted_src_lang,
575
- "tgt_lang": decrypted_tgt_lang
576
  }
577
 
578
  try:
@@ -590,7 +353,7 @@ async def translate(
590
  response_data = response.json()
591
  translations = response_data.get("translations", [])
592
 
593
- if not translations or len(translations) != len(decrypted_sentences):
594
  logger.warning(f"Unexpected response format: {response_data}")
595
  raise HTTPException(status_code=500, detail="Invalid response from translation service")
596
 
@@ -620,56 +383,38 @@ class PDFTextExtractionResponse(BaseModel):
620
  @app.post("/v1/extract-text",
621
  response_model=PDFTextExtractionResponse,
622
  summary="Extract Text from PDF",
623
- description="Extract text from a specified page of an encrypted PDF file by calling an external API. Rate limited to 100 requests per minute per user. Requires authentication and X-Session-Key header.",
624
  tags=["PDF"],
625
  responses={
626
  200: {"description": "Extracted text", "model": PDFTextExtractionResponse},
627
- 400: {"description": "Invalid encrypted PDF or page number"},
628
- 401: {"description": "Unauthorized - Token required"},
629
- 429: {"description": "Rate limit exceeded"},
630
  500: {"description": "External API error"},
631
  504: {"description": "External API timeout"}
632
  })
633
- @limiter.limit(settings.chat_rate_limit)
634
  async def extract_text(
635
  request: Request,
636
- file: UploadFile = File(..., description="Encrypted PDF file to extract text from"),
637
  page_number: int = Query(1, description="Page number to extract text from (1-based indexing)"),
638
- encrypted_language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
639
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
640
- x_session_key: str = Header(..., alias="X-Session-Key")
641
  ):
642
- user_id = await get_current_user(credentials)
643
- session_key = base64.b64decode(x_session_key)
644
-
645
  # Validate page number
646
  if page_number < 1:
647
  raise HTTPException(status_code=400, detail="Page number must be at least 1")
648
 
649
- # Decrypt PDF content
650
- try:
651
- encrypted_content = await file.read()
652
- decrypted_content = decrypt_data(encrypted_content, session_key)
653
- #decrpyted_language = decrypt_data(encrypted_language, session_key)
654
- except Exception as e:
655
- logger.error(f"PDF decryption failed: {str(e)}")
656
- raise HTTPException(status_code=400, detail="Invalid encrypted PDF")
657
-
658
  logger.info("Processing PDF text extraction request", extra={
659
  "endpoint": "/v1/extract-text",
660
  "file_name": file.filename,
661
  "page_number": page_number,
662
- "decrpyted_language" : encrypted_language,
663
- "client_ip": get_remote_address(request),
664
- "user_id": user_id
665
  })
666
 
667
  start_time = time()
668
  try:
669
- # Call external API
670
- external_url = f"{settings.external_pdf_api_base_url}/extract-text/?page_number={page_number}&language={encrypted_language}"
671
- files = {"file": (file.filename, decrypted_content, file.content_type)}
672
 
 
673
  response = requests.post(
674
  external_url,
675
  files=files,
@@ -700,27 +445,19 @@ async def extract_text(
700
  @app.post("/v1/visual_query",
701
  response_model=VisualQueryResponse,
702
  summary="Visual Query with Image",
703
- description="Process a visual query with an encrypted text query, encrypted image, and encrypted language codes provided in a JSON body named 'data'. Rate limited to 100 requests per minute per user. Requires authentication and X-Session-Key header.",
704
  tags=["Chat"],
705
  responses={
706
  200: {"description": "Query response", "model": VisualQueryResponse},
707
- 400: {"description": "Invalid query, encrypted data, or language codes"},
708
- 401: {"description": "Unauthorized - Token required"},
709
  422: {"description": "Validation error in request body"},
710
- 429: {"description": "Rate limit exceeded"},
711
  504: {"description": "Visual query service timeout"}
712
  })
713
- @limiter.limit(settings.chat_rate_limit)
714
  async def visual_query(
715
  request: Request,
716
- data: str = Form(..., description="JSON string containing encrypted query, src_lang, and tgt_lang"),
717
- file: UploadFile = File(..., description="Encrypted image file to analyze"),
718
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
719
- x_session_key: str = Header(..., alias="X-Session-Key")
720
  ):
721
- user_id = await get_current_user(credentials)
722
- session_key = base64.b64decode(x_session_key)
723
-
724
  # Parse and validate JSON data
725
  try:
726
  import json
@@ -730,58 +467,26 @@ async def visual_query(
730
  logger.error(f"Failed to parse JSON data: {str(e)}")
731
  raise HTTPException(status_code=422, detail=f"Invalid JSON data: {str(e)}")
732
 
733
- # Decrypt query
734
- try:
735
- encrypted_query = base64.b64decode(visual_query_request.query)
736
- decrypted_query = decrypt_data(encrypted_query, session_key).decode("utf-8")
737
- except Exception as e:
738
- logger.error(f"Query decryption failed: {str(e)}")
739
- raise HTTPException(status_code=400, detail="Invalid encrypted query")
740
-
741
- # Decrypt source language
742
- try:
743
- encrypted_src_lang = base64.b64decode(visual_query_request.src_lang)
744
- decrypted_src_lang = decrypt_data(encrypted_src_lang, session_key).decode("utf-8")
745
- except Exception as e:
746
- logger.error(f"Source language decryption failed: {str(e)}")
747
- raise HTTPException(status_code=400, detail="Invalid encrypted source language")
748
-
749
- # Decrypt target language
750
- try:
751
- encrypted_tgt_lang = base64.b64decode(visual_query_request.tgt_lang)
752
- decrypted_tgt_lang = decrypt_data(encrypted_tgt_lang, session_key).decode("utf-8")
753
- except Exception as e:
754
- logger.error(f"Target language decryption failed: {str(e)}")
755
- raise HTTPException(status_code=400, detail="Invalid encrypted target language")
756
-
757
- if not decrypted_query.strip():
758
  raise HTTPException(status_code=400, detail="Query cannot be empty")
759
- if len(decrypted_query) > 1000:
760
- raise HTTPException(status_code=400, detail="Decrypted query cannot exceed 1000 characters")
761
-
762
- # Decrypt image
763
- try:
764
- encrypted_content = await file.read()
765
- decrypted_content = decrypt_data(encrypted_content, session_key)
766
- except Exception as e:
767
- logger.error(f"Image decryption failed: {str(e)}")
768
- raise HTTPException(status_code=400, detail="Invalid encrypted image")
769
 
770
  logger.info("Processing visual query request", extra={
771
  "endpoint": "/v1/visual_query",
772
- "query_length": len(decrypted_query),
773
  "file_name": file.filename,
774
- "client_ip": get_remote_address(request),
775
- "user_id": user_id,
776
- "src_lang": decrypted_src_lang,
777
- "tgt_lang": decrypted_tgt_lang
778
  })
779
 
780
- external_url = f"{settings.external_api_base_url}/v1/visual_query/?src_lang={decrypted_src_lang}&tgt_lang={decrypted_tgt_lang}"
781
 
782
  try:
783
- files = {"file": (file.filename, decrypted_content, file.content_type)}
784
- data = {"query": decrypted_query}
 
785
 
786
  response = requests.post(
787
  external_url,
@@ -821,53 +526,35 @@ class SupportedLanguage(str, Enum):
821
 
822
  @app.post("/v1/speech_to_speech",
823
  summary="Speech-to-Speech Conversion",
824
- description="Convert input encrypted speech to processed speech in the specified encrypted language by calling an external speech-to-speech API. Rate limited to 5 requests per minute per user. Requires authentication and X-Session-Key header.",
825
  tags=["Audio"],
826
  responses={
827
  200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
828
- 400: {"description": "Invalid input, encrypted audio, or language"},
829
- 401: {"description": "Unauthorized - Token required"},
830
- 429: {"description": "Rate limit exceeded"},
831
  504: {"description": "External API timeout"},
832
  500: {"description": "External API error"}
833
  })
834
- @limiter.limit(settings.speech_rate_limit)
835
  async def speech_to_speech(
836
  request: Request,
837
- file: UploadFile = File(..., description="Encrypted audio file to process"),
838
- language: str = Query(..., description="Base64-encoded encrypted language of the audio (kannada, hindi, tamil after decryption)"),
839
- credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
840
- x_session_key: str = Header(..., alias="X-Session-Key")
841
  ) -> StreamingResponse:
842
- user_id = await get_current_user(credentials)
843
- session_key = base64.b64decode(x_session_key)
844
-
845
- # Decrypt the language
846
- try:
847
- encrypted_language = base64.b64decode(language)
848
- decrypted_language = decrypt_data(encrypted_language, session_key).decode("utf-8")
849
- except Exception as e:
850
- logger.error(f"Language decryption failed: {str(e)}")
851
- raise HTTPException(status_code=400, detail="Invalid encrypted language")
852
-
853
  # Validate language
854
  allowed_languages = [lang.value for lang in SupportedLanguage]
855
- if decrypted_language not in allowed_languages:
856
  raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
857
 
858
  logger.info("Processing speech-to-speech request", extra={
859
  "endpoint": "/v1/speech_to_speech",
860
  "audio_filename": file.filename,
861
- "language": decrypted_language,
862
- "client_ip": get_remote_address(request),
863
- "user_id": user_id
864
  })
865
 
866
  try:
867
- encrypted_content = await file.read()
868
- file_content = decrypt_data(encrypted_content, session_key)
869
  files = {"file": (file.filename, file_content, file.content_type)}
870
- external_url = f"{settings.external_api_base_url}/v1/speech_to_speech?language={decrypted_language}"
871
 
872
  response = requests.post(
873
  external_url,
@@ -891,15 +578,15 @@ async def speech_to_speech(
891
  )
892
 
893
  except requests.Timeout:
894
- logger.error("External speech-to-speech API timed out", extra={"user_id": user_id})
895
  raise HTTPException(status_code=504, detail="External API timeout")
896
  except requests.RequestException as e:
897
- logger.error(f"External speech-to-speech API error: {str(e)}", extra={"user_id": user_id})
898
  raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
899
 
900
  if __name__ == "__main__":
901
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")
902
- parser.add_argument("--port", type=int, default=settings.port, help="Port to run the server on.")
903
- parser.add_argument("--host", type=str, default=settings.host, help="Host to run the server on.")
904
  args = parser.parse_args()
905
  uvicorn.run(app, host=args.host, port=args.port)
 
1
  import argparse
2
+ from typing import List
 
 
3
  from abc import ABC, abstractmethod
 
4
  import uvicorn
5
+ from fastapi import FastAPI, File, HTTPException, Query, Request, UploadFile
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import RedirectResponse, StreamingResponse
8
+ from pydantic import BaseModel, Field
 
 
 
9
  import requests
10
+ from time import time
 
 
 
 
 
 
 
 
11
 
12
  # Assuming these are in your project structure
13
  from config.tts_config import SPEED, ResponseFormat, config as tts_config
14
  from config.logging_config import logger
15
 
 
 
16
  # FastAPI app setup with enhanced docs
17
  app = FastAPI(
18
  title="Dhwani API",
19
+ description="A multilingual AI-powered API supporting Indian languages for chat, text-to-speech, audio processing, and transcription.",
 
 
 
 
20
  version="1.0.0",
21
  redirect_slashes=False,
22
  openapi_tags=[
23
  {"name": "Chat", "description": "Chat-related endpoints"},
24
  {"name": "Audio", "description": "Audio processing and TTS endpoints"},
25
  {"name": "Translation", "description": "Text translation endpoints"},
 
26
  {"name": "Utility", "description": "General utility endpoints"},
27
  ],
28
  )
 
35
  allow_headers=["*"],
36
  )
37
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Request/Response Models
39
  class TranscriptionResponse(BaseModel):
40
  text: str = Field(..., description="Transcribed text from the audio")
 
55
  schema_extra = {"example": {"result": "Processed audio output"}}
56
 
57
  class ChatRequest(BaseModel):
58
+ prompt: str = Field(..., description="Prompt for chat (max 1000 characters)")
59
+ src_lang: str = Field(..., description="Source language code")
60
+ tgt_lang: str = Field(..., description="Target language code")
 
 
 
 
 
 
 
 
61
 
62
  class Config:
63
  schema_extra = {
64
  "example": {
65
+ "prompt": "Hello, how are you?",
66
+ "src_lang": "kan_Knda",
67
+ "tgt_lang": "kan_Knda"
68
  }
69
  }
70
 
 
75
  schema_extra = {"example": {"response": "Hi there, I'm doing great!"}}
76
 
77
  class TranslationRequest(BaseModel):
78
+ sentences: List[str] = Field(..., description="List of sentences to translate")
79
+ src_lang: str = Field(..., description="Source language code")
80
+ tgt_lang: str = Field(..., description="Target language code")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  class Config:
83
  schema_extra = {
84
  "example": {
85
+ "sentences": ["Hello", "How are you?"],
86
+ "src_lang": "en",
87
+ "tgt_lang": "kan_Knda"
88
  }
89
  }
90
 
 
95
  schema_extra = {"example": {"translations": ["ನಮಸ್ಕಾರ", "ನೀವು ಹೇಗಿದ್ದೀರಿ?"]}}
96
 
97
  class VisualQueryRequest(BaseModel):
98
+ query: str = Field(..., description="Text query")
99
+ src_lang: str = Field(..., description="Source language code")
100
+ tgt_lang: str = Field(..., description="Target language code")
 
 
 
 
 
 
 
 
101
 
102
  class Config:
103
  schema_extra = {
104
  "example": {
105
+ "query": "Describe the image",
106
+ "src_lang": "kan_Knda",
107
+ "tgt_lang": "kan_Knda"
108
  }
109
  }
110
 
 
120
  class ExternalTTSService(TTSService):
121
  async def generate_speech(self, payload: dict) -> requests.Response:
122
  try:
123
+ base_url = "http://example.com/v1/audio/speech" # Placeholder URL
 
124
  return requests.post(
125
  base_url,
126
  json=payload,
 
145
  tags=["Utility"],
146
  response_model=dict)
147
  async def health_check():
148
+ return {"status": "healthy", "model": "llm_model_name"} # Placeholder model name
149
 
150
  @app.get("/",
151
  summary="Redirect to Docs",
 
154
  async def home():
155
  return RedirectResponse(url="/docs")
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  @app.post("/v1/audio/speech",
158
  summary="Generate Speech from Text",
159
+ description="Convert text to speech using an external TTS service.",
160
  tags=["Audio"],
161
  responses={
162
  200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
163
  400: {"description": "Invalid or empty input"},
 
 
164
  502: {"description": "External TTS service unavailable"},
165
  504: {"description": "TTS service timeout"}
166
  })
 
167
  async def generate_audio(
168
  request: Request,
169
+ input: str = Query(..., description="Text to convert to speech (max 1000 characters)"),
170
  response_format: str = Query("mp3", description="Audio format (ignored, defaults to mp3 for external API)"),
 
 
171
  tts_service: TTSService = Depends(get_tts_service)
172
  ):
173
+ if not input.strip():
 
 
 
 
 
 
 
 
 
 
 
174
  raise HTTPException(status_code=400, detail="Input cannot be empty")
175
+ if len(input) > 1000:
176
+ raise HTTPException(status_code=400, detail="Input cannot exceed 1000 characters")
177
 
178
  logger.info("Processing speech request", extra={
179
  "endpoint": "/v1/audio/speech",
180
+ "input_length": len(input),
181
+ "client_ip": request.client.host
 
182
  })
183
 
184
+ payload = {"text": input}
 
 
185
 
186
  try:
187
  response = await tts_service.generate_speech(payload)
 
205
  @app.post("/v1/chat",
206
  response_model=ChatResponse,
207
  summary="Chat with AI",
208
+ description="Generate a chat response from a prompt and language code.",
209
  tags=["Chat"],
210
  responses={
211
  200: {"description": "Chat response", "model": ChatResponse},
212
+ 400: {"description": "Invalid prompt or language code"},
 
 
213
  504: {"description": "Chat service timeout"}
214
  })
 
215
  async def chat(
216
  request: Request,
217
+ chat_request: ChatRequest
 
 
218
  ):
219
+ if not chat_request.prompt:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  raise HTTPException(status_code=400, detail="Prompt cannot be empty")
221
+ if len(chat_request.prompt) > 1000:
222
+ raise HTTPException(status_code=400, detail="Prompt cannot exceed 1000 characters")
223
 
224
+ logger.info(f"Received prompt: {chat_request.prompt}, src_lang: {chat_request.src_lang}")
225
 
226
  try:
227
+ external_url = "http://example.com/v1/chat" # Placeholder URL
228
  payload = {
229
+ "prompt": chat_request.prompt,
230
+ "src_lang": chat_request.src_lang,
231
+ "tgt_lang": chat_request.tgt_lang
232
  }
233
 
234
  response = requests.post(
 
260
  @app.post("/v1/transcribe/",
261
  response_model=TranscriptionResponse,
262
  summary="Transcribe Audio File",
263
+ description="Transcribe an audio file into text in the specified language.",
264
  tags=["Audio"],
265
  responses={
266
  200: {"description": "Transcription result", "model": TranscriptionResponse},
267
+ 400: {"description": "Invalid audio or language"},
 
268
  504: {"description": "Transcription service timeout"}
269
  })
270
  async def transcribe_audio(
271
+ file: UploadFile = File(..., description="Audio file to transcribe"),
272
+ language: str = Query(..., description="Language of the audio (kannada, hindi, tamil)")
 
 
273
  ):
 
 
 
 
 
 
 
 
 
 
 
274
  # Validate language
275
  allowed_languages = ["kannada", "hindi", "tamil"]
276
+ if language not in allowed_languages:
277
  raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
278
 
279
  start_time = time()
280
  try:
281
+ file_content = await file.read()
 
282
  files = {"file": (file.filename, file_content, file.content_type)}
283
 
284
+ external_url = f"http://example.com/v1/transcribe/?language={language}" # Placeholder URL
285
  response = requests.post(
286
  external_url,
287
  files=files,
 
294
  logger.info(f"Transcription completed in {time() - start_time:.2f} seconds")
295
  return TranscriptionResponse(text=transcription)
296
 
 
 
297
  except requests.Timeout:
298
  logger.error("Transcription service timed out")
299
  raise HTTPException(status_code=504, detail="Transcription service timeout")
 
304
  @app.post("/v1/translate",
305
  response_model=TranslationResponse,
306
  summary="Translate Text",
307
+ description="Translate a list of sentences from a source to a target language.",
308
  tags=["Translation"],
309
  responses={
310
  200: {"description": "Translation result", "model": TranslationResponse},
311
+ 400: {"description": "Invalid sentences or languages"},
 
312
  500: {"description": "Translation service error"},
313
  504: {"description": "Translation service timeout"}
314
  })
315
  async def translate(
316
+ request: TranslationRequest
 
 
317
  ):
318
+ # Validate inputs
319
+ if not request.sentences:
320
+ raise HTTPException(status_code=400, detail="Sentences cannot be empty")
321
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  # Validate language codes
323
  supported_languages = [
324
  "eng_Latn", "hin_Deva", "kan_Knda", "tam_Taml", "mal_Mlym", "tel_Telu",
325
  "deu_Latn", "fra_Latn", "nld_Latn", "spa_Latn", "ita_Latn", "por_Latn",
326
  "rus_Cyrl", "pol_Latn"
327
  ]
328
+ if request.src_lang not in supported_languages or request.tgt_lang not in supported_languages:
329
+ raise HTTPException(status_code=400, detail=f"Unsupported language codes: src={request.src_lang}, tgt={request.tgt_lang}")
 
330
 
331
+ logger.info(f"Received translation request: {len(request.sentences)} sentences, src_lang: {request.src_lang}, tgt_lang: {request.tgt_lang}")
332
 
333
+ external_url = "http://example.com/v1/translate" # Placeholder URL
334
 
335
  payload = {
336
+ "sentences": request.sentences,
337
+ "src_lang": request.src_lang,
338
+ "tgt_lang": request.tgt_lang
339
  }
340
 
341
  try:
 
353
  response_data = response.json()
354
  translations = response_data.get("translations", [])
355
 
356
+ if not translations or len(translations) != len(request.sentences):
357
  logger.warning(f"Unexpected response format: {response_data}")
358
  raise HTTPException(status_code=500, detail="Invalid response from translation service")
359
 
 
383
  @app.post("/v1/extract-text",
384
  response_model=PDFTextExtractionResponse,
385
  summary="Extract Text from PDF",
386
+ description="Extract text from a specified page of a PDF file by calling an external API.",
387
  tags=["PDF"],
388
  responses={
389
  200: {"description": "Extracted text", "model": PDFTextExtractionResponse},
390
+ 400: {"description": "Invalid PDF or page number"},
 
 
391
  500: {"description": "External API error"},
392
  504: {"description": "External API timeout"}
393
  })
 
394
  async def extract_text(
395
  request: Request,
396
+ file: UploadFile = File(..., description="PDF file to extract text from"),
397
  page_number: int = Query(1, description="Page number to extract text from (1-based indexing)"),
398
+ language: str = Query(..., description="Language of the PDF content (kannada, hindi, tamil)")
 
 
399
  ):
 
 
 
400
  # Validate page number
401
  if page_number < 1:
402
  raise HTTPException(status_code=400, detail="Page number must be at least 1")
403
 
 
 
 
 
 
 
 
 
 
404
  logger.info("Processing PDF text extraction request", extra={
405
  "endpoint": "/v1/extract-text",
406
  "file_name": file.filename,
407
  "page_number": page_number,
408
+ "language": language,
409
+ "client_ip": request.client.host
 
410
  })
411
 
412
  start_time = time()
413
  try:
414
+ file_content = await file.read()
415
+ files = {"file": (file.filename, file_content, file.content_type)}
 
416
 
417
+ external_url = f"http://example.com/extract-text/?page_number={page_number}&language={language}" # Placeholder URL
418
  response = requests.post(
419
  external_url,
420
  files=files,
 
445
  @app.post("/v1/visual_query",
446
  response_model=VisualQueryResponse,
447
  summary="Visual Query with Image",
448
+ description="Process a visual query with a text query, image, and language codes provided in a JSON body named 'data'.",
449
  tags=["Chat"],
450
  responses={
451
  200: {"description": "Query response", "model": VisualQueryResponse},
452
+ 400: {"description": "Invalid query or language codes"},
 
453
  422: {"description": "Validation error in request body"},
 
454
  504: {"description": "Visual query service timeout"}
455
  })
 
456
  async def visual_query(
457
  request: Request,
458
+ data: str = Form(..., description="JSON string containing query, src_lang, and tgt_lang"),
459
+ file: UploadFile = File(..., description="Image file to analyze")
 
 
460
  ):
 
 
 
461
  # Parse and validate JSON data
462
  try:
463
  import json
 
467
  logger.error(f"Failed to parse JSON data: {str(e)}")
468
  raise HTTPException(status_code=422, detail=f"Invalid JSON data: {str(e)}")
469
 
470
+ if not visual_query_request.query.strip():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  raise HTTPException(status_code=400, detail="Query cannot be empty")
472
+ if len(visual_query_request.query) > 1000:
473
+ raise HTTPException(status_code=400, detail="Query cannot exceed 1000 characters")
 
 
 
 
 
 
 
 
474
 
475
  logger.info("Processing visual query request", extra={
476
  "endpoint": "/v1/visual_query",
477
+ "query_length": len(visual_query_request.query),
478
  "file_name": file.filename,
479
+ "client_ip": request.client.host,
480
+ "src_lang": visual_query_request.src_lang,
481
+ "tgt_lang": visual_query_request.tgt_lang
 
482
  })
483
 
484
+ external_url = f"http://example.com/v1/visual_query/?src_lang={visual_query_request.src_lang}&tgt_lang={visual_query_request.tgt_lang}" # Placeholder URL
485
 
486
  try:
487
+ file_content = await file.read()
488
+ files = {"file": (file.filename, file_content, file.content_type)}
489
+ data = {"query": visual_query_request.query}
490
 
491
  response = requests.post(
492
  external_url,
 
526
 
527
  @app.post("/v1/speech_to_speech",
528
  summary="Speech-to-Speech Conversion",
529
+ description="Convert input speech to processed speech in the specified language by calling an external speech-to-speech API.",
530
  tags=["Audio"],
531
  responses={
532
  200: {"description": "Audio stream", "content": {"audio/mp3": {"example": "Binary audio data"}}},
533
+ 400: {"description": "Invalid input or language"},
 
 
534
  504: {"description": "External API timeout"},
535
  500: {"description": "External API error"}
536
  })
 
537
  async def speech_to_speech(
538
  request: Request,
539
+ file: UploadFile = File(..., description="Audio file to process"),
540
+ language: str = Query(..., description="Language of the audio (kannada, hindi, tamil)")
 
 
541
  ) -> StreamingResponse:
 
 
 
 
 
 
 
 
 
 
 
542
  # Validate language
543
  allowed_languages = [lang.value for lang in SupportedLanguage]
544
+ if language not in allowed_languages:
545
  raise HTTPException(status_code=400, detail=f"Language must be one of {allowed_languages}")
546
 
547
  logger.info("Processing speech-to-speech request", extra={
548
  "endpoint": "/v1/speech_to_speech",
549
  "audio_filename": file.filename,
550
+ "language": language,
551
+ "client_ip": request.client.host
 
552
  })
553
 
554
  try:
555
+ file_content = await file.read()
 
556
  files = {"file": (file.filename, file_content, file.content_type)}
557
+ external_url = f"http://example.com/v1/speech_to_speech?language={language}" # Placeholder URL
558
 
559
  response = requests.post(
560
  external_url,
 
578
  )
579
 
580
  except requests.Timeout:
581
+ logger.error("External speech-to-speech API timed out")
582
  raise HTTPException(status_code=504, detail="External API timeout")
583
  except requests.RequestException as e:
584
+ logger.error(f"External speech-to-speech API error: {str(e)}")
585
  raise HTTPException(status_code=500, detail=f"External API error: {str(e)}")
586
 
587
  if __name__ == "__main__":
588
  parser = argparse.ArgumentParser(description="Run the FastAPI server.")
589
+ parser.add_argument("--port", type=int, default=8000, help="Port to run the server on.")
590
+ parser.add_argument("--host", type=str, default="0.0.0.0", help="Host to run the server on.")
591
  args = parser.parse_args()
592
  uvicorn.run(app, host=args.host, port=args.port)