Spaces:

ChandimaPrabath
/

stt

Sleeping

App Files Files Community

ChandimaPrabath committed 14 days ago

Commit

0bf8a29

verified ·

1 Parent(s): 0c3f503

Create app.py

Browse files

Files changed (1) hide show

app.py +113 -0

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+# app.py
+import os
+import uuid
+import shutil
+import tempfile
+from fastapi import FastAPI, UploadFile, File, HTTPException, Query
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from stt import SpeechToText
+# -----------------------------------------------------------------------------
+# CONFIGURATION
+# -----------------------------------------------------------------------------
+MODEL_NAME      = os.getenv("WHISPER_MODEL", "tiny.en")
+DEFAULT_DUR     = float(os.getenv("RECORD_DURATION", "5.0"))
+TEMP_DIR        = os.getenv("TEMP_DIR", tempfile.gettempdir())
+ALLOWED_TYPES   = {"audio/wav", "audio/x-wav", "audio/mpeg", "audio/mp3"}
+# -----------------------------------------------------------------------------
+app = FastAPI(
+    title="STT Service",
+    description="Speech-to-Text API using pywhispercpp's Whisper",
+    version="1.0",
+)
+# Allow any origin (adjust for production)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["POST", "GET", "OPTIONS"],
+    allow_headers=["*"],
+)
+# Load the STT engine once at startup
+stt_engine = SpeechToText(
+    model_name=MODEL_NAME,
+    sample_rate=16_000,
+    record_duration=DEFAULT_DUR,
+    temp_dir=TEMP_DIR,
+    verbose=False,       # mute console logs in API
+)
+@app.get("/health", summary="Health check")
+def health():
+    return {"status": "ok", "model": MODEL_NAME}
+@app.post("/transcribe", summary="Transcribe uploaded audio file")
+async def transcribe_audio(
+    file: UploadFile = File(..., description="An audio file (WAV, MP3, etc.)"),
+):
+    if file.content_type not in ALLOWED_TYPES:
+        raise HTTPException(415, detail=f"Unsupported Media Type: {file.content_type}")
+    # 1) save upload to temp WAV path
+    ext = os.path.splitext(file.filename)[1] or ".wav"
+    tmp_name = f"{uuid.uuid4()}{ext}"
+    tmp_path = os.path.join(TEMP_DIR, tmp_name)
+    try:
+        with open(tmp_path, "wb") as out_f:
+            shutil.copyfileobj(file.file, out_f)
+        # 2) run transcription
+        text = stt_engine.transcribe_file(tmp_path)
+        return {"text": text}
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(500, detail=str(e))
+    finally:
+        # clean up
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+@app.post("/record", summary="Record from mic + transcribe")
+def record_and_transcribe(
+    duration: float = Query(
+        DEFAULT_DUR, gt=0, le=30,
+        description="Seconds to record from server mic"
+    )
+):
+    """
+    Records from the server's default microphone for `duration` seconds,
+    then transcribes that chunk of audio.
+    """
+    try:
+        # temporarily override record_duration
+        original = stt_engine.record_duration
+        stt_engine.record_duration = duration
+        # record & transcribe
+        text = stt_engine.transcribe(save_temp=False)
+        return {"text": text}
+    except Exception as e:
+        raise HTTPException(500, detail=str(e))
+    finally:
+        stt_engine.record_duration = original
+# If you run with `python app.py`, this block ensures Uvicorn starts
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "app:app",
+        host="0.0.0.0",
+        port=int(os.getenv("PORT", 8000)),
+        reload=True
+    )