ChandimaPrabath committed on
Commit
0bf8a29
·
verified ·
1 Parent(s): 0c3f503

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import uuid
4
+ import shutil
5
+ import tempfile
6
+
7
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Query
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import JSONResponse
10
+
11
+ from stt import SpeechToText
12
+
13
# -----------------------------------------------------------------------------
# CONFIGURATION (each value can be overridden through the environment)
# -----------------------------------------------------------------------------
# Whisper model identifier handed to the STT engine.
MODEL_NAME = os.environ.get("WHISPER_MODEL", "tiny.en")

# Default length, in seconds, of a microphone recording.
DEFAULT_DUR = float(os.environ.get("RECORD_DURATION", "5.0"))

# Directory for temporary audio files; defaults to the system temp directory.
TEMP_DIR = os.environ.get("TEMP_DIR", tempfile.gettempdir())

# MIME types accepted by the /transcribe endpoint.
ALLOWED_TYPES = {
    "audio/wav",
    "audio/x-wav",
    "audio/mpeg",
    "audio/mp3",
}
# -----------------------------------------------------------------------------
21
+
22
# Metadata shown in the generated API documentation.
_APP_METADATA = {
    "title": "STT Service",
    "description": "Speech-to-Text API using pywhispercpp's Whisper",
    "version": "1.0",
}
app = FastAPI(**_APP_METADATA)

# CORS is left wide open: any origin and any header (tighten for production).
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["POST", "GET", "OPTIONS"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
35
+
36
# One shared STT engine, built a single time when the module is imported.
_ENGINE_SETTINGS = {
    "model_name": MODEL_NAME,
    "sample_rate": 16_000,
    "record_duration": DEFAULT_DUR,
    "temp_dir": TEMP_DIR,
    "verbose": False,  # mute console logs in API
}
stt_engine = SpeechToText(**_ENGINE_SETTINGS)
44
+
45
+
46
@app.get("/health", summary="Health check")
def health():
    """Return a fixed "ok" status together with the configured model name."""
    payload = {"status": "ok"}
    payload["model"] = MODEL_NAME
    return payload
49
+
50
+
51
def _transcribe_upload(source, tmp_path):
    """Copy *source* to *tmp_path*, transcribe that file, then delete it.

    This helper is synchronous (blocking file I/O plus the engine call), so the
    endpoint runs it in a worker thread rather than on the event loop.

    Args:
        source: Readable binary file object holding the uploaded audio.
        tmp_path: Destination path for the temporary copy.

    Returns:
        Whatever ``stt_engine.transcribe_file`` returns for the saved file.
    """
    try:
        with open(tmp_path, "wb") as out_f:
            shutil.copyfileobj(source, out_f)
        return stt_engine.transcribe_file(tmp_path)
    finally:
        # Remove the temp file unconditionally; it may not exist if open() failed.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass


@app.post("/transcribe", summary="Transcribe uploaded audio file")
async def transcribe_audio(
    file: UploadFile = File(..., description="An audio file (WAV, MP3, etc.)"),
):
    """Transcribe an uploaded audio file and return ``{"text": ...}``.

    Args:
        file: The uploaded audio; its content type must be in ``ALLOWED_TYPES``.

    Returns:
        A dict with the transcription under the ``"text"`` key.

    Raises:
        HTTPException: 415 when the content type is not allowed, 500 when
            saving or transcribing the upload fails.
    """
    # Imported locally so this block does not depend on a file-level import.
    from fastapi.concurrency import run_in_threadpool

    # Media types are case-insensitive and may carry parameters
    # (e.g. "audio/wav; codecs=1"); compare only the bare type.
    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()
    if media_type not in ALLOWED_TYPES:
        raise HTTPException(415, detail=f"Unsupported Media Type: {file.content_type}")

    # 1) choose a unique temp path, keeping the upload's extension when it has
    #    one (the client may omit the filename entirely)
    ext = os.path.splitext(file.filename or "")[1] or ".wav"
    tmp_path = os.path.join(TEMP_DIR, f"{uuid.uuid4()}{ext}")

    # 2) save + transcribe off the event loop so other requests keep flowing
    try:
        text = await run_in_threadpool(_transcribe_upload, file.file, tmp_path)
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(500, detail=str(e)) from e
    return {"text": text}
77
+
78
+
79
@app.post("/record", summary="Record from mic + transcribe")
def record_and_transcribe(
    duration: float = Query(
        DEFAULT_DUR, gt=0, le=30,
        description="Seconds to record from server mic"
    )
):
    """
    Records from the server's default microphone for `duration` seconds,
    then transcribes that chunk of audio.

    The engine's ``record_duration`` is overridden for this call only and is
    put back afterwards, whether or not transcription succeeds.

    Returns:
        A dict with the transcription under the ``"text"`` key.

    Raises:
        HTTPException: 500 when recording or transcription fails.
    """
    # NOTE(review): record_duration lives on the shared engine object, so
    # overlapping /record requests could see each other's override -- confirm
    # whether calls need to be serialized.
    try:
        # Capture the current value *before* the restoring try/finally, so the
        # restore step can never reference an unbound name.
        original = stt_engine.record_duration
        try:
            # temporarily override record_duration
            stt_engine.record_duration = duration
            # record & transcribe
            text = stt_engine.transcribe(save_temp=False)
        finally:
            stt_engine.record_duration = original
    except Exception as e:
        raise HTTPException(500, detail=str(e)) from e
    return {"text": text}
102
+
103
+
104
# Script entry point: running `python app.py` starts Uvicorn directly
if __name__ == "__main__":
    import uvicorn

    # Listening port comes from the PORT environment variable (default 8000).
    listen_port = int(os.getenv("PORT", 8000))
    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, reload=True)