Spaces:

r3Vibe
/

mother-tongue

Runtime error

App Files Files Community

arnabg95 committed on Jun 2, 2024

Commit

c3c3a5e

1 Parent(s): 3fa3e73

all files added

Browse files

Files changed (28) hide show

.vscode/PythonImportHelper-v2-Completion.json +646 -0
Dockerfile +14 -0
app/.vscode/PythonImportHelper-v2-Completion.json +144 -0
app/__init__.py +0 -0
app/__pycache__/__init__.cpython-311.pyc +0 -0
app/__pycache__/main.cpython-311.pyc +0 -0
app/__pycache__/matcher.cpython-311.pyc +0 -0
app/__pycache__/mfcc.cpython-311.pyc +0 -0
app/__pycache__/transcriber.cpython-311.pyc +0 -0
app/main.py +56 -0
app/matcher.py +46 -0
app/mfcc.py +49 -0
app/routers/V1/__init__.py +0 -0
app/routers/V1/__pycache__/__init__.cpython-311.pyc +0 -0
app/routers/V1/__pycache__/v1_routers.cpython-311.pyc +0 -0
app/routers/V1/v1_routers.py +14 -0
app/routers/V1/voice/__init__.py +0 -0
app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc +0 -0
app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc +0 -0
app/routers/V1/voice/voice_router.py +64 -0
app/routers/__init__.py +0 -0
app/routers/__pycache__/__init__.cpython-311.pyc +0 -0
app/routers/__pycache__/routes.cpython-311.pyc +0 -0
app/routers/routes.py +13 -0
app/static/main.css +0 -0
app/templates/index.html +323 -0
app/transcriber.py +41 -0
requirements.txt +0 -0

.vscode/PythonImportHelper-v2-Completion.json ADDED Viewed

	@@ -0,0 +1,646 @@

+[
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "UploadFile",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "File",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "Body",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "HTTPException",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "status",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "FastAPI",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "Request",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "JSONResponse",
+        "importPath": "fastapi.responses",
+        "description": "fastapi.responses",
+        "isExtraImport": true,
+        "detail": "fastapi.responses",
+        "documentation": {}
+    },
+    {
+        "label": "HTMLResponse",
+        "importPath": "fastapi.responses",
+        "description": "fastapi.responses",
+        "isExtraImport": true,
+        "detail": "fastapi.responses",
+        "documentation": {}
+    },
+    {
+        "label": "Annotated",
+        "importPath": "typing",
+        "description": "typing",
+        "isExtraImport": true,
+        "detail": "typing",
+        "documentation": {}
+    },
+    {
+        "label": "time",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "time",
+        "description": "time",
+        "detail": "time",
+        "documentation": {}
+    },
+    {
+        "label": "os",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "os",
+        "description": "os",
+        "detail": "os",
+        "documentation": {}
+    },
+    {
+        "label": "get_transcription",
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "isExtraImport": true,
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "match",
+        "importPath": "app.matcher",
+        "description": "app.matcher",
+        "isExtraImport": true,
+        "detail": "app.matcher",
+        "documentation": {}
+    },
+    {
+        "label": "mfcc_similarty_check",
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "isExtraImport": true,
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "voice_router",
+        "importPath": "app.routers.V1.voice",
+        "description": "app.routers.V1.voice",
+        "isExtraImport": true,
+        "detail": "app.routers.V1.voice",
+        "documentation": {}
+    },
+    {
+        "label": "v1_routers",
+        "importPath": "app.routers.V1",
+        "description": "app.routers.V1",
+        "isExtraImport": true,
+        "detail": "app.routers.V1",
+        "documentation": {}
+    },
+    {
+        "label": "StaticFiles",
+        "importPath": "fastapi.staticfiles",
+        "description": "fastapi.staticfiles",
+        "isExtraImport": true,
+        "detail": "fastapi.staticfiles",
+        "documentation": {}
+    },
+    {
+        "label": "Jinja2Templates",
+        "importPath": "fastapi.templating",
+        "description": "fastapi.templating",
+        "isExtraImport": true,
+        "detail": "fastapi.templating",
+        "documentation": {}
+    },
+    {
+        "label": "CORSMiddleware",
+        "importPath": "fastapi.middleware.cors",
+        "description": "fastapi.middleware.cors",
+        "isExtraImport": true,
+        "detail": "fastapi.middleware.cors",
+        "documentation": {}
+    },
+    {
+        "label": "routes",
+        "importPath": "app.routers",
+        "description": "app.routers",
+        "isExtraImport": true,
+        "detail": "app.routers",
+        "documentation": {}
+    },
+    {
+        "label": "difflib",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "difflib",
+        "description": "difflib",
+        "detail": "difflib",
+        "documentation": {}
+    },
+    {
+        "label": "fuzz",
+        "importPath": "fuzzywuzzy",
+        "description": "fuzzywuzzy",
+        "isExtraImport": true,
+        "detail": "fuzzywuzzy",
+        "documentation": {}
+    },
+    {
+        "label": "librosa",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "librosa",
+        "description": "librosa",
+        "detail": "librosa",
+        "documentation": {}
+    },
+    {
+        "label": "AutoFeatureExtractor",
+        "importPath": "transformers",
+        "description": "transformers",
+        "isExtraImport": true,
+        "detail": "transformers",
+        "documentation": {}
+    },
+    {
+        "label": "Wav2Vec2BertModel",
+        "importPath": "transformers",
+        "description": "transformers",
+        "isExtraImport": true,
+        "detail": "transformers",
+        "documentation": {}
+    },
+    {
+        "label": "AutoModelForSpeechSeq2Seq",
+        "importPath": "transformers",
+        "description": "transformers",
+        "isExtraImport": true,
+        "detail": "transformers",
+        "documentation": {}
+    },
+    {
+        "label": "AutoProcessor",
+        "importPath": "transformers",
+        "description": "transformers",
+        "isExtraImport": true,
+        "detail": "transformers",
+        "documentation": {}
+    },
+    {
+        "label": "pipeline",
+        "importPath": "transformers",
+        "description": "transformers",
+        "isExtraImport": true,
+        "detail": "transformers",
+        "documentation": {}
+    },
+    {
+        "label": "soundfile",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "soundfile",
+        "description": "soundfile",
+        "detail": "soundfile",
+        "documentation": {}
+    },
+    {
+        "label": "cosine_similarity",
+        "importPath": "sklearn.metrics.pairwise",
+        "description": "sklearn.metrics.pairwise",
+        "isExtraImport": true,
+        "detail": "sklearn.metrics.pairwise",
+        "documentation": {}
+    },
+    {
+        "label": "numpy",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "numpy",
+        "description": "numpy",
+        "detail": "numpy",
+        "documentation": {}
+    },
+    {
+        "label": "torch",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "torch",
+        "description": "torch",
+        "detail": "torch",
+        "documentation": {}
+    },
+    {
+        "label": "load_dataset",
+        "importPath": "datasets",
+        "description": "datasets",
+        "isExtraImport": true,
+        "detail": "datasets",
+        "documentation": {}
+    },
+    {
+        "label": "annotations",
+        "importPath": "__future__",
+        "description": "__future__",
+        "isExtraImport": true,
+        "detail": "__future__",
+        "documentation": {}
+    },
+    {
+        "label": "site",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "site",
+        "description": "site",
+        "detail": "site",
+        "documentation": {}
+    },
+    {
+        "label": "sys",
+        "kind": 6,
+        "isExtraImport": true,
+        "importPath": "sys",
+        "description": "sys",
+        "detail": "sys",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "app.routers.V1.voice.voice_router",
+        "description": "app.routers.V1.voice.voice_router",
+        "peekOfCode": "router = APIRouter(prefix=\"/voice\", tags=[\"Voice\"])\n@router.post(\"/transcribe\")\nasync def transcribe_audio(\n    file: Annotated[UploadFile, File()], matcher_text: Annotated[str, Body()]\n):\n    try:\n        # Validate file type\n        if not file.filename.endswith(\".wav\"):\n            raise HTTPException(\n                status_code=status.HTTP_400_BAD_REQUEST,",
+        "detail": "app.routers.V1.voice.voice_router",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "app.routers.V1.v1_routers",
+        "description": "app.routers.V1.v1_routers",
+        "peekOfCode": "router = APIRouter()\n\"\"\" include auth routes \"\"\"\nrouter.include_router(voice_router.router)",
+        "detail": "app.routers.V1.v1_routers",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "app.routers.routes",
+        "description": "app.routers.routes",
+        "peekOfCode": "router = APIRouter()\n\"\"\" include the v1 routes here \"\"\"\nrouter.include_router(v1_routers.router)",
+        "detail": "app.routers.routes",
+        "documentation": {}
+    },
+    {
+        "label": "app",
+        "kind": 5,
+        "importPath": "app.main",
+        "description": "app.main",
+        "peekOfCode": "app = FastAPI(\n    title=\"Mother Tongue Voice Matcher\",\n    version=\"0.0.5\",\n    servers=[{\n        \"url\": \"http://127.0.0.1:8000/api/v1\", \"description\": \"Local Server\"\n    }],\n    root_path=\"/api/v1\",\n    root_path_in_servers=False,\n)\n# cors policy",
+        "detail": "app.main",
+        "documentation": {}
+    },
+    {
+        "label": "origins",
+        "kind": 5,
+        "importPath": "app.main",
+        "description": "app.main",
+        "peekOfCode": "origins = [\n    \"http://localhost\",\n    \"http://localhost:8080\",\n    \"http://localhost:3000\",\n    \"http://localhost:5173\",\n    \"http://127.0.0.1\",\n    \"http://127.0.0.1:8080\",\n    \"http://127.0.0.1:3000\",\n    \"http://127.0.0.1:5173\",\n]",
+        "detail": "app.main",
+        "documentation": {}
+    },
+    {
+        "label": "templates",
+        "kind": 5,
+        "importPath": "app.main",
+        "description": "app.main",
+        "peekOfCode": "templates = Jinja2Templates(directory=\"app/templates\")\n@app.get(\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def root(request: Request):\n    \"\"\"set the root to show a html welcome page\"\"\"\n    return templates.TemplateResponse(request=request, name=\"index.html\")\n# include all the other api endpoints\napp.include_router(routes.router)",
+        "detail": "app.main",
+        "documentation": {}
+    },
+    {
+        "label": "phonetic_match",
+        "kind": 2,
+        "importPath": "app.matcher",
+        "description": "app.matcher",
+        "peekOfCode": "def phonetic_match(word1, word2):\n    \"\"\"\n    Compares two words based on their phonetic similarity.\n    \"\"\"\n    return fuzz.ratio(word1, word2)\n# Custom sequence matching function\ndef sequence_match(a, b):\n    \"\"\"\n    Uses sequence matching to compare two sequences of words.\n    \"\"\"",
+        "detail": "app.matcher",
+        "documentation": {}
+    },
+    {
+        "label": "sequence_match",
+        "kind": 2,
+        "importPath": "app.matcher",
+        "description": "app.matcher",
+        "peekOfCode": "def sequence_match(a, b):\n    \"\"\"\n    Uses sequence matching to compare two sequences of words.\n    \"\"\"\n    return difflib.SequenceMatcher(None, a, b).ratio()\n# Main function to compare texts with percentage match\ndef compare_texts(text1, text2):\n    \"\"\"\n    Compares two texts using phonetic matching and sequence matching,\n    returning a percentage match score.",
+        "detail": "app.matcher",
+        "documentation": {}
+    },
+    {
+        "label": "compare_texts",
+        "kind": 2,
+        "importPath": "app.matcher",
+        "description": "app.matcher",
+        "peekOfCode": "def compare_texts(text1, text2):\n    \"\"\"\n    Compares two texts using phonetic matching and sequence matching,\n    returning a percentage match score.\n    \"\"\"\n    words1 = text1.lower().split()\n    words2 = text2.lower().split()\n    total_matches = len(words1)\n    mismatches = 0\n    for word1, word2 in zip(words1, words2):",
+        "detail": "app.matcher",
+        "documentation": {}
+    },
+    {
+        "label": "match",
+        "kind": 2,
+        "importPath": "app.matcher",
+        "description": "app.matcher",
+        "peekOfCode": "def match(original, transcription):\n    return compare_texts(original, transcription)",
+        "detail": "app.matcher",
+        "documentation": {}
+    },
+    {
+        "label": "load_and_resample_audio",
+        "kind": 2,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "def load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate\ndef calculate_mfcc(audio_data, sample_rate):\n    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)\n    mfccs_scaled = np.mean(mfccs.T, axis=0)  # Average across time dimension",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "calculate_mfcc",
+        "kind": 2,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "def calculate_mfcc(audio_data, sample_rate):\n    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)\n    mfccs_scaled = np.mean(mfccs.T, axis=0)  # Average across time dimension\n    return mfccs_scaled\ndef calculate_similarity(mfccs1, mfccs2):\n    similarity = cosine_similarity(\n        mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))\n    return similarity[0][0]\ndef mfcc_similarty_check(original: str, recorded: str):\n    correct_pronunciation_audio, _ = load_and_resample_audio(original)",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "calculate_similarity",
+        "kind": 2,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "def calculate_similarity(mfccs1, mfccs2):\n    similarity = cosine_similarity(\n        mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))\n    return similarity[0][0]\ndef mfcc_similarty_check(original: str, recorded: str):\n    correct_pronunciation_audio, _ = load_and_resample_audio(original)\n    user_pronunciation_audio, sample_rate = load_and_resample_audio(recorded)\n    # Extract MFCCs from audio data\n    correct_mfccs = calculate_mfcc(correct_pronunciation_audio, sample_rate)\n    user_mfccs = calculate_mfcc(user_pronunciation_audio, sample_rate)",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "mfcc_similarty_check",
+        "kind": 2,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "def mfcc_similarty_check(original: str, recorded: str):\n    correct_pronunciation_audio, _ = load_and_resample_audio(original)\n    user_pronunciation_audio, sample_rate = load_and_resample_audio(recorded)\n    # Extract MFCCs from audio data\n    correct_mfccs = calculate_mfcc(correct_pronunciation_audio, sample_rate)\n    user_mfccs = calculate_mfcc(user_pronunciation_audio, sample_rate)\n    distance = np.linalg.norm(correct_mfccs.flatten() - user_mfccs.flatten())\n    # Calculate cosine similarity using MFCCs\n    similarity_score = calculate_similarity(correct_mfccs, user_mfccs)\n    accuracy_percentage = similarity_score * 100",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "model_id",
+        "kind": 5,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "model_id = \"facebook/w2v-bert-2.0\"\nfeature_extractor = AutoFeatureExtractor.from_pretrained(model_id)\nmodel = Wav2Vec2BertModel.from_pretrained(model_id)\ndef load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "feature_extractor",
+        "kind": 5,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)\nmodel = Wav2Vec2BertModel.from_pretrained(model_id)\ndef load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate\ndef calculate_mfcc(audio_data, sample_rate):",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "model",
+        "kind": 5,
+        "importPath": "app.mfcc",
+        "description": "app.mfcc",
+        "peekOfCode": "model = Wav2Vec2BertModel.from_pretrained(model_id)\ndef load_and_resample_audio(file_path, target_sample_rate=16000):\n    audio_input, sample_rate = sf.read(file_path)\n    if sample_rate != target_sample_rate:\n        audio_input = librosa.resample(\n            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate\n        )\n    return audio_input, sample_rate\ndef calculate_mfcc(audio_data, sample_rate):\n    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)",
+        "detail": "app.mfcc",
+        "documentation": {}
+    },
+    {
+        "label": "get_transcription",
+        "kind": 2,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "def get_transcription(file: str):\n    result = pipe(file, generate_kwargs={\"language\": \"shona\"})\n    return result[\"text\"]",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "device",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\ntorch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32\nmodel_id = \"openai/whisper-large-v3\"\nmodel = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "torch_dtype",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32\nmodel_id = \"openai/whisper-large-v3\"\nmodel = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "model_id",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "model_id = \"openai/whisper-large-v3\"\nmodel = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "model",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "model = AutoModelForSpeechSeq2Seq.from_pretrained(\n    model_id,\n    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True\n)\nmodel.to(device)\nprocessor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,\n    tokenizer=processor.tokenizer,",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "processor",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "processor = AutoProcessor.from_pretrained(model_id)\npipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,\n    tokenizer=processor.tokenizer,\n    feature_extractor=processor.feature_extractor,\n    max_new_tokens=128,\n    chunk_length_s=30,\n    batch_size=16,\n    return_timestamps=True,",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "pipe",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "pipe = pipeline(\n    \"automatic-speech-recognition\",\n    model=model,\n    tokenizer=processor.tokenizer,\n    feature_extractor=processor.feature_extractor,\n    max_new_tokens=128,\n    chunk_length_s=30,\n    batch_size=16,\n    return_timestamps=True,\n    torch_dtype=torch_dtype,",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "dataset",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "dataset = load_dataset(\n    \"distil-whisper/librispeech_long\", \"clean\", split=\"validation\")\nsample = dataset[0][\"audio\"]\ndef get_transcription(file: str):\n    result = pipe(file, generate_kwargs={\"language\": \"shona\"})\n    return result[\"text\"]",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "sample",
+        "kind": 5,
+        "importPath": "app.transcriber",
+        "description": "app.transcriber",
+        "peekOfCode": "sample = dataset[0][\"audio\"]\ndef get_transcription(file: str):\n    result = pipe(file, generate_kwargs={\"language\": \"shona\"})\n    return result[\"text\"]",
+        "detail": "app.transcriber",
+        "documentation": {}
+    },
+    {
+        "label": "bin_dir",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "bin_dir = os.path.dirname(abs_file)\nbase = bin_dir[: -len(\"Scripts\") - 1]  # strip away the bin part from the __file__, plus the path separator\n# prepend bin to PATH (this file is inside the bin directory)\nos.environ[\"PATH\"] = os.pathsep.join([bin_dir, *os.environ.get(\"PATH\", \"\").split(os.pathsep)])\nos.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "base",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "base = bin_dir[: -len(\"Scripts\") - 1]  # strip away the bin part from the __file__, plus the path separator\n# prepend bin to PATH (this file is inside the bin directory)\nos.environ[\"PATH\"] = os.pathsep.join([bin_dir, *os.environ.get(\"PATH\", \"\").split(os.pathsep)])\nos.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "os.environ[\"PATH\"]",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "os.environ[\"PATH\"] = os.pathsep.join([bin_dir, *os.environ.get(\"PATH\", \"\").split(os.pathsep)])\nos.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "os.environ[\"VIRTUAL_ENV\"]",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "os.environ[\"VIRTUAL_ENV\"] = base  # virtual env is right above bin directory\nos.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "os.environ[\"VIRTUAL_ENV_PROMPT\"]",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "os.environ[\"VIRTUAL_ENV_PROMPT\"] = \"\" or os.path.basename(base)  # noqa: SIM222\n# add the virtual environments libraries to the host python import mechanism\nprev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "prev_length",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "prev_length = len(sys.path)\nfor lib in \"..\\\\Lib\\\\site-packages\".split(os.pathsep):\n    path = os.path.realpath(os.path.join(bin_dir, lib))\n    site.addsitedir(path.decode(\"utf-8\") if \"\" else path)\nsys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "sys.path[:]",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]\nsys.real_prefix = sys.prefix\nsys.prefix = base",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "sys.real_prefix",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "sys.real_prefix = sys.prefix\nsys.prefix = base",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    },
+    {
+        "label": "sys.prefix",
+        "kind": 5,
+        "importPath": "env.Scripts.activate_this",
+        "description": "env.Scripts.activate_this",
+        "peekOfCode": "sys.prefix = base",
+        "detail": "env.Scripts.activate_this",
+        "documentation": {}
+    }
+]

Dockerfile ADDED Viewed

	@@ -0,0 +1,14 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.11
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY . .
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

app/.vscode/PythonImportHelper-v2-Completion.json ADDED Viewed

	@@ -0,0 +1,144 @@

+[
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "APIRouter",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "FastAPI",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "Request",
+        "importPath": "fastapi",
+        "description": "fastapi",
+        "isExtraImport": true,
+        "detail": "fastapi",
+        "documentation": {}
+    },
+    {
+        "label": "v1_routers",
+        "importPath": "app.routers.V1",
+        "description": "app.routers.V1",
+        "isExtraImport": true,
+        "detail": "app.routers.V1",
+        "documentation": {}
+    },
+    {
+        "label": "HTMLResponse",
+        "importPath": "fastapi.responses",
+        "description": "fastapi.responses",
+        "isExtraImport": true,
+        "detail": "fastapi.responses",
+        "documentation": {}
+    },
+    {
+        "label": "StaticFiles",
+        "importPath": "fastapi.staticfiles",
+        "description": "fastapi.staticfiles",
+        "isExtraImport": true,
+        "detail": "fastapi.staticfiles",
+        "documentation": {}
+    },
+    {
+        "label": "Jinja2Templates",
+        "importPath": "fastapi.templating",
+        "description": "fastapi.templating",
+        "isExtraImport": true,
+        "detail": "fastapi.templating",
+        "documentation": {}
+    },
+    {
+        "label": "CORSMiddleware",
+        "importPath": "fastapi.middleware.cors",
+        "description": "fastapi.middleware.cors",
+        "isExtraImport": true,
+        "detail": "fastapi.middleware.cors",
+        "documentation": {}
+    },
+    {
+        "label": "routes",
+        "importPath": "app.routers",
+        "description": "app.routers",
+        "isExtraImport": true,
+        "detail": "app.routers",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "routers.V1.voice.voice_router",
+        "description": "routers.V1.voice.voice_router",
+        "peekOfCode": "router = APIRouter(prefix=\"/voice\", tags=[\"Voice\"])\[email protected](\"/transcribe\")\nasync def transcribe_audio():\n    return",
+        "detail": "routers.V1.voice.voice_router",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "routers.V1.v1_routers",
+        "description": "routers.V1.v1_routers",
+        "peekOfCode": "router = APIRouter()\n\"\"\" include auth routes \"\"\"\nrouter.include_router()",
+        "detail": "routers.V1.v1_routers",
+        "documentation": {}
+    },
+    {
+        "label": "router",
+        "kind": 5,
+        "importPath": "routers.routes",
+        "description": "routers.routes",
+        "peekOfCode": "router = APIRouter()\n\"\"\" include the v1 routes here \"\"\"\nrouter.include_router(v1_routers.router)",
+        "detail": "routers.routes",
+        "documentation": {}
+    },
+    {
+        "label": "app",
+        "kind": 5,
+        "importPath": "main",
+        "description": "main",
+        "peekOfCode": "app = FastAPI(\n    title=\"Mother Tongue Voice Matcher\",\n    version=\"0.0.5\",\n    servers=[{\n        \"url\": \"http://127.0.0.1:8000/api/v1\", \"description\": \"Local Server\"\n    }],\n    root_path=\"/api/v1\",\n    root_path_in_servers=False,\n)\n# cors policy",
+        "detail": "main",
+        "documentation": {}
+    },
+    {
+        "label": "origins",
+        "kind": 5,
+        "importPath": "main",
+        "description": "main",
+        "peekOfCode": "origins = [\n    \"http://localhost\",\n    \"http://localhost:8080\",\n    \"http://localhost:3000\",\n    \"http://localhost:5173\",\n    \"http://127.0.0.1\",\n    \"http://127.0.0.1:8080\",\n    \"http://127.0.0.1:3000\",\n    \"http://127.0.0.1:5173\",\n]",
+        "detail": "main",
+        "documentation": {}
+    },
+    {
+        "label": "templates",
+        "kind": 5,
+        "importPath": "main",
+        "description": "main",
+        "peekOfCode": "templates = Jinja2Templates(directory=\"app/templates\")\[email protected](\"/\", response_class=HTMLResponse, include_in_schema=False)\nasync def root(request: Request):\n    \"\"\"set the root to show a html welcome page\"\"\"\n    return templates.TemplateResponse(request=request, name=\"index.html\")\n# include all the other api endpoints\napp.include_router(routes.router)",
+        "detail": "main",
+        "documentation": {}
+    }
+]

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (141 Bytes). View file

app/__pycache__/main.cpython-311.pyc ADDED Viewed

Binary file (2.07 kB). View file

app/__pycache__/matcher.cpython-311.pyc ADDED Viewed

Binary file (1.92 kB). View file

app/__pycache__/mfcc.cpython-311.pyc ADDED Viewed

Binary file (2.67 kB). View file

app/__pycache__/transcriber.cpython-311.pyc ADDED Viewed

Binary file (1.72 kB). View file

app/main.py ADDED Viewed

	@@ -0,0 +1,56 @@

+""" main api file """
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+from app.routers import routes
+""" initialize app with openapi configurations """
+# Application object. The OpenAPI metadata advertises one local server and the
+# app is configured with root_path "/api/v1" (not auto-added to `servers`).
+# NOTE(review): the advertised server URL uses port 8000, while the Dockerfile
+# in this commit starts uvicorn on port 7860 - confirm which one is intended.
+app = FastAPI(
+    title="Mother Tongue Voice Matcher",
+    version="0.0.5",
+    servers=[{
+        "url": "http://127.0.0.1:8000/api/v1", "description": "Local Server"
+    }],
+    root_path="/api/v1",
+    root_path_in_servers=False,
+)
+# CORS policy: only the localhost / 127.0.0.1 origins listed here are allowed.
+origins = [
+    "http://localhost",
+    "http://localhost:8080",
+    "http://localhost:3000",
+    "http://localhost:5173",
+    "http://127.0.0.1",
+    "http://127.0.0.1:8080",
+    "http://127.0.0.1:3000",
+    "http://127.0.0.1:5173",
+]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount the static folder: files in app/static are served under /static.
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
+# Load Jinja2 templates from the app/templates folder.
+templates = Jinja2Templates(directory="app/templates")
+@app.get("/", response_class=HTMLResponse, include_in_schema=False)
+async def root(request: Request):
+    """Render the index.html welcome page for requests to the application root."""
+    welcome_page = templates.TemplateResponse(request=request, name="index.html")
+    return welcome_page
+# Register all the other API endpoints, collected on app.routers.routes.router.
+app.include_router(routes.router)

app/matcher.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import difflib
+from fuzzywuzzy import fuzz
+# Word-level similarity helper
+def phonetic_match(word1, word2):
+    """
+    Score how alike two words are using ``fuzz.ratio``.
+    The value is returned unchanged; the caller in this file compares it
+    against a threshold of 80.
+    NOTE(review): despite the name, this presumably is an edit-distance
+    style string ratio rather than a true phonetic algorithm - confirm
+    against the fuzzywuzzy documentation.
+    """
+    score = fuzz.ratio(word1, word2)
+    return score
+# Character-sequence similarity helper
+def sequence_match(a, b):
+    """
+    Return the difflib ``SequenceMatcher`` similarity ratio of ``a`` and ``b``.
+    No junk filter is supplied (first argument is ``None``); the result is a
+    float where 1.0 means the sequences are identical.
+    """
+    matcher = difflib.SequenceMatcher(None, a, b)
+    return matcher.ratio()
+# Main function to compare texts with percentage match
+def compare_texts(text1, text2):
+    """
+    Score how closely ``text2`` reproduces the reference ``text1``.
+
+    Both texts are lower-cased and split on whitespace, then compared word by
+    word in order. Each reference word costs:
+      * 0 when the word at the same position is identical,
+      * 1 when it differs,
+      * 2 when it differs and both similarity checks are low
+        (``phonetic_match`` < 80 and ``sequence_match`` < 0.8),
+      * 1 when ``text2`` has no word at that position.
+    Extra trailing words in ``text2`` are not penalised.
+
+    Args:
+        text1: reference text.
+        text2: text to evaluate (e.g. a transcription).
+
+    Returns:
+        A float percentage in the range 0.0-100.0. An empty reference
+        yields 0.0 instead of raising ``ZeroDivisionError``.
+    """
+    words1 = text1.lower().split()
+    words2 = text2.lower().split()
+    total_words = len(words1)
+    if total_words == 0:
+        # Nothing to compare against; also avoids dividing by zero below.
+        return 0.0
+    # zip() stops at the shorter list, so reference words that have no
+    # counterpart in text2 must be counted explicitly or they would be
+    # treated as matches (an empty text2 used to score 100%).
+    mismatches = max(total_words - len(words2), 0)
+    for word1, word2 in zip(words1, words2):
+        if word1 == word2:
+            continue
+        mismatches += 1
+        # Sequence matching is consulted only if the first score is low;
+        # a pair that fails both checks receives an extra penalty.
+        if phonetic_match(word1, word2) < 80 and sequence_match(word1, word2) < 0.8:
+            mismatches += 1
+    accuracy = 1 - (mismatches / total_words)
+    # The double penalty can push the raw value below zero; clamp it so the
+    # result stays a valid percentage.
+    return max(accuracy, 0.0) * 100  # Convert to percentage
+def match(original, transcription):
+    """Return the percentage score of ``transcription`` against ``original`` via ``compare_texts``."""
+    percent = compare_texts(original, transcription)
+    return percent

app/mfcc.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import librosa
+from transformers import AutoFeatureExtractor, Wav2Vec2BertModel
+import soundfile as sf
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+# Pretrained w2v-BERT 2.0 feature extractor and model, loaded when this module is imported.
+# NOTE(review): neither `feature_extractor` nor `model` is referenced by the
+# functions visible in this module - confirm they are still needed.
+model_id = "facebook/w2v-bert-2.0"
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+model = Wav2Vec2BertModel.from_pretrained(model_id)
+def load_and_resample_audio(file_path, target_sample_rate=16000):
+    """
+    Read an audio file and return mono samples at ``target_sample_rate``.
+
+    Args:
+        file_path: anything ``soundfile.read`` accepts (e.g. a path string).
+        target_sample_rate: desired sample rate in Hz (default 16000).
+
+    Returns:
+        ``(audio, sample_rate)`` where ``sample_rate`` is the rate of the
+        returned samples, i.e. ``target_sample_rate`` once resampling has
+        been applied.
+    """
+    audio_input, sample_rate = sf.read(file_path)
+    if audio_input.ndim > 1:
+        # soundfile returns multi-channel audio as (frames, channels), while
+        # librosa resamples along the last axis by default; average the
+        # channels into one mono signal so time stays on the last axis.
+        audio_input = audio_input.mean(axis=1)
+    if sample_rate != target_sample_rate:
+        audio_input = librosa.resample(
+            audio_input, orig_sr=sample_rate, target_sr=target_sample_rate
+        )
+        # Report the rate the samples have now, not the file's original rate;
+        # callers pass this value on to the MFCC computation.
+        sample_rate = target_sample_rate
+    return audio_input, sample_rate
+def calculate_mfcc(audio_data, sample_rate):
+    """
+    Compute MFCCs for ``audio_data`` and collapse them into a single vector.
+    The librosa MFCC matrix is transposed and averaged over axis 0, giving one
+    mean value per coefficient.
+    """
+    coefficient_matrix = librosa.feature.mfcc(y=audio_data, sr=sample_rate)
+    transposed = coefficient_matrix.T
+    # Average across the time dimension
+    return np.mean(transposed, axis=0)
+def calculate_similarity(mfccs1, mfccs2):
+    """
+    Return the cosine similarity between two feature vectors.
+    Each input is reshaped to a single row before being handed to
+    ``cosine_similarity``; the lone entry of the resulting matrix is returned.
+    """
+    first_row = mfccs1.reshape(1, -1)
+    second_row = mfccs2.reshape(1, -1)
+    similarity_matrix = cosine_similarity(first_row, second_row)
+    return similarity_matrix[0][0]
+def mfcc_similarty_check(original: str, recorded: str):
+    """
+    Compare two audio files through their time-averaged MFCC vectors.
+    Both files are loaded with ``load_and_resample_audio``; the sample rate
+    reported for ``recorded`` is the one used for both MFCC computations.
+    Returns ``(distance, accuracy_percentage)``: the Euclidean norm of the
+    difference between the two vectors, and their cosine similarity times 100.
+    """
+    reference_audio, _ = load_and_resample_audio(original)
+    recorded_audio, sample_rate = load_and_resample_audio(recorded)
+    # MFCC vector for each clip
+    reference_mfccs = calculate_mfcc(reference_audio, sample_rate)
+    recorded_mfccs = calculate_mfcc(recorded_audio, sample_rate)
+    difference = reference_mfccs.flatten() - recorded_mfccs.flatten()
+    distance = np.linalg.norm(difference)
+    # Cosine similarity expressed as a percentage
+    accuracy_percentage = calculate_similarity(reference_mfccs, recorded_mfccs) * 100
+    return distance, accuracy_percentage

app/routers/V1/__init__.py ADDED Viewed

File without changes

app/routers/V1/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (171 Bytes). View file

app/routers/V1/__pycache__/v1_routers.cpython-311.pyc ADDED Viewed

Binary file (526 Bytes). View file

app/routers/V1/v1_routers.py ADDED Viewed

	@@ -0,0 +1,14 @@

+"""
+v1 routes file
+all the v1 routes like auth
+profile... will be included here
+"""
+from fastapi import APIRouter
+from app.routers.V1.voice import voice_router
+""" initialize the router """
+router = APIRouter()  # collects the v1 sub-routers
+""" include auth routes """
+# NOTE(review): the string above mentions auth, but the router mounted here is
+# the voice router imported from app.routers.V1.voice.
+router.include_router(voice_router.router)

app/routers/V1/voice/__init__.py ADDED Viewed

File without changes

app/routers/V1/voice/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (158 Bytes). View file

app/routers/V1/voice/__pycache__/voice_router.cpython-311.pyc ADDED Viewed

Binary file (3.17 kB). View file

app/routers/V1/voice/voice_router.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from fastapi import APIRouter, UploadFile, File, Body, HTTPException, status
+from fastapi.responses import JSONResponse
+from typing import Annotated
+import time
+import os
+from app.transcriber import get_transcription
+from app.matcher import match
+from app.mfcc import mfcc_similarty_check
+""" initialize the router """
+# Routes declared on this router use the "/voice" prefix and the "Voice" tag.
+router = APIRouter(prefix="/voice", tags=["Voice"])
+@router.post("/transcribe")
+async def transcribe_audio(
+    file: Annotated[UploadFile, File()], matcher_text: Annotated[str, Body()]
+):
+    """
+    Transcribe an uploaded wav file and score it against ``matcher_text``.
+
+    Args:
+        file: uploaded audio; its filename must end with ".wav".
+        matcher_text: reference text the transcription is compared with.
+
+    Returns:
+        A JSON response with "transcription" and "percent"; when the integer
+        part of the percentage is above 50 it also carries "Cosine" and
+        "Euclidean" from the MFCC check.
+
+    Raises:
+        HTTPException: 400 when the upload is not a wav file, 500 when
+            anything fails while processing the audio.
+    """
+    import uuid
+
+    # Validate the file type before entering the broad try block below, so
+    # this 400 is not swallowed and re-raised as a 500. A missing filename is
+    # treated as an invalid type; the extension check ignores case.
+    if not (file.filename or "").lower().endswith(".wav"):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid file type. Please upload a wav file.",
+        )
+    try:
+        # Read file bytes
+        file_bytes = await file.read()
+        # A timestamp alone has one-second resolution, so two requests in the
+        # same second would share (and delete) each other's file; the random
+        # suffix keeps every temporary file unique.
+        filename = f"audio_{int(time.time())}_{uuid.uuid4().hex}.wav"
+        # Save the file temporarily
+        with open(filename, "wb") as buffer:
+            buffer.write(file_bytes)
+        try:
+            text = get_transcription(filename)
+            percent = match(matcher_text, text)
+            payload = {
+                "transcription": text,
+                "percent": percent,
+            }
+            if int(percent) > 50:
+                # NOTE(review): the same file is passed as both arguments, so
+                # the recording is compared with itself; this handler receives
+                # no reference audio to compare against - confirm intent.
+                Euclidean, Cosine = mfcc_similarty_check(filename, filename)
+                # Convert to plain floats: NumPy scalar types are not all
+                # accepted by the JSON encoder.
+                payload["Cosine"] = float(Cosine)
+                payload["Euclidean"] = float(Euclidean)
+            return JSONResponse(payload)
+        finally:
+            # Clean up the temporary file
+            os.remove(filename)
+    except Exception as e:
+        print(e)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Unable to process the audio. Please try again later.",
+        ) from e

app/routers/__init__.py ADDED Viewed

File without changes

app/routers/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (168 Bytes). View file

app/routers/__pycache__/routes.cpython-311.pyc ADDED Viewed

Binary file (512 Bytes). View file

app/routers/routes.py ADDED Viewed

	@@ -0,0 +1,13 @@

+"""
+main routes file
+all the v1 and v2... routes will go here
+"""
+from fastapi import APIRouter
+from app.routers.V1 import v1_routers
+""" initialize the router """
+router = APIRouter()  # top-level router that app/main.py includes on the app
+""" include the v1 routes here """
+router.include_router(v1_routers.router)

app/static/main.css ADDED Viewed

File without changes

app/templates/index.html ADDED Viewed

	@@ -0,0 +1,323 @@

+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Server Status</title>
+    <style>
+      @import url("https://fonts.googleapis.com/css2?family=DM+Mono:ital,wght@0,300;1,500&display=swap");
+      body {
+        background: linear-gradient(#3800e7, #8a15ff);
+        height: 100vh;
+        font-size: calc(14px + (26 - 14) * ((100vw - 300px) / (1600 - 300)));
+        font-family: "DM Mono", monospace;
+        font-weight: 300;
+        overflow: hidden;
+        color: white;
+        text-align: center;
+      }
+      h1 {
+        font-size: 3em;
+        margin-bottom: 0.2em;
+      }
+      h2 {
+        font-size: 2em;
+      }
+      .main {
+        height: 100vh;
+        display: flex;
+        flex-direction: column;
+        flex-wrap: wrap;
+        position: relative;
+        justify-content: center;
+        align-items: center;
+      }
+      .main:before,
+      .main:after {
+        content: "";
+        display: block;
+        position: absolute;
+        z-index: -3;
+      }
+      .main:before {
+        right: 0;
+        bottom: -19;
+        height: 30em;
+        width: 30em;
+        border-radius: 30em;
+        background: linear-gradient(#3800e7, #8a15ff);
+        align-self: flex-end;
+        animation: gradient-fade 8s ease-in-out 3s infinite alternate;
+      }
+      .main:after {
+        top: 0;
+        left: 30;
+        height: 10em;
+        width: 10em;
+        border-radius: 10em;
+        background: linear-gradient(#3800e7, #8a15ff);
+        animation: gradient-fade-alt 6s ease-in-out 3s infinite alternate;
+      }
+      .main__text-wrapper {
+        position: relative;
+        padding: 2em;
+      }
+      .main__text-wrapper:before,
+      .main__text-wrapper:after {
+        content: "";
+        display: block;
+        position: absolute;
+      }
+      .main__text-wrapper:before {
+        z-index: -1;
+        top: -3em;
+        right: -3em;
+        width: 13em;
+        height: 13em;
+        opacity: 0.7;
+        border-radius: 13em;
+        background: linear-gradient(#15e0ff, #8a15ff);
+        animation: rotation 7s linear infinite;
+      }
+      .main__text-wrapper:after {
+        z-index: -1;
+        bottom: -20em;
+        width: 20em;
+        height: 20em;
+        border-radius: 20em;
+        background: linear-gradient(#d000c5, #8a15ff);
+        animation: rotation 7s linear infinite;
+      }
+      .arrow {
+        z-index: 1000;
+        opacity: 0.5;
+        position: absolute;
+      }
+      .arrow--top {
+        top: 0;
+        left: -5em;
+      }
+      .arrow--bottom {
+        bottom: 0;
+        right: 3em;
+      }
+      .circle {
+        transform: translate(50%, -50%) rotate(0deg);
+        transform-origin: center;
+      }
+      .circle--ltblue {
+        height: 20em;
+        width: 20em;
+        border-radius: 20em;
+        background: linear-gradient(#15e0ff, #3800e7);
+      }
+      .backdrop {
+        position: absolute;
+        width: 100vw;
+        height: 100vh;
+        display: block;
+        background-color: pink;
+      }
+      .dotted-circle {
+        position: absolute;
+        top: 0;
+        right: 0;
+        opacity: 0.3;
+        animation: rotation 38s linear infinite;
+      }
+      .draw-in {
+        stroke-dasharray: 1000;
+        stroke-dashoffset: 10;
+        animation: draw 15s ease-in-out alternate infinite;
+      }
+      @keyframes draw {
+        from {
+          stroke-dashoffset: 1000;
+        }
+        to {
+          stroke-dashoffset: 0;
+        }
+      }
+      .item-to {
+        animation-duration: 10s;
+        animation-iteration-count: infinite;
+        transform-origin: bottom;
+      }
+      .bounce-1 {
+        animation-name: bounce-1;
+        animation-timing-function: ease;
+      }
+      .bounce-2 {
+        animation-name: bounce-2;
+        animation-timing-function: ease;
+      }
+      .bounce-3 {
+        animation-name: bounce-3;
+        animation-timing-function: ease;
+      }
+      @keyframes bounce-1 {
+        0% {
+          transform: translateY(0);
+        }
+        50% {
+          transform: translateY(50px);
+        }
+        100% {
+          transform: translateY(0);
+        }
+      }
+      @keyframes bounce-2 {
+        0% {
+          transform: translateY(0);
+        }
+        50% {
+          transform: translateY(-30px);
+        }
+        100% {
+          transform: translateY(0);
+        }
+      }
+      @keyframes bounce-3 {
+        0% {
+          transform: translateY(0);
+        }
+        50% {
+          transform: translateY(30px);
+        }
+        100% {
+          transform: translateY(0);
+        }
+      }
+      @keyframes rotation {
+        from {
+          transform: rotate(0deg);
+        }
+        to {
+          transform: rotate(360deg);
+        }
+      }
+      @keyframes gradient-fade {
+        from {
+          transform: translate(10%, -10%) rotate(0deg);
+        }
+        to {
+          transform: translate(50%, -50%) rotate(360deg);
+        }
+      }
+      @keyframes gradient-fade-alt {
+        from {
+          transform: translate(-20%, 20%) rotate(0deg);
+        }
+        to {
+          transform: translate(-60%, 60%) rotate(360deg);
+        }
+      }
+    </style>
+  </head>
+  <body>
+    <div class="arrow arrow--top">
+      <svg
+        xmlns="http://www.w3.org/2000/svg"
+        width="270.11"
+        height="649.9"
+        overflow="visible"
+      >
+        <style>
+          .geo-arrow {
+            fill: none;
+            stroke: #fff;
+            stroke-width: 2;
+            stroke-miterlimit: 10;
+          }
+        </style>
+        <g class="item-to bounce-1">
+          <path
+            class="geo-arrow draw-in"
+            d="M135.06 142.564L267.995 275.5 135.06 408.434 2.125 275.499z"
+          />
+        </g>
+        <circle
+          class="geo-arrow item-to bounce-2"
+          cx="194.65"
+          cy="69.54"
+          r="7.96"
+        />
+        <circle class="geo-arrow draw-in" cx="194.65" cy="39.5" r="7.96" />
+        <circle
+          class="geo-arrow item-to bounce-3"
+          cx="194.65"
+          cy="9.46"
+          r="7.96"
+        />
+        <g class="geo-arrow item-to bounce-2">
+          <path
+            class="st0 draw-in"
+            d="M181.21 619.5l13.27 27 13.27-27zM194.48 644.5v-552"
+          />
+        </g>
+      </svg>
+    </div>
+    <div class="arrow arrow--bottom">
+      <svg
+        xmlns="http://www.w3.org/2000/svg"
+        width="31.35"
+        height="649.9"
+        overflow="visible"
+      >
+        <style>
+          .geo-arrow {
+            fill: none;
+            stroke: #fff;
+            stroke-width: 2;
+            stroke-miterlimit: 10;
+          }
+        </style>
+        <g class="item-to bounce-1">
+          <circle
+            class="geo-arrow item-to bounce-3"
+            cx="15.5"
+            cy="580.36"
+            r="7.96"
+          />
+          <circle class="geo-arrow draw-in" cx="15.5" cy="610.4" r="7.96" />
+          <circle
+            class="geo-arrow item-to bounce-2"
+            cx="15.5"
+            cy="640.44"
+            r="7.96"
+          />
+          <g class="item-to bounce-2">
+            <path
+              class="geo-arrow draw-in"
+              d="M28.94 30.4l-13.26-27-13.27 27zM15.68 5.4v552"
+            />
+          </g>
+        </g>
+      </svg>
+    </div>
+    <div class="main">
+      <div class="main__text-wrapper">
+        <h1 class="main__title">Backend Server</h1>
+        <h2>Up and Running</h2>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          class="dotted-circle"
+          width="352"
+          height="352"
+          overflow="visible"
+        >
+          <circle
+            cx="176"
+            cy="176"
+            r="174"
+            fill="none"
+            stroke="#fff"
+            stroke-width="2"
+            stroke-miterlimit="10"
+            stroke-dasharray="12.921,11.9271"
+          />
+        </svg>
+      </div>
+    </div>
+  </body>
+</html>

app/transcriber.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from datasets import load_dataset
+# Use the first CUDA device with float16 when CUDA is available, otherwise
+# fall back to the CPU with float32.
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+# Whisper large-v3 checkpoint; model and processor are loaded at import time.
+model_id = "openai/whisper-large-v3"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id,
+    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+)
+model.to(device)
+processor = AutoProcessor.from_pretrained(model_id)
+# Speech-recognition pipeline used by get_transcription() below.
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=30,
+    batch_size=16,
+    return_timestamps=True,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+# NOTE(review): `dataset` and `sample` are not used by anything visible in
+# this module, yet the dataset is loaded on import - confirm it is needed.
+dataset = load_dataset(
+    "distil-whisper/librispeech_long", "clean", split="validation")
+sample = dataset[0]["audio"]
+def get_transcription(file: str):
+    """
+    Transcribe the audio at ``file`` with the module-level pipeline.
+    The language is passed to generation as "shona"; the "text" entry of the
+    pipeline output is returned.
+    """
+    generation_options = {"language": "shona"}
+    output = pipe(file, generate_kwargs=generation_options)
+    return output["text"]

requirements.txt ADDED Viewed

Binary file (3.56 kB). View file