Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Files Community

Hammad712 committed on Mar 17

Commit

0356e8f

verified ·

1 Parent(s): e74caf7

Update main.py

Browse files

Files changed (1) hide show

main.py +15 -44

main.py CHANGED Viewed

@@ -5,13 +5,8 @@ import numpy as np
 import tempfile
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
-from librosa.sequence import dtw  # Ensure librosa==0.9.2 is installed
-app = FastAPI(
-    title="Quran Recitation Comparer API",
-    description="Compares two Quran recitations using a deep wav2vec2 model.",
-    version="1.0"
-)
 # --- Core Class Definition ---
 class QuranRecitationComparer:
@@ -21,7 +16,6 @@ class QuranRecitationComparer:
         """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        # Load model and processor once during initialization
         if auth_token:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name, token=auth_token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name, token=auth_token)
@@ -31,12 +25,9 @@ class QuranRecitationComparer:
         self.model = self.model.to(self.device)
         self.model.eval()
-        # Cache for embeddings to avoid recomputation
         self.embedding_cache = {}
     def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
-        """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
         y, sr = librosa.load(file_path, sr=target_sr)
@@ -47,7 +38,6 @@ class QuranRecitationComparer:
         return y
     def get_deep_embedding(self, audio, sr=16000):
-        """Extract frame-wise deep embeddings using the pretrained model."""
         input_values = self.processor(
             audio,
             sampling_rate=sr,
@@ -62,14 +52,12 @@ class QuranRecitationComparer:
         return embedding_seq
     def compute_dtw_distance(self, features1, features2):
-        """Compute the DTW distance between two sequences of features."""
         D, wp = dtw(X=features1, Y=features2, metric='euclidean')
         distance = D[-1, -1]
         normalized_distance = distance / len(wp)
         return normalized_distance
     def interpret_similarity(self, norm_distance):
-        """Interpret the normalized distance value."""
         if norm_distance == 0:
             result = "The recitations are identical based on the deep embeddings."
             score = 100
@@ -91,48 +79,45 @@ class QuranRecitationComparer:
         return result, score
     def get_embedding_for_file(self, file_path):
-        """Get embedding for a file, using cache if available."""
         if file_path in self.embedding_cache:
             return self.embedding_cache[file_path]
         audio = self.load_audio(file_path)
         embedding = self.get_deep_embedding(audio)
-        # Store in cache for future use
         self.embedding_cache[file_path] = embedding
         return embedding
     def predict(self, file_path1, file_path2):
-        """
-        Predict the similarity between two audio files.
-        Args:
-            file_path1 (str): Path to first audio file.
-            file_path2 (str): Path to second audio file.
-        Returns:
-            (float, str): Similarity score and interpretation.
-        """
         embedding1 = self.get_embedding_for_file(file_path1)
         embedding2 = self.get_embedding_for_file(file_path2)
         norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
         interpretation, similarity_score = self.interpret_similarity(norm_distance)
-        # Optionally log the results instead of printing in production
         print(f"Similarity Score: {similarity_score:.1f}/100")
         print(f"Interpretation: {interpretation}")
         return similarity_score, interpretation
     def clear_cache(self):
-        """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
-# --- FastAPI Startup Event ---
-@app.on_event("startup")
-def startup_event():
     global comparer
-    # In production, use environment variables or configuration management for tokens.
     auth_token = os.environ.get("HF_TOKEN")
     comparer = QuranRecitationComparer(
         model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
         auth_token=auth_token
     )
     print("Model initialized and ready for predictions!")
 # --- API Endpoints ---
 @app.get("/", summary="Health Check")
@@ -141,24 +126,15 @@ async def root():
 @app.post("/predict", summary="Compare Two Audio Files", response_model=dict)
 async def predict(file1: UploadFile = File(...), file2: UploadFile = File(...)):
-    """
-    Compare two uploaded audio files and return a similarity score along with an interpretation.
-    - **file1**: The first audio file.
-    - **file2**: The second audio file.
-    """
     tmp1_path = None
     tmp2_path = None
     try:
-        # Save first file to a temporary location
         suffix1 = os.path.splitext(file1.filename)[1] or ".wav"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix1) as tmp1:
             content1 = await file1.read()
             tmp1.write(content1)
             tmp1_path = tmp1.name
-        # Save second file to a temporary location
         suffix2 = os.path.splitext(file2.filename)[1] or ".wav"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix2) as tmp2:
             content2 = await file2.read()
@@ -167,11 +143,9 @@ async def predict(file1: UploadFile = File(...), file2: UploadFile = File(...)):
         similarity_score, interpretation = comparer.predict(tmp1_path, tmp2_path)
         return {"similarity_score": similarity_score, "interpretation": interpretation}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     finally:
-        # Clean up temporary files
         if tmp1_path and os.path.exists(tmp1_path):
             os.remove(tmp1_path)
         if tmp2_path and os.path.exists(tmp2_path):
@@ -179,8 +153,5 @@ async def predict(file1: UploadFile = File(...), file2: UploadFile = File(...)):
 @app.post("/clear_cache", summary="Clear Embedding Cache", response_model=dict)
 async def clear_cache():
-    """
-    Clear the embedding cache. This can help free memory if many comparisons have been made.
-    """
     comparer.clear_cache()
     return {"message": "Cache cleared."}

 import tempfile
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+from librosa.sequence import dtw
+from contextlib import asynccontextmanager
 # --- Core Class Definition ---
 class QuranRecitationComparer:
         """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         if auth_token:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name, token=auth_token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name, token=auth_token)
         self.model = self.model.to(self.device)
         self.model.eval()
         self.embedding_cache = {}
     def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
         y, sr = librosa.load(file_path, sr=target_sr)
         return y
     def get_deep_embedding(self, audio, sr=16000):
         input_values = self.processor(
             audio,
             sampling_rate=sr,
         return embedding_seq
     def compute_dtw_distance(self, features1, features2):
         """Return the DTW cost between two feature sequences, normalized by path length.

         Both inputs are handed to ``dtw`` as ``X`` and ``Y`` with a Euclidean
         metric. The last cell of the returned cost matrix is divided by the
         number of entries in the returned warping path.
         """
         cost_matrix, warping_path = dtw(X=features1, Y=features2, metric='euclidean')
         return cost_matrix[-1, -1] / len(warping_path)
     def interpret_similarity(self, norm_distance):
         if norm_distance == 0:
             result = "The recitations are identical based on the deep embeddings."
             score = 100
         return result, score
     def get_embedding_for_file(self, file_path):
         if file_path in self.embedding_cache:
             return self.embedding_cache[file_path]
         audio = self.load_audio(file_path)
         embedding = self.get_deep_embedding(audio)
         self.embedding_cache[file_path] = embedding
         return embedding
     def predict(self, file_path1, file_path2):
         """Compare two audio files and return ``(similarity_score, interpretation)``.

         Embeddings for both paths are obtained through
         ``get_embedding_for_file``; their transposes are passed to
         ``compute_dtw_distance`` and the resulting distance is mapped to a
         score and message by ``interpret_similarity``. Both values are also
         printed to stdout.
         """
         first_embedding = self.get_embedding_for_file(file_path1)
         second_embedding = self.get_embedding_for_file(file_path2)
         # The embeddings are transposed before being handed to the DTW helper.
         interpretation, similarity_score = self.interpret_similarity(
             self.compute_dtw_distance(first_embedding.T, second_embedding.T)
         )
         print(f"Similarity Score: {similarity_score:.1f}/100")
         print(f"Interpretation: {interpretation}")
         return similarity_score, interpretation
     def clear_cache(self):
         self.embedding_cache = {}
+# --- Lifespan Event Handler ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):
     global comparer
+    # Use environment variables or a secure configuration in production
     auth_token = os.environ.get("HF_TOKEN")
     comparer = QuranRecitationComparer(
         model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
         auth_token=auth_token
     )
     print("Model initialized and ready for predictions!")
+    yield
+    print("Application shutdown: Cleanup if necessary.")
+app = FastAPI(
+    title="Quran Recitation Comparer API",
+    description="Compares two Quran recitations using a deep wav2vec2 model.",
+    version="1.0",
+    lifespan=lifespan
+)
 # --- API Endpoints ---
 @app.get("/", summary="Health Check")
 @app.post("/predict", summary="Compare Two Audio Files", response_model=dict)
 async def predict(file1: UploadFile = File(...), file2: UploadFile = File(...)):
     tmp1_path = None
     tmp2_path = None
     try:
         suffix1 = os.path.splitext(file1.filename)[1] or ".wav"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix1) as tmp1:
             content1 = await file1.read()
             tmp1.write(content1)
             tmp1_path = tmp1.name
         suffix2 = os.path.splitext(file2.filename)[1] or ".wav"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix2) as tmp2:
             content2 = await file2.read()
         similarity_score, interpretation = comparer.predict(tmp1_path, tmp2_path)
         return {"similarity_score": similarity_score, "interpretation": interpretation}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     finally:
         if tmp1_path and os.path.exists(tmp1_path):
             os.remove(tmp1_path)
         if tmp2_path and os.path.exists(tmp2_path):
 @app.post("/clear_cache", summary="Clear Embedding Cache", response_model=dict)
 async def clear_cache():
     """Empty the shared comparer's embedding cache and return a confirmation message."""
     comparer.clear_cache()
     confirmation = {"message": "Cache cleared."}
     return confirmation