Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Community

Hammad712 committed on Mar 16

Commit

3e1b72c

verified ·

1 Parent(s): b7062bf

Update main.py

Browse files

Files changed (1) hide show

main.py +127 -61

main.py CHANGED Viewed

@@ -4,22 +4,35 @@ import torch
 import librosa
 import numpy as np
 import os
-from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import tempfile
 import shutil
 import uvicorn
-import scipy.spatial.distance as distance
 # Load environment variables
 HF_TOKEN = os.getenv("HF_TOKEN")
 app = FastAPI(title="Quran Recitation Comparer API")
 class ComparisonResult(BaseModel):
     similarity_score: float
     interpretation: str
-# Custom implementation of DTW to replace librosa.sequence.dtw
 def custom_dtw(X, Y, metric='euclidean'):
     """
     Custom Dynamic Time Warping implementation.
@@ -80,23 +93,27 @@ class QuranRecitationComparer:
         print(f"Using device: {self.device}")
         # Load model and processor once during initialization
-        if token:
-            print(f"Loading model {model_name} with token...")
-            self.processor = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=token)
-            self.model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token)
-        else:
-            print(f"Loading model {model_name} without token...")
-            self.processor = Wav2Vec2Processor.from_pretrained(model_name)
-            self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
-        self.model = self.model.to(self.device)
-        self.model.eval()
         # Cache for embeddings to avoid recomputation
         self.embedding_cache = {}
-        print("Model loaded successfully!")
-    def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
         """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
@@ -107,34 +124,69 @@ class QuranRecitationComparer:
         if normalize:
             y = librosa.util.normalize(y)
-        if trim_silence:
-            # Use librosa.effects.trim which should be available in most versions
-            y, _ = librosa.effects.trim(y, top_db=30)
         return y
     def get_deep_embedding(self, audio, sr=16000):
         """Extract frame-wise deep embeddings using the pretrained model."""
-        input_values = self.processor(
-            audio,
-            sampling_rate=sr,
-            return_tensors="pt"
-        ).input_values.to(self.device)
-        with torch.no_grad():
-            outputs = self.model(input_values, output_hidden_states=True)
-        hidden_states = outputs.hidden_states[-1]
-        embedding_seq = hidden_states.squeeze(0).cpu().numpy()
-        return embedding_seq
     def compute_dtw_distance(self, features1, features2):
         """Compute the DTW distance between two sequences of features."""
-        D, wp = custom_dtw(X=features1, Y=features2, metric='euclidean')
-        distance = D[-1, -1]
-        normalized_distance = distance / len(wp)
-        return normalized_distance
     def interpret_similarity(self, norm_distance):
         """Interpret the normalized distance value."""
@@ -166,14 +218,18 @@ class QuranRecitationComparer:
             return self.embedding_cache[file_path]
         print(f"Computing new embedding for {file_path}")
-        audio = self.load_audio(file_path)
-        embedding = self.get_deep_embedding(audio)
-        # Store in cache for future use
-        self.embedding_cache[file_path] = embedding
-        print(f"Embedding shape: {embedding.shape}")
-        return embedding
     def predict(self, file_path1, file_path2):
         """
@@ -189,20 +245,25 @@ class QuranRecitationComparer:
             str: Interpretation of similarity
         """
         print(f"Comparing {file_path1} and {file_path2}")
-        # Get embeddings (using cache if available)
-        embedding1 = self.get_embedding_for_file(file_path1)
-        embedding2 = self.get_embedding_for_file(file_path2)
-        # Compute DTW distance
-        print("Computing DTW distance...")
-        norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
-        print(f"Normalized distance: {norm_distance}")
-        # Interpret results
-        interpretation, similarity_score = self.interpret_similarity(norm_distance)
-        print(f"Similarity score: {similarity_score}, Interpretation: {interpretation}")
-        return similarity_score, interpretation
     def clear_cache(self):
         """Clear the embedding cache to free memory."""
@@ -212,6 +273,7 @@ class QuranRecitationComparer:
 # Global variable for the comparer instance
 comparer = None
 @app.on_event("startup")
 async def startup_event():
     """Initialize the model when the application starts."""
@@ -225,12 +287,16 @@ async def startup_event():
         print("Model initialized and ready for predictions!")
     except Exception as e:
         print(f"Error initializing model: {str(e)}")
-        raise
 @app.get("/")
 async def root():
     """Root endpoint to check if the API is running."""
-    return {"message": "Quran Recitation Comparer API is running", "status": "active"}
 @app.post("/compare", response_model=ComparisonResult)
 async def compare_files(

 import librosa
 import numpy as np
 import os
+from transformers import AutoProcessor, AutoModelForCTC
 import tempfile
 import shutil
 import uvicorn
+from fastapi.middleware.cors import CORSMiddleware
+import warnings
+# Suppress all warnings (this "ignore" filter is not limited to deprecation warnings)
+warnings.filterwarnings("ignore")
 # Load environment variables
 HF_TOKEN = os.getenv("HF_TOKEN")
 app = FastAPI(title="Quran Recitation Comparer API")
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 class ComparisonResult(BaseModel):
     similarity_score: float
     interpretation: str
+# Custom implementation of DTW
 def custom_dtw(X, Y, metric='euclidean'):
     """
     Custom Dynamic Time Warping implementation.
         print(f"Using device: {self.device}")
         # Load model and processor once during initialization
+        try:
+            if token:
+                print(f"Loading model {model_name} with token...")
+                self.processor = AutoProcessor.from_pretrained(model_name, token=token)
+                self.model = AutoModelForCTC.from_pretrained(model_name, token=token)
+            else:
+                print(f"Loading model {model_name} without token...")
+                self.processor = AutoProcessor.from_pretrained(model_name)
+                self.model = AutoModelForCTC.from_pretrained(model_name)
+            self.model = self.model.to(self.device)
+            self.model.eval()
+            print("Model loaded successfully!")
+        except Exception as e:
+            print(f"Error loading model: {str(e)}")
+            raise
         # Cache for embeddings to avoid recomputation
         self.embedding_cache = {}
+    def load_audio(self, file_path, target_sr=16000, normalize=True):
         """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
         if normalize:
             y = librosa.util.normalize(y)
+        # Trim silence using a simplified approach
+        trim_y = []
+        threshold = 0.02  # Threshold for silence detection
+        for i in range(len(y)):
+            if abs(y[i]) > threshold:
+                trim_y.append(y[i])
+        if len(trim_y) > 0:
+            y = np.array(trim_y)
         return y
     def get_deep_embedding(self, audio, sr=16000):
         """Extract frame-wise deep embeddings using the pretrained model."""
+        try:
+            inputs = self.processor(
+                audio,
+                sampling_rate=sr,
+                return_tensors="pt"
+            ).input_values.to(self.device)
+            with torch.no_grad():
+                outputs = self.model(inputs, output_hidden_states=True)
+            hidden_states = outputs.hidden_states[-1]
+            embedding_seq = hidden_states.squeeze(0).cpu().numpy()
+            return embedding_seq
+        except Exception as e:
+            print(f"Error in get_deep_embedding: {str(e)}")
+            raise
     def compute_dtw_distance(self, features1, features2):
         """Compute the DTW distance between two sequences of features."""
+        # Make sure features are 2D arrays
+        if features1.ndim == 1:
+            features1 = features1.reshape(-1, 1)
+        if features2.ndim == 1:
+            features2 = features2.reshape(-1, 1)
+        print(f"Feature shapes: {features1.shape}, {features2.shape}")
+        # Use a subsample if the sequences are too long to avoid memory issues
+        max_length = 300
+        if features1.shape[0] > max_length or features2.shape[0] > max_length:
+            step1 = max(1, features1.shape[0] // max_length)
+            step2 = max(1, features2.shape[0] // max_length)
+            features1 = features1[::step1]
+            features2 = features2[::step2]
+            print(f"Subsampled feature shapes: {features1.shape}, {features2.shape}")
+        try:
+            D, wp = custom_dtw(X=features1, Y=features2, metric='euclidean')
+            distance = D[-1, -1]
+            normalized_distance = distance / len(wp)
+            return normalized_distance
+        except Exception as e:
+            print(f"Error in compute_dtw_distance: {str(e)}")
+            # Fallback to a basic similarity measure if DTW fails
+            mean_1 = np.mean(features1, axis=0)
+            mean_2 = np.mean(features2, axis=0)
+            euclidean_distance = np.sqrt(np.sum((mean_1 - mean_2) ** 2))
+            return euclidean_distance
     def interpret_similarity(self, norm_distance):
         """Interpret the normalized distance value."""
             return self.embedding_cache[file_path]
         print(f"Computing new embedding for {file_path}")
+        try:
+            audio = self.load_audio(file_path)
+            embedding = self.get_deep_embedding(audio)
+            # Store in cache for future use
+            self.embedding_cache[file_path] = embedding
+            print(f"Embedding shape: {embedding.shape}")
+            return embedding
+        except Exception as e:
+            print(f"Error getting embedding: {str(e)}")
+            raise
     def predict(self, file_path1, file_path2):
         """
             str: Interpretation of similarity
         """
         print(f"Comparing {file_path1} and {file_path2}")
+        try:
+            # Get embeddings (using cache if available)
+            embedding1 = self.get_embedding_for_file(file_path1)
+            embedding2 = self.get_embedding_for_file(file_path2)
+            # Compute DTW distance
+            print("Computing DTW distance...")
+            norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
+            print(f"Normalized distance: {norm_distance}")
+            # Interpret results
+            interpretation, similarity_score = self.interpret_similarity(norm_distance)
+            print(f"Similarity score: {similarity_score}, Interpretation: {interpretation}")
+            return similarity_score, interpretation
+        except Exception as e:
+            print(f"Error in predict: {str(e)}")
+            # Return a fallback response in case of error
+            return 0, f"Error comparing files: {str(e)}"
     def clear_cache(self):
         """Clear the embedding cache to free memory."""
 # Global variable for the comparer instance
 comparer = None
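+# NOTE: startup is still registered through the on_event("startup") hook below; it has not yet been migrated to the lifespan API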
 @app.on_event("startup")
 async def startup_event():
     """Initialize the model when the application starts."""
         print("Model initialized and ready for predictions!")
     except Exception as e:
         print(f"Error initializing model: {str(e)}")
+        # Don't raise here, let the app continue to load even if model fails
 @app.get("/")
 async def root():
     """Root endpoint to check if the API is running."""
+    if comparer:
+        status = "active"
+    else:
+        status = "model not loaded"
+    return {"message": "Quran Recitation Comparer API is running", "status": status}
 @app.post("/compare", response_model=ComparisonResult)
 async def compare_files(