Ozgur Unlu committed
Commit · 4ce18b0 · 1 Parent(s): 16b95b0
fixes
Changed files: news_checker.py (+19 -8)
news_checker.py
CHANGED
@@ -3,10 +3,10 @@ from newsapi import NewsApiClient
 from dotenv import load_dotenv
 import pandas as pd
 from datetime import datetime, timedelta
-from transformers import pipeline
-
-from sklearn.metrics.pairwise import cosine_similarity
+from transformers import pipeline, AutoTokenizer, AutoModel
+import torch
 import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
 
 load_dotenv()
 
@@ -23,15 +23,26 @@ class NewsChecker:
             # Initialize sentiment analyzer
             self.sentiment_analyzer = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')
             # Initialize semantic similarity model
-            self.
+            self.tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
+            self.model = AutoModel.from_pretrained('distilbert-base-uncased')
             print("Models initialized successfully")
         except Exception as e:
             print(f"Error initializing clients: {str(e)}")
 
     def get_embedding(self, text):
-        """Get embedding for a text using
+        """Get embedding for a text using DistilBERT"""
         try:
-
+            # Tokenize and encode the text
+            inputs = self.tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+
+            # Get model outputs
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+
+            # Use the mean of the last hidden state as the sentence embedding
+            embeddings = outputs.last_hidden_state.mean(dim=1)
+
+            return embeddings
         except Exception as e:
             print(f"Error getting embedding: {str(e)}")
             return None
@@ -40,8 +51,8 @@ class NewsChecker:
         """Calculate cosine similarity between two embeddings"""
         try:
             # Convert tensors to numpy arrays and reshape
-            emb1 = text1_embedding.
-            emb2 = text2_embedding.
+            emb1 = text1_embedding.numpy().reshape(1, -1)
+            emb2 = text2_embedding.numpy().reshape(1, -1)
 
             # Calculate cosine similarity
             similarity = cosine_similarity(emb1, emb2)[0][0]
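
For context, a minimal standalone sketch of the approach the new code takes: mean-pool DistilBERT's last hidden state into a sentence embedding, then compare two embeddings with scikit-learn's cosine_similarity. The helper names embed_text and semantic_similarity below are illustrative, not taken from the repository (which wraps the same steps in NewsChecker methods), and the details are assumed from the diff above.

# Illustrative sketch of the embedding + similarity flow added in this commit.
# embed_text / semantic_similarity are hypothetical names for this example.
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
model = AutoModel.from_pretrained('distilbert-base-uncased')

def embed_text(text):
    # Tokenize, run DistilBERT without gradients, mean-pool the last hidden state
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1)  # shape: (1, 768)

def semantic_similarity(text1, text2):
    # Cosine similarity between the two pooled embeddings, as in the diff
    emb1 = embed_text(text1).numpy().reshape(1, -1)
    emb2 = embed_text(text2).numpy().reshape(1, -1)
    return cosine_similarity(emb1, emb2)[0][0]

print(semantic_similarity("stocks rally after earnings", "shares climb on strong results"))

Note that plain mean pooling over last_hidden_state also averages padding positions when several texts are tokenized in one batch; for single-text calls as in the diff this does not matter, and an attention-mask-weighted mean is a common refinement.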
|