from typing import Dict, Any, Literal
import logging
from transformers import pipeline
import torch
import numpy as np
from .headline_analyzer import HeadlineAnalyzer
from .sentiment_analyzer import SentimentAnalyzer
from .bias_analyzer import BiasAnalyzer
from .evidence_analyzer import EvidenceAnalyzer
logger = logging.getLogger(__name__)
# Define analysis mode type
AnalysisMode = Literal['ai', 'traditional']
class ModelRegistry:
    """Singleton class to manage shared model pipelines."""
    _instance = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(ModelRegistry, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        if not self._initialized:
            try:
                # Use GPU if available
                self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                logger.info(f"Using device: {self.device}")

                # Initialize shared models with larger batch sizes
                self.zero_shot = pipeline(
                    "zero-shot-classification",
                    model="facebook/bart-large-mnli",
                    device=self.device,
                    batch_size=8
                )
                self.sentiment = pipeline(
                    "text-classification",
                    model="SamLowe/roberta-base-go_emotions",
                    device=self.device,
                    batch_size=16
                )
                self.nli = pipeline(
                    "text-classification",
                    model="roberta-large-mnli",
                    device=self.device,
                    batch_size=16
                )
                # Add toxicity pipeline
                self.toxicity = pipeline(
                    "text-classification",
                    model="unitary/toxic-bert",
                    device=self.device,
                    batch_size=16
                )

                logger.info("Successfully initialized shared model pipelines")
                self._initialized = True
            except Exception as e:
                logger.error(f"Failed to initialize shared models: {str(e)}")
                self._initialized = False

    @property
    def is_available(self):
        return self._initialized
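
# Usage note (illustrative only, not called anywhere in this module): because
# ModelRegistry is a singleton, repeated constructions return the same object,
# so the transformers pipelines above are loaded at most once per process, e.g.:
#
#   registry_a = ModelRegistry()
#   registry_b = ModelRegistry()
#   assert registry_a is registry_b  # same shared instance and pipelines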
class MediaScorer:
    def __init__(self, use_ai: bool = True):
        """
        Initialize the MediaScorer with required analyzers.

        Args:
            use_ai: Boolean indicating whether to use AI-powered analysis (True) or traditional analysis (False)
        """
        self.use_ai = use_ai
        self.analysis_mode: AnalysisMode = 'ai' if use_ai else 'traditional'
        logger.info(f"Initializing MediaScorer with {self.analysis_mode} analysis")

        # Initialize shared model registry if using AI
        if use_ai:
            self.model_registry = ModelRegistry()
            if not self.model_registry.is_available:
                logger.warning("Shared models not available, falling back to traditional analysis")
                self.use_ai = False
                self.analysis_mode = 'traditional'

        # Initialize analyzers with analysis mode preference and shared models
        self.headline_analyzer = HeadlineAnalyzer(
            use_ai=self.use_ai,
            model_registry=self.model_registry if self.use_ai else None
        )
        self.sentiment_analyzer = SentimentAnalyzer(
            use_ai=self.use_ai,
            model_registry=self.model_registry if self.use_ai else None
        )
        self.bias_analyzer = BiasAnalyzer(
            use_ai=self.use_ai,
            model_registry=self.model_registry if self.use_ai else None
        )
        self.evidence_analyzer = EvidenceAnalyzer(
            use_ai=self.use_ai,
            model_registry=self.model_registry if self.use_ai else None
        )

        logger.info(f"All analyzers initialized in {self.analysis_mode} mode")
    def calculate_media_score(self, headline: str, content: str) -> Dict[str, Any]:
        """Calculate final media credibility score."""
        try:
            logger.info("\n" + "="*50)
            logger.info("MEDIA SCORE CALCULATION STARTED")
            logger.info("="*50)
            logger.info(f"Analysis Mode: {self.analysis_mode}")

            # Headline Analysis
            logger.info("\n" + "-"*30)
            logger.info("HEADLINE ANALYSIS")
            logger.info("-"*30)
            headline_analysis = self.headline_analyzer.analyze(headline, content)
            logger.info(f"Headline Score: {headline_analysis.get('headline_vs_content_score', 0)}")
            logger.info(f"Flagged Phrases: {headline_analysis.get('flagged_phrases', [])}")

            # Sentiment Analysis
            logger.info("\n" + "-"*30)
            logger.info("SENTIMENT ANALYSIS")
            logger.info("-"*30)
            sentiment_analysis = self.sentiment_analyzer.analyze(content)
            logger.info(f"Sentiment: {sentiment_analysis.get('sentiment', 'Unknown')}")
            logger.info(f"Manipulation Score: {sentiment_analysis.get('manipulation_score', 0)}")
            logger.info(f"Flagged Phrases: {sentiment_analysis.get('flagged_phrases', [])}")

            # Bias Analysis
            logger.info("\n" + "-"*30)
            logger.info("BIAS ANALYSIS")
            logger.info("-"*30)
            bias_analysis = self.bias_analyzer.analyze(content)
            logger.info(f"""Bias Results:
                Label: {bias_analysis.get('bias', 'Unknown')}
                Score: {bias_analysis.get('bias_score', 0)}
                Percentage: {bias_analysis.get('bias_percentage', 0)}%
                Flagged Phrases: {bias_analysis.get('flagged_phrases', [])}
            """)

            # Evidence Analysis
            logger.info("\n" + "-"*30)
            logger.info("EVIDENCE ANALYSIS")
            logger.info("-"*30)
            evidence_analysis = self.evidence_analyzer.analyze(content)
            logger.info(f"Evidence Score: {evidence_analysis.get('evidence_based_score', 0)}")
            logger.info(f"Flagged Phrases: {evidence_analysis.get('flagged_phrases', [])}")
            # Calculate component scores with NaN handling
            # For headline: 20% contradiction = 20% score (don't invert)
            headline_score = headline_analysis.get("headline_vs_content_score", 0)
            if isinstance(headline_score, (int, float)) and not np.isnan(headline_score):
                headline_score = headline_score / 100
            else:
                headline_score = 0.5  # Default to neutral if score is invalid
                logger.warning("Invalid headline score, using default value of 0.5")

            # For manipulation: 0% = good (use directly), 100% = bad
            manipulation_score = sentiment_analysis.get("manipulation_score", 0)
            if isinstance(manipulation_score, (int, float)) and not np.isnan(manipulation_score):
                manipulation_score = (100 - manipulation_score) / 100
            else:
                manipulation_score = 0.5
                logger.warning("Invalid manipulation score, using default value of 0.5")

            # For bias: 0% = good (use directly), 100% = bad
            bias_percentage = bias_analysis.get("bias_percentage", 0)
            if isinstance(bias_percentage, (int, float)) and not np.isnan(bias_percentage):
                bias_score = (100 - bias_percentage) / 100
            else:
                bias_score = 0.5
                logger.warning("Invalid bias score, using default value of 0.5")

            # For evidence: higher is better
            evidence_score = evidence_analysis.get("evidence_based_score", 0)
            if isinstance(evidence_score, (int, float)) and not np.isnan(evidence_score):
                evidence_score = evidence_score / 100
            else:
                evidence_score = 0.5
                logger.warning("Invalid evidence score, using default value of 0.5")

            logger.info(f"""Component Scores:
                Headline: {headline_score * 100:.1f}% (from {headline_analysis.get("headline_vs_content_score", 0)})
                Evidence: {evidence_score * 100:.1f}%
                Manipulation: {manipulation_score * 100:.1f}% (100 - {sentiment_analysis.get("manipulation_score", 0)}%)
                Bias: {bias_score * 100:.1f}% (100 - {bias_analysis.get("bias_percentage", 0)}%)
            """)
            # Calculate final score
            final_score = float((
                (headline_score * 0.25) +
                (manipulation_score * 0.25) +
                (bias_score * 0.25) +
                (evidence_score * 0.25)
            ) * 100)

            # Ensure final score is valid
            if np.isnan(final_score) or not np.isfinite(final_score):
                final_score = 50.0  # Default to neutral
                logger.warning("Invalid final score calculated, using default value of 50.0")

            # Determine rating
            if final_score >= 80:
                rating = "Trustworthy"
            elif final_score >= 50:
                rating = "Bias Present"
            else:
                rating = "Misleading"

            result = {
                "media_unmasked_score": round(float(final_score), 1),
                "rating": rating,
                "analysis_mode": self.analysis_mode,
                "details": {
                    "headline_analysis": {
                        "headline_vs_content_score": float(headline_analysis.get("headline_vs_content_score", 0)),
                        "flagged_phrases": headline_analysis.get("flagged_phrases", [])
                    },
                    "sentiment_analysis": {
                        "sentiment": str(sentiment_analysis.get("sentiment", "Neutral")),
                        "manipulation_score": float(sentiment_analysis.get("manipulation_score", 0)),
                        "flagged_phrases": sentiment_analysis.get("flagged_phrases", [])
                    },
                    "bias_analysis": {
                        "bias": str(bias_analysis.get("bias", "Neutral")),
                        "bias_score": float(bias_analysis.get("bias_score", 0)),
                        "bias_percentage": float(bias_analysis.get("bias_percentage", 0)),
                        "flagged_phrases": bias_analysis.get("flagged_phrases", [])
                    },
                    "evidence_analysis": {
                        "evidence_based_score": float(evidence_analysis.get("evidence_based_score", 0)),
                        "flagged_phrases": evidence_analysis.get("flagged_phrases", [])
                    }
                }
            }

            logger.info("\n=== Final Score Result ===")
            logger.info(f"Result: {result}")
            return result
        except Exception as e:
            logger.error(f"Error calculating media score: {str(e)}")
            return {
                "media_unmasked_score": 0,
                "rating": "Error",
                "analysis_mode": self.analysis_mode,
                "details": {
                    "headline_analysis": {"headline_vs_content_score": 0, "flagged_phrases": []},
                    "sentiment_analysis": {"sentiment": "Error", "manipulation_score": 0, "flagged_phrases": []},
                    "bias_analysis": {"bias": "Error", "bias_score": 0.0, "bias_percentage": 0, "flagged_phrases": []},
                    "evidence_analysis": {"evidence_based_score": 0, "flagged_phrases": []}
                }
            }
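
# Example usage (illustrative sketch only; assumes this module is imported from its
# package, since the analyzers above are relative imports, and that the example
# headline/content strings are placeholders):
#
#   scorer = MediaScorer(use_ai=True)  # falls back to traditional mode if models fail to load
#   report = scorer.calculate_media_score(
#       headline="Example headline",
#       content="Example article body...",
#   )
#   print(report["media_unmasked_score"], report["rating"])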