|
|
|
import spacy
|
|
from spacy.tokens import Doc, Span
|
|
from typing import List, Tuple
|
|
import data_models
|
|
|
|
from rich.console import Console
|
|
import re
|
|
|
|
# Module-level rich console; the analyzer below uses it to print progress
# and warning messages.
console = Console()
|
|
|
|
|
|
# Lower-case cue phrases that hint a sentence is citing a source. They are
# compared against the lower-cased sentence text.
CITATION_PHRASES = {
    "according to",
    "study shows",
    "research indicates",
    "data suggests",
    "experts say",
    "report finds",
    "source:",
    "evidence shows",
    "demonstrates that",
    "reported by",
    "stated by",
    "cited in",
}

# Loose URL pattern: an "http://", "https://" or "www." prefix, one character
# that is not whitespace or one of "/ $ . ? #", one further character, and
# then everything up to the next whitespace.
URL_REGEX = r"(?:https?://|www\.)[^\s/$.?#].[^\s]*"
|
|
|
|
|
|
# Leftmost standalone number (optionally with a decimal part). A directly
# attached "%" is kept; a pattern ending in "%?\b" would always drop it,
# because there is no word boundary between "%" and a following space or
# the end of the text.
_NUMERIC_TRIGGER_RE = re.compile(r"\b\d+(?:\.\d+)?(?:%|\b)")

# Sentence punctuation that URL_REGEX swallows when a URL ends a clause,
# since the pattern runs up to the next whitespace.
_URL_TRAILING_PUNCTUATION = ".,;:!?"


def has_potential_evidence_indicator(sent: Span) -> Tuple[bool, str, str]:
    """Look for a simple surface-level evidence cue in one sentence.

    Checks are applied in priority order and the first hit wins:

    1. a URL (matched with ``URL_REGEX``),
    2. a standalone number or a percent sign,
    3. one of the ``CITATION_PHRASES``.

    Args:
        sent: The sentence to inspect; only its ``text`` attribute is read.

    Returns:
        A ``(found, indicator_type, trigger)`` tuple. ``indicator_type`` is
        ``"URL"``, ``"Numerical Data"`` or ``"Citation Phrase"``, and
        ``trigger`` is the text that fired the check. When the sentence is
        blank or nothing matches, the result is ``(False, "", "")``.
    """
    sent_text = sent.text
    if not sent_text.strip():
        return False, "", ""

    # 1) URL: report the first one, without clause-final punctuation.
    url_match = re.search(URL_REGEX, sent_text)
    if url_match:
        return True, "URL", url_match.group(0).rstrip(_URL_TRAILING_PUNCTUATION)

    # 2) Numbers / percentages: a single search both detects and extracts.
    number_match = _NUMERIC_TRIGGER_RE.search(sent_text)
    if number_match:
        return True, "Numerical Data", number_match.group(0)
    if "%" in sent_text:
        # A percent sign with no standalone number next to it.
        return True, "Numerical Data", "Number/Percentage"

    # 3) Citation phrases. CITATION_PHRASES is a set, so iterating it
    # directly would make the reported phrase vary between runs. Sorting
    # (longest first) and taking the leftmost regex match gives a stable
    # result. The lookarounds accept punctuation as well as spaces around
    # the phrase, e.g. "(according to ..." or "... study shows.".
    phrases = sorted(
        (phrase for phrase in CITATION_PHRASES if phrase),
        key=lambda phrase: (-len(phrase), phrase),
    )
    if phrases:
        alternation = "|".join(re.escape(phrase) for phrase in phrases)
        phrase_match = re.search(
            rf"(?<!\w)(?:{alternation})(?!\w)", sent_text.lower()
        )
        if phrase_match:
            return True, "Citation Phrase", phrase_match.group(0)

    return False, "", ""
|
|
|
|
|
|
def simplified_evidence_analyzer(
    doc: Doc,
    argument_components: List[data_models.ArgumentComponent],
) -> List[data_models.Finding]:
    """Check each detected claim's own sentence for simple evidence cues.

    Only the sentence that contains the claim is inspected (the simple,
    "V1-style" check); neighbouring sentences are not considered.

    Args:
        doc: Parsed document; its ``sents`` are indexed by each claim's
            ``sentence_index``.
        argument_components: Detected components; only those whose
            ``component_type`` is ``"Claim"`` are analyzed.

    Returns:
        One finding per claim with a valid sentence index: an
        ``"EvidenceIndicator"`` finding (severity ``"Info"``) when an
        indicator is present, otherwise an ``"EvidenceStatus"`` finding
        (severity ``"Medium"``). Claims whose sentence index is out of
        range are skipped with a printed warning.
    """
    findings: List[data_models.Finding] = []

    # Keep each claim's position in the full component list so findings can
    # refer back to it through "linked_claim_index".
    claims_data = [
        (idx, comp)
        for idx, comp in enumerate(argument_components)
        if comp.component_type == "Claim"
    ]
    if not claims_data:
        console.print(" -> No claims found to analyze for evidence.", style="dim")
        return findings

    # Sentences are materialized only once there is something to analyze.
    sentences = list(doc.sents)
    num_sentences = len(sentences)

    console.print(
        f" -> Analyzing {len(claims_data)} claims for evidence indicators (Simplified: Same sentence only)...",
        style="dim",
    )

    snippet_limit = 100
    for claim_comp_idx, claim in claims_data:
        claim_sentence_idx = claim.sentence_index

        # Append the ellipsis only when the text was really cut, so short
        # claims are not shown as if they had been truncated.
        claim_text_snippet = claim.text[:snippet_limit]
        if len(claim.text) > snippet_limit:
            claim_text_snippet += "..."

        if not (0 <= claim_sentence_idx < num_sentences):
            console.print(
                f"[yellow]Warn: Invalid sentence index {claim_sentence_idx} for claim comp_idx {claim_comp_idx}, skipping.[/yellow]"
            )
            continue

        claim_sentence_span = sentences[claim_sentence_idx]
        has_indicator, indicator_type, indicator_text = has_potential_evidence_indicator(
            claim_sentence_span
        )

        if has_indicator:
            findings.append(data_models.Finding(
                finding_type="EvidenceIndicator",
                description=f"Potential evidence indicator ('{indicator_type}') found in the same sentence as the claim.",
                severity="Info",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={
                    "indicator_type": indicator_type,
                    "indicator_trigger": indicator_text,
                    "location": "same_sentence",
                    "linked_claim_index": claim_comp_idx,
                    "claim_text": claim_text_snippet,
                },
            ))
        else:
            findings.append(data_models.Finding(
                finding_type="EvidenceStatus",
                description="Claim lacks explicit evidence indicator in the same sentence.",
                severity="Medium",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={"claim_text": claim_text_snippet},
            ))

    console.print(f" -> Simplified Evidence Analyzer generated {len(findings)} findings.", style="dim")
    return findings