File size: 4,420 Bytes
98c2b46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# evidence_analyzer.py (V2 - Step 2.4 revision: simplified. It was necessary; Zayn couldn't pull it off :))
import spacy
from spacy.tokens import Doc, Span
from typing import List, Tuple
import data_models
# nlp_utils, torch and F are no longer needed in this simplified version
from rich.console import Console
import re

# Module-level rich console; simplified_evidence_analyzer prints its progress
# and warning messages through it.
console = Console()

# Constants (unchanged from the previous version)
CITATION_PHRASES = {
    "according to", "study shows", "research indicates", "data suggests",
    "experts say", "report finds", "source:", "evidence shows", "demonstrates that",
    "reported by", "stated by", "cited in"
}
URL_REGEX = r"(?:https?://|www\.)[^\s/$.?#].[^\s]*"

# A standalone integer or decimal. A directly attached percent sign is kept,
# so the trigger reads "50%" rather than "50".
_NUMBER_REGEX = r"\b\d+(?:\.\d+)?(?:%|\b)"

# Sentence punctuation that URL_REGEX swallows when a URL ends a clause.
_URL_TRAILING_PUNCTUATION = ".,;:!?"


def _first_citation_phrase(lowered_text: str) -> str:
    """Return the citation phrase occurring earliest in *lowered_text*, or ""."""
    # Longest first so the more specific phrase wins at a shared position; the
    # alphabetical tie-break keeps the result independent of set hash order.
    ordered = sorted(
        (phrase for phrase in CITATION_PHRASES if phrase),
        key=lambda phrase: (-len(phrase), phrase),
    )
    if not ordered:
        return ""
    alternatives = "|".join(re.escape(phrase) for phrase in ordered)
    # Lookarounds rather than literal spaces: a phrase next to punctuation or
    # at either end of the sentence still counts, but a phrase embedded in a
    # longer word does not.
    found = re.search(rf"(?<!\w)(?:{alternatives})(?!\w)", lowered_text)
    return found.group(0) if found else ""


def has_potential_evidence_indicator(sent: Span) -> Tuple[bool, str, str]:
    """Check one sentence for a surface-level hint that evidence is cited.

    The checks run in priority order and the first hit wins: a URL, then
    numerical data (a standalone number or any percent sign), then one of
    the phrases in ``CITATION_PHRASES`` (matched case-insensitively).

    Args:
        sent: Sentence to inspect; only its ``text`` attribute is read.

    Returns:
        A ``(found, indicator_type, trigger)`` tuple. ``indicator_type`` is
        ``"URL"``, ``"Numerical Data"`` or ``"Citation Phrase"``. ``trigger``
        is the matched URL (without trailing sentence punctuation), the first
        number (including an attached ``%``), the matched phrase, or the
        placeholder ``"Number/Percentage"`` when only a bare ``%`` was seen.
        Blank sentences and sentences without any indicator yield
        ``(False, "", "")``.
    """
    sent_text = sent.text
    if not sent_text.strip():
        return False, "", ""

    url_match = re.search(URL_REGEX, sent_text)
    if url_match:
        url = url_match.group(0)
        # Drop the full stop/comma that ends the sentence, not the URL.
        return True, "URL", url.rstrip(_URL_TRAILING_PUNCTUATION) or url

    number_match = re.search(_NUMBER_REGEX, sent_text)
    if number_match or "%" in sent_text:
        trigger_text = number_match.group(0) if number_match else "Number/Percentage"
        return True, "Numerical Data", trigger_text

    phrase = _first_citation_phrase(sent_text.lower())
    if phrase:
        return True, "Citation Phrase", phrase
    return False, "", ""

# Main analysis function (simplified: only checks the claim's own sentence)
def simplified_evidence_analyzer(
    doc: Doc,
    argument_components: List[data_models.ArgumentComponent]
    # the sentence_embeddings parameter was removed
) -> List[data_models.Finding]:
    """Flag, for every detected claim, whether its own sentence hints at evidence.

    Only components whose ``component_type`` is ``"Claim"`` are examined, and
    only the sentence at ``claim.sentence_index`` in ``doc.sents`` is checked
    (via ``has_potential_evidence_indicator``); neighbouring sentences are
    ignored.

    Args:
        doc: Parsed document; its sentences are taken from ``doc.sents``.
        argument_components: Components to scan for claims. A claim's position
            in this list is reported as ``linked_claim_index``.

    Returns:
        One finding per claim with a valid sentence index: an
        ``"EvidenceIndicator"`` finding (severity ``"Info"``) when an
        indicator is present, otherwise an ``"EvidenceStatus"`` finding
        (severity ``"Medium"``). Both span the whole claim sentence. Claims
        whose sentence index is out of range are skipped with a warning.
        The list is empty when there are no claims.
    """
    max_snippet_chars = 100
    findings = []
    claims_data = [
        (idx, comp)
        for idx, comp in enumerate(argument_components)
        if comp.component_type == "Claim"
    ]

    if not claims_data:
        console.print(" -> No claims found to analyze for evidence.", style="dim")
        return findings

    # Materialise the sentences only once we know there is something to check.
    sentences = list(doc.sents)
    num_sentences = len(sentences)

    console.print(f" -> Analyzing {len(claims_data)} claims for evidence indicators (Simplified: Same sentence only)...", style="dim")

    for claim_comp_idx, claim in claims_data:
        claim_sentence_idx = claim.sentence_index

        if not (0 <= claim_sentence_idx < num_sentences):
            console.print(f"[yellow]Warn: Invalid sentence index {claim_sentence_idx} for claim comp_idx {claim_comp_idx}, skipping.[/yellow]")
            continue

        # Add the ellipsis only when the text was actually cut, so a short
        # claim is not presented as if it had been truncated.
        claim_text = claim.text
        if len(claim_text) > max_snippet_chars:
            claim_text_snippet = claim_text[:max_snippet_chars] + "..."
        else:
            claim_text_snippet = claim_text

        claim_sentence_span = sentences[claim_sentence_idx]

        # Check only the claim's own sentence.
        has_indicator, indicator_type, indicator_text = has_potential_evidence_indicator(claim_sentence_span)

        if has_indicator:
            # An indicator is present: record an EvidenceIndicator finding.
            findings.append(data_models.Finding(
                finding_type="EvidenceIndicator",
                description=f"Potential evidence indicator ('{indicator_type}') found in the same sentence as the claim.",
                severity="Info",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={
                    "indicator_type": indicator_type, "indicator_trigger": indicator_text,
                    "location": "same_sentence", "linked_claim_index": claim_comp_idx,
                    "claim_text": claim_text_snippet
                }
            ))
        else:
            # No indicator: record an EvidenceStatus finding on the claim sentence.
            findings.append(data_models.Finding(
                finding_type="EvidenceStatus",
                description="Claim lacks explicit evidence indicator in the same sentence.",
                severity="Medium",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={"claim_text": claim_text_snippet}
            ))

    console.print(f" -> Simplified Evidence Analyzer generated {len(findings)} findings.", style="dim")
    return findings