File size: 1,363 Bytes
1ce1659
 
 
 
 
 
 
 
 
 
 
 
 
da7dbd0
1ce1659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da7dbd0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from functools import lru_cache

from transformers import pipeline

# TODO: move to a config file
DEFAULT_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"

# Maps a model id to the label string that model emits for human-written text.
MODEL_HUMAN_LABEL = {DEFAULT_MODEL: "Human"}

# Canonical labels returned by the detection helpers in this module.
HUMAN = "HUMAN"
MACHINE = "MACHINE"
UNKNOWN = "UNKNOWN"
PARAPHRASE = "PARAPHRASE"
NON_PARAPHRASE = "NON_PARAPHRASE"


@lru_cache(maxsize=4)
def _load_detector(model: str, max_length: int):
    """Build (and cache) a text-classification pipeline for *model*.

    Loading a transformer model is expensive; caching it avoids re-loading
    the weights on every call to detect_text_by_ai_model.
    """
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=model,
        max_length=max_length,
        truncation=True,
        device_map="auto",  # good for GPU usage
    )


def detect_text_by_ai_model(
    input_text: str,
    model: str = DEFAULT_MODEL,
    max_length: int = 512,
) -> tuple:
    """
    Classify *input_text* as human- or machine-generated.

    Model: chatgpt_detector_roberta
    Ref: https://huggingface.co/Hello-SimpleAI/chatgpt-detector-roberta

    Args:
        input_text: Text to classify.
        model: Hugging Face model id used for classification.
        max_length: Maximum token length; longer input is truncated.

    Returns:
        tuple: (label, confidence_score) where label is HUMAN or MACHINE
            and confidence_score is a float in [0, 1]. On any inference
            error, returns (UNKNOWN, 0.0).
    """
    try:
        pipe = _load_detector(model, max_length)
        result = pipe(input_text)[0]
        confidence_score = result["score"]
        # The model reports its own label vocabulary; translate it to ours.
        if result["label"] == MODEL_HUMAN_LABEL[model]:
            label = HUMAN
        else:
            label = MACHINE
        return label, confidence_score
    except Exception as e:  # best-effort: never let detection crash the caller
        print(f"Error in Roberta model inference: {e}")
        # Bug fix: previously returned 50, which is inconsistent with the
        # [0, 1] confidence scale of the success path (and with the stated
        # intent of returning 0.0 confidence on error).
        return UNKNOWN, 0.0