Spaces:

Thanarit
/

GPT-Detection-Demo

Running

File size: 3,927 Bytes

57bafce
 
 
bd0c703
 
57bafce
582b2f2
cf62d27
57bafce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd0c703
57bafce
 
 
 
 
bd0c703
57bafce
bd0c703
 
 
57bafce
 
 
 
bd0c703
57bafce
 
 
 
 
bd0c703
 
 
 
 
 
 
 
 
 
0bd9ff0
dd9c120
bd0c703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bd9ff0
dd9c120
bd0c703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57bafce

from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader


# Device used by the feature extractor and the Sentinel MLP heads defined in
# this file; inference here is CPU-only.
device = torch.device("cpu")
class MLP(nn.Module):
    """Small classification head: Linear(input_dim, 256) -> GELU -> Linear(256, 2)."""

    def __init__(self, input_dim):
        """Create the two linear layers and the activation.

        Args:
            input_dim: Size of the last dimension of the input tensor.
        """
        super().__init__()
        # The attribute names below become the state_dict keys
        # ("fc1.weight", "fc2.bias", ...), so they must stay as they are for
        # saved checkpoints to keep loading.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=2)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Return the raw two-way logits for ``x`` (no softmax applied)."""
        hidden = self.fc1(x)
        activated = self.gelu(hidden)
        return self.fc2(activated)
# Holds the (tokenizer, encoder) pair after the first load so that repeated
# calls to extract_features() do not re-read roberta-base from disk each time.
_ROBERTA_BASE_CACHE = {}


def _load_roberta_base():
    """Return the shared ``roberta-base`` tokenizer and encoder, loading them once."""
    if "pair" not in _ROBERTA_BASE_CACHE:
        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
        encoder = RobertaModel.from_pretrained("roberta-base").to(device)
        encoder.eval()  # inference only: make sure dropout is disabled
        # Stored as one tuple in a single assignment so a reader never sees a
        # half-populated cache.
        _ROBERTA_BASE_CACHE["pair"] = (tokenizer, encoder)
    return _ROBERTA_BASE_CACHE["pair"]


def extract_features(text):
    """Encode ``text`` with ``roberta-base`` and return its first-token embedding.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the encoder, and the last-layer hidden state at sequence position 0 is
    returned. With the tokenizer's default special tokens that position is
    the leading ``<s>`` (classification) token.

    Args:
        text: The input string to embed.

    Returns:
        A NumPy array holding the position-0 hidden state with the batch
        dimension squeezed away (a 1-D vector for a single input text).
    """
    tokenizer, encoder = _load_roberta_base()

    token_ids = tokenizer.encode(
        text, truncation=True, max_length=512, return_tensors="pt"
    ).to(device)

    # No gradients are needed for feature extraction; skipping autograd
    # avoids building a graph that would be thrown away immediately.
    with torch.no_grad():
        outputs = encoder(token_ids)

    first_token_state = outputs.last_hidden_state[:, 0, :]
    # .cpu() keeps the NumPy conversion valid even if `device` is ever changed.
    return first_token_state.squeeze().cpu().numpy()

def RobertaSentinelOpenGPTInference(input_text):
    """Score ``input_text`` with the Sentinel MLP head trained on OpenGPT data.

    The text is embedded with ``extract_features`` and the resulting vector is
    classified by an ``MLP(768)`` whose weights are read from
    ``SentinelCheckpoint/RobertaSentinelOpenGPT.pth``.

    Args:
        input_text: The text to classify.

    Returns:
        A NumPy array with the softmax probabilities of the two classes. The
        meaning of each index is fixed by how the checkpoint was trained and
        is not visible in this file.
    """
    features = extract_features(input_text)

    classifier = MLP(768).to(device)
    classifier.load_state_dict(
        torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device)
    )
    # Explicit inference mode; a no-op for the MLP as currently defined but
    # keeps results stable if dropout/normalisation layers are ever added.
    classifier.eval()

    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        logits = classifier(inputs.float())
        # Softmax over the class axis (the last one; for the 1-D feature
        # vector produced above this is the same as dim=0).
        Probs = F.softmax(logits, dim=-1).cpu().numpy()

    return Probs

def RobertaSentinelCSAbstractInference(input_text):
    """Score ``input_text`` with the Sentinel MLP head trained on CS-abstract data.

    The text is embedded with ``extract_features`` and the resulting vector is
    classified by an ``MLP(768)`` whose weights are read from
    ``SentinelCheckpoint/RobertaSentinelCSAbstract.pth``.

    Args:
        input_text: The text to classify.

    Returns:
        A NumPy array with the softmax probabilities of the two classes. The
        meaning of each index is fixed by how the checkpoint was trained and
        is not visible in this file.
    """
    features = extract_features(input_text)

    classifier = MLP(768).to(device)
    classifier.load_state_dict(
        torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device)
    )
    # Explicit inference mode; a no-op for the MLP as currently defined but
    # keeps results stable if dropout/normalisation layers are ever added.
    classifier.eval()

    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        logits = classifier(inputs.float())
        # Softmax over the class axis (the last one; for the 1-D feature
        # vector produced above this is the same as dim=0).
        Probs = F.softmax(logits, dim=-1).cpu().numpy()

    return Probs


def RobertaClassifierOpenGPTInference(input_text):
    """Score ``input_text`` with the fine-tuned RoBERTa classifier (OpenGPT checkpoint).

    A ``roberta-base`` sequence classifier with two labels is created, its
    weights are replaced by those stored in
    ``ClassifierCheckpoint/RobertaClassifierOpenGPT.pth``, and the text
    (truncated to at most 512 tokens) is classified on the CPU.

    Args:
        input_text: The text to classify.

    Returns:
        A 1-D NumPy array with the softmax probabilities of the two labels
        for the input. The meaning of each index is fixed by how the
        checkpoint was trained and is not visible in this file.
    """
    cpu = torch.device("cpu")

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    # `map_location` is an argument of torch.load, not of load_state_dict;
    # passing it to load_state_dict raises TypeError. Remapping at load time
    # also lets a checkpoint saved on GPU be read on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model = model.to(cpu)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(cpu)
    attention_mask = tokenized_input['attention_mask'].to(cpu)

    # Make a prediction without tracking gradients.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    # Softmax over the label axis, then take the first (only) row of the batch.
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs


def RobertaClassifierCSAbstractInference(input_text):
    """Score ``input_text`` with the fine-tuned RoBERTa classifier (CS-abstract checkpoint).

    A ``roberta-base`` sequence classifier with two labels is created, its
    weights are replaced by those stored in
    ``ClassifierCheckpoint/RobertaClassifierCSAbstract.pth``, and the text
    (truncated to at most 512 tokens) is classified on the CPU.

    Args:
        input_text: The text to classify.

    Returns:
        A 1-D NumPy array with the softmax probabilities of the two labels
        for the input. The meaning of each index is fixed by how the
        checkpoint was trained and is not visible in this file.
    """
    cpu = torch.device("cpu")

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierCSAbstract.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    # `map_location` is an argument of torch.load, not of load_state_dict;
    # passing it to load_state_dict raises TypeError. Remapping at load time
    # also lets a checkpoint saved on GPU be read on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model = model.to(cpu)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(cpu)
    attention_mask = tokenized_input['attention_mask'].to(cpu)

    # Make a prediction without tracking gradients.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    # Softmax over the label axis, then take the first (only) row of the batch.
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs