Spaces:
Running
Running
File size: 3,927 Bytes
57bafce bd0c703 57bafce 582b2f2 cf62d27 57bafce bd0c703 57bafce bd0c703 57bafce bd0c703 57bafce bd0c703 57bafce bd0c703 0bd9ff0 dd9c120 bd0c703 0bd9ff0 dd9c120 bd0c703 57bafce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
# Device shared by the feature extractor and the MLP heads below; models and
# input tensors are moved to it before inference.
device = torch.device("cpu")
class MLP(nn.Module):
    """Two-layer perceptron that maps a feature vector to two output scores.

    Args:
        input_dim: Size of the last dimension of the input features.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Attribute names double as state_dict keys, so they stay
        # ``fc1`` / ``fc2`` / ``gelu`` for saved checkpoints to keep loading.
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 2)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Return the raw (un-normalised) two-way scores for ``x``."""
        hidden = self.fc1(x)
        activated = self.gelu(hidden)
        return self.fc2(activated)
# Filled on first use so the tokenizer and encoder are loaded once per process
# instead of on every extract_features() call.
_FEATURE_BACKBONE = {}


def _get_feature_backbone():
    """Return the cached ``(tokenizer, encoder)`` pair, loading it on first use."""
    if not _FEATURE_BACKBONE:
        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
        encoder = RobertaModel.from_pretrained("roberta-base").to(device)
        encoder.eval()
        # Populate the cache only after both loads succeeded, so a failed
        # load never leaves a half-filled cache behind.
        _FEATURE_BACKBONE["tokenizer"] = tokenizer
        _FEATURE_BACKBONE["encoder"] = encoder
    return _FEATURE_BACKBONE["tokenizer"], _FEATURE_BACKBONE["encoder"]


def extract_features(text):
    """Encode ``text`` with roberta-base and return its first-token embedding.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the encoder, and the last-layer hidden state at sequence position 0 is
    returned.

    Args:
        text: Input text to embed.

    Returns:
        NumPy array holding the position-0 hidden state with size-1
        dimensions squeezed away (a 1-D vector for a single input text).
    """
    tokenizer, encoder = _get_feature_backbone()
    token_ids = tokenizer.encode(
        text, truncation=True, max_length=512, return_tensors="pt"
    ).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = encoder(token_ids)
    first_token_state = outputs.last_hidden_state[:, 0, :]
    # .cpu() keeps the NumPy conversion valid if ``device`` is ever not CPU.
    return first_token_state.squeeze().cpu().numpy()
def RobertaSentinelOpenGPTInference(input_text):
    """Score ``input_text`` with the RobertaSentinel OpenGPT MLP head.

    Features from ``extract_features`` are fed to an ``MLP(768)`` whose
    weights are loaded from ``SentinelCheckpoint/RobertaSentinelOpenGPT.pth``.

    Args:
        input_text: Text to classify.

    Returns:
        NumPy array with the softmax probabilities of the two output classes.
    """
    features = extract_features(input_text)

    classifier = MLP(768).to(device)
    classifier.load_state_dict(
        torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device)
    )
    classifier.eval()

    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        logits = classifier(inputs.float())
        # extract_features squeezes its output, so for a single text the
        # logits are 1-D and the class axis is dim 0.
        probs = F.softmax(logits, dim=0).cpu().numpy()
    return probs
def RobertaSentinelCSAbstractInference(input_text):
    """Score ``input_text`` with the RobertaSentinel CSAbstract MLP head.

    Features from ``extract_features`` are fed to an ``MLP(768)`` whose
    weights are loaded from ``SentinelCheckpoint/RobertaSentinelCSAbstract.pth``.

    Args:
        input_text: Text to classify.

    Returns:
        NumPy array with the softmax probabilities of the two output classes.
    """
    features = extract_features(input_text)

    classifier = MLP(768).to(device)
    classifier.load_state_dict(
        torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device)
    )
    classifier.eval()

    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        logits = classifier(inputs.float())
        # extract_features squeezes its output, so for a single text the
        # logits are 1-D and the class axis is dim 0.
        probs = F.softmax(logits, dim=0).cpu().numpy()
    return probs
def RobertaClassifierOpenGPTInference(input_text):
    """Score ``input_text`` with the fine-tuned RoBERTa OpenGPT classifier.

    A two-label ``RobertaForSequenceClassification`` is built from
    ``roberta-base`` and its weights are replaced with the state dict stored
    at ``ClassifierCheckpoint/RobertaClassifierOpenGPT.pth``.

    Args:
        input_text: Text to classify.

    Returns:
        NumPy array with the softmax probabilities of the two labels for the
        first (only) sequence in the batch.
    """
    cpu = torch.device("cpu")
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT.pth"
    model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
    # map_location is a torch.load() argument; passing it to load_state_dict()
    # raises TypeError (unexpected keyword argument).
    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model = model.to(cpu)
    model.eval()

    encoded = tokenizer(
        input_text, truncation=True, padding=True, max_length=512, return_tensors="pt"
    )
    input_ids = encoded["input_ids"].to(cpu)
    attention_mask = encoded["attention_mask"].to(cpu)

    # Make a prediction without tracking gradients.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits
        probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    return probs
def RobertaClassifierCSAbstractInference(input_text):
    """Score ``input_text`` with the fine-tuned RoBERTa CSAbstract classifier.

    A two-label ``RobertaForSequenceClassification`` is built from
    ``roberta-base`` and its weights are replaced with the state dict stored
    at ``ClassifierCheckpoint/RobertaClassifierCSAbstract.pth``.

    Args:
        input_text: Text to classify.

    Returns:
        NumPy array with the softmax probabilities of the two labels for the
        first (only) sequence in the batch.
    """
    cpu = torch.device("cpu")
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierCSAbstract.pth"
    model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
    # map_location is a torch.load() argument; passing it to load_state_dict()
    # raises TypeError (unexpected keyword argument).
    model.load_state_dict(torch.load(model_path, map_location=cpu))
    model = model.to(cpu)
    model.eval()

    encoded = tokenizer(
        input_text, truncation=True, padding=True, max_length=512, return_tensors="pt"
    )
    input_ids = encoded["input_ids"].to(cpu)
    attention_mask = encoded["attention_mask"].to(cpu)

    # Make a prediction without tracking gradients.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits
        probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    return probs
|