# detecteur-ia / app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# French BARThez tokenizer paired with the fine-tuned sequence classifier
# published as Anvil-ML/detecteur-ia (label 1 = "IA", label 0 = "Humain").
barthez_tokenizer = AutoTokenizer.from_pretrained("moussaKam/barthez")
model = AutoModelForSequenceClassification.from_pretrained("Anvil-ML/detecteur-ia")


def interpret_pred(pred):
    """Map the raw AI-class logit onto a verdict and a 0-100 score via min-max scaling."""
    low_bound = -6.748472   # lower bound used to normalise the AI-class logit
    high_bound = 6.7176056  # upper bound used to normalise the AI-class logit
    result = "IA" if pred.argmax(dim=-1).item() == 1 else "Humain"
    pred_value = pred[0][1].item()
    interpreted_pred = (pred_value - low_bound) / (high_bound - low_bound)
    is_ai_percent = round(100 * interpreted_pred)
    return result, is_ai_percent


def interpret_pred_with_sensibility(pred):
    """Return a qualitative French label for the normalised AI probability."""
    low_bound = -6.748472
    high_bound = 6.7176056
    pred_value = pred[0][1].item()
    interpreted_pred = (pred_value - low_bound) / (high_bound - low_bound)
    if interpreted_pred < 0.5:
        proba = "très faible"   # very low
    elif interpreted_pred < 0.6:
        proba = "faible"        # low
    elif interpreted_pred < 0.8:
        proba = "modérée"       # moderate
    elif interpreted_pred < 0.95:
        proba = "élevée"        # high
    else:
        proba = "très élevée"   # very high
    return proba
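

# Worked example (illustrative): a raw AI-class logit of 0.0 normalises to
# (0.0 - (-6.748472)) / (6.7176056 - (-6.748472)) ≈ 0.501, which falls in the
# [0.5, 0.6) bucket and yields "faible".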


def main(text_sentence):
    # Tokenise the input text; truncation keeps it within the model's limit.
    input_ids = torch.tensor(
        [barthez_tokenizer.encode(text_sentence, truncation=True, padding=True, add_special_tokens=True)]
    )
    # Forward pass; the first element of the output tuple holds the logits.
    predict = model(input_ids)[0]
    # Alternative output (verdict + percent score) kept from an earlier version:
    # result = (
    #     "Résultat : {}.\nCe texte a {}% de chances d'avoir été généré par de l'IA"
    #     .format(*interpret_pred(predict))
    # )
    proba = interpret_pred_with_sensibility(predict)
    result = (
        "La probabilité que ce texte ait été généré par de l'IA est {}"
        .format(proba)
    )
    return result
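

# A minimal alternative sketch (not part of the original app, name is
# hypothetical): the tokenizer's __call__ builds the batch tensor itself and
# also returns an attention_mask, which is the more common transformers idiom.
# Kept commented out so the app's behavior is unchanged; main above stays live.
# def main_with_attention_mask(text_sentence):
#     inputs = barthez_tokenizer(text_sentence, truncation=True, return_tensors="pt")
#     with torch.no_grad():
#         logits = model(**inputs).logits
#     return interpret_pred_with_sensibility(logits)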
iface = gr.Interface(fn=main, inputs="text", outputs="text")
iface.launch()
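
# Note (assumption, standard Gradio API): when running outside a hosted Space,
# iface.launch(share=True) would additionally expose a temporary public URL.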