# detecteur-ia / app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# French BARThez tokenizer paired with the fine-tuned sequence classifier
# published as Anvil-ML/detecteur-ia (label 1 = "IA", label 0 = "Humain").
barthez_tokenizer = AutoTokenizer.from_pretrained("moussaKam/barthez")
model = AutoModelForSequenceClassification.from_pretrained("Anvil-ML/detecteur-ia")


def interpret_pred(pred):
    """Map the raw AI-class logit onto a verdict and a 0-100 score via min-max scaling."""
    low_bound = -6.748472   # lower bound used to normalise the AI-class logit
    high_bound = 6.7176056  # upper bound used to normalise the AI-class logit
    result = "IA" if pred.argmax(dim=-1).item() == 1 else "Humain"
    pred_value = pred[0][1].item()
    interpreted_pred = (pred_value - low_bound) / (high_bound - low_bound)
    is_ai_percent = round(100 * interpreted_pred)
    return result, is_ai_percent


def interpret_pred_with_sensibility(pred):
    """Return a qualitative French label for the normalised AI probability."""
    low_bound = -6.748472
    high_bound = 6.7176056
    pred_value = pred[0][1].item()
    interpreted_pred = (pred_value - low_bound) / (high_bound - low_bound)
    if interpreted_pred < 0.5:
        proba = "très faible"   # very low
    elif interpreted_pred < 0.6:
        proba = "faible"        # low
    elif interpreted_pred < 0.8:
        proba = "modérée"       # moderate
    elif interpreted_pred < 0.95:
        proba = "élevée"        # high
    else:
        proba = "très élevée"   # very high
    return proba
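

# Worked example (illustrative): a raw AI-class logit of 0.0 normalises to
# (0.0 - (-6.748472)) / (6.7176056 - (-6.748472)) ≈ 0.501, which falls in the
# [0.5, 0.6) bucket and yields "faible".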


def main(text_sentence):
    # Tokenise the input text; truncation keeps it within the model's limit.
    input_ids = torch.tensor(
        [barthez_tokenizer.encode(text_sentence, truncation=True, padding=True, add_special_tokens=True)]
    )
    # Forward pass; the first element of the output tuple holds the logits.
    predict = model(input_ids)[0]
    # Alternative output (verdict + percent score) kept from an earlier version:
    # result = (
    #     "Résultat : {}.\nCe texte a {}% de chances d'avoir été généré par de l'IA"
    #     .format(*interpret_pred(predict))
    # )
    proba = interpret_pred_with_sensibility(predict)
    result = (
        "La probabilité que ce texte ait été généré par de l'IA est {}"
        .format(proba)
    )
    return result
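

# A minimal alternative sketch (not part of the original app, name is
# hypothetical): the tokenizer's __call__ builds the batch tensor itself and
# also returns an attention_mask, which is the more common transformers idiom.
# Kept commented out so the app's behavior is unchanged; main above stays live.
# def main_with_attention_mask(text_sentence):
#     inputs = barthez_tokenizer(text_sentence, truncation=True, return_tensors="pt")
#     with torch.no_grad():
#         logits = model(**inputs).logits
#     return interpret_pred_with_sensibility(logits)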
iface = gr.Interface(fn=main, inputs="text", outputs="text")
iface.launch()
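
# Note (assumption, standard Gradio API): when running outside a hosted Space,
# iface.launch(share=True) would additionally expose a temporary public URL.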