import gradio as gr
import torch
from torch.nn.functional import softmax
import shap
import requests
from transformers import RobertaTokenizer, RobertaForSequenceClassification, pipeline
from IPython.core.display import HTML

# Load the fine-tuned RoBERTa checkpoint from a local directory; the commented-out
# line below loads the model from the Hugging Face Hub instead.
model_dir = 'temp'
tokenizer = RobertaTokenizer.from_pretrained(model_dir)
model = RobertaForSequenceClassification.from_pretrained(model_dir)
# pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
def process_text(input_text, input_file):
    # Prefer the text box; fall back to an uploaded text file.
    if input_text:
        text = input_text
    elif input_file is not None:
        # Depending on the Gradio version, gr.File passes a file-like object or a path.
        file_path = input_file.name if hasattr(input_file, 'name') else input_file
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    else:
        raise gr.Error("Please enter text or upload a text file.")

    # Classify the text and keep the probability of the predicted class.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = softmax(logits, dim=1)
    max_prob, predicted_class_id = torch.max(probs, dim=1)
    prob = str(round(max_prob.item() * 100, 2))
    label = model.config.id2label[predicted_class_id.item()]
    final_label = 'Human' if label == 'LABEL_0' else 'Chat-GPT'
    processed_result = text
    def search(text):
        query = text
        api_key = 'AIzaSyClvkiiJTZrCJ8BLqUY9I38WYmbve8g-c8'
        search_engine_id = '53d064810efa44ce7'
        url = f'https://www.googleapis.com/customsearch/v1?key={api_key}&cx={search_engine_id}&q={query}'
        try:
            response = requests.get(url)
            data = response.json()
            return data
        except Exception as e:
            return {'error': str(e)}
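    # Assumption based on the Custom Search JSON API: a successful response carries
    # an 'items' list whose entries include 'title' and 'link' keys, which is what
    # find_plagiarism below relies on.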
    def find_plagiarism(text):
        search_results = search(text)
        if 'items' not in search_results:
            return []
        similar_articles = []
        for item in search_results['items']:
            title = item.get('title', '')
            link = item.get('link', '')
            similar_articles.append([title, link])
        return similar_articles[:5]
    prediction = pipe([text])
    # Explain the pipeline's decision with a SHAP text plot rendered as inline HTML.
    explainer = shap.Explainer(pipe)
    shap_values = explainer([text])
    shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
    similar_articles = find_plagiarism(text)
    return processed_result, prob, final_label, shap_plot_html, similar_articles
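
# A quick sanity check outside the Gradio UI (hypothetical sample text; the function
# returns the echoed text, probability, label, SHAP HTML and the similar-article list):
#   result, prob, label, shap_html, articles = process_text("Some sample paragraph.", None)
#   print(label, prob)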
text_input = gr.Textbox(label="Enter text")
file_input = gr.File(label="Upload a text file")
outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability (%)"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"), gr.Dataframe(label="Similar Articles", headers=["Title", "Link"], row_count=5)]
title = "Group 2- ChatGPT text detection module"
description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
The probability is particularly explained by the attention plots through SHAP'''
gr.Interface(fn=process_text,title=title,description=description, inputs=[text_input, file_input], outputs=outputs).launch()