Spaces:

thugCodeNinja
/

ChatGPTtextdetction

Sleeping

App Files Files Community

thugCodeNinja committed on Apr 1, 2024

Commit

074df17

verified ·

1 Parent(s): 2211ff7

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -11

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import gradio as gr
 import torch
 from torch.nn.functional import softmax
-import shap
 import requests
 from bs4 import BeautifulSoup
 from sklearn.metrics.pairwise import cosine_similarity
@@ -10,6 +9,8 @@ from IPython.core.display import HTML
 model_dir = 'temp'
 tokenizer = RobertaTokenizer.from_pretrained(model_dir)
 model = RobertaForSequenceClassification.from_pretrained(model_dir)
 #pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
 pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
 def process_text(input_text):
@@ -57,13 +58,13 @@ def process_text(input_text):
             article_text = get_article_text(link)
             if article_text:
             # Tokenize and encode the input text and the article text
-                encoding1 = tokenizer(text, max_length=512, truncation=True, padding=True, return_tensors="pt")
-                encoding2 = tokenizer(article_text, max_length=512, truncation=True, padding=True, return_tensors="pt")
             # Calculate embeddings using the model
                 with torch.no_grad():
-                    embedding1 = model(**encoding1).last_hidden_state.mean(dim=1)
-                    embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
             # Calculate cosine similarity between the input text and the article text embeddings
                 similarity = cosine_similarity(embedding1, embedding2)[0][0]
@@ -73,16 +74,16 @@ def process_text(input_text):
         threshold = 0.5  # Adjust the threshold as needed
         return similar_articles[:5]
-    prediction = pipe([text])
-    explainer = shap.Explainer(pipe)
-    shap_values = explainer([text])
-    shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
     similar_articles = find_plagiarism(text)
-    return processed_result, prob, final_label, shap_plot_html,similar_articles
 text_input = gr.Textbox(label="Enter text")
-outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"),gr.Dataframe(label="Similar Articles", headers=["Link", "Similarity"],row_count=5)]
 title = "Group 2- ChatGPT text detection module"
 description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
 The probability is particularly explained by the attention plots through SHAP'''

 import gradio as gr
 import torch
 from torch.nn.functional import softmax
 import requests
 from bs4 import BeautifulSoup
 from sklearn.metrics.pairwise import cosine_similarity
 model_dir = 'temp'
 tokenizer = RobertaTokenizer.from_pretrained(model_dir)
 model = RobertaForSequenceClassification.from_pretrained(model_dir)
+tokenizer1 = RobertaTokenizer.from_pretrained('roberta-base')
+model1 = RobertaModel.from_pretrained('roberta-base')
 #pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
 pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
 def process_text(input_text):
             article_text = get_article_text(link)
             if article_text:
             # Tokenize and encode the input text and the article text
+                encoding1 = tokenizer1(text, max_length=512, truncation=True, padding=True, return_tensors="pt")
+                encoding2 = tokenizer1(article_text, max_length=512, truncation=True, padding=True, return_tensors="pt")
             # Calculate embeddings using the model
                 with torch.no_grad():
+                    embedding1 = model1(**encoding1).last_hidden_state.mean(dim=1)
+                    embedding2 = model1(**encoding2).last_hidden_state.mean(dim=1)
             # Calculate cosine similarity between the input text and the article text embeddings
                 similarity = cosine_similarity(embedding1, embedding2)[0][0]
         threshold = 0.5  # Adjust the threshold as needed
         return similar_articles[:5]
+    # prediction = pipe([text])
+    # explainer = shap.Explainer(pipe)
+    # shap_values = explainer([text])
+    # shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
     similar_articles = find_plagiarism(text)
+    return processed_result, prob, final_label,similar_articles
 text_input = gr.Textbox(label="Enter text")
+outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"),gr.Dataframe(label="Similar Articles", headers=["Link", "Similarity"],row_count=5)]
 title = "Group 2- ChatGPT text detection module"
 description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
 The probability is particularly explained by the attention plots through SHAP'''