thugCodeNinja committed (verified)
Commit 074df17 · Parent: 2211ff7

Update app.py

Files changed (1)
  1. app.py (+12, -11)
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 import torch
 from torch.nn.functional import softmax
-import shap
 import requests
 from bs4 import BeautifulSoup
 from sklearn.metrics.pairwise import cosine_similarity
@@ -10,6 +9,8 @@ from IPython.core.display import HTML
 model_dir = 'temp'
 tokenizer = RobertaTokenizer.from_pretrained(model_dir)
 model = RobertaForSequenceClassification.from_pretrained(model_dir)
+tokenizer1 = RobertaTokenizer.from_pretrained('roberta-base')
+model1 = RobertaModel.from_pretrained('roberta-base')
 #pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
 pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
 def process_text(input_text):
@@ -57,13 +58,13 @@ def process_text(input_text):
 article_text = get_article_text(link)
 if article_text:
     # Tokenize and encode the input text and the article text
-    encoding1 = tokenizer(text, max_length=512, truncation=True, padding=True, return_tensors="pt")
-    encoding2 = tokenizer(article_text, max_length=512, truncation=True, padding=True, return_tensors="pt")
+    encoding1 = tokenizer1(text, max_length=512, truncation=True, padding=True, return_tensors="pt")
+    encoding2 = tokenizer1(article_text, max_length=512, truncation=True, padding=True, return_tensors="pt")

     # Calculate embeddings using the model
     with torch.no_grad():
-        embedding1 = model(**encoding1).last_hidden_state.mean(dim=1)
-        embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
+        embedding1 = model1(**encoding1).last_hidden_state.mean(dim=1)
+        embedding2 = model1(**encoding2).last_hidden_state.mean(dim=1)

     # Calculate cosine similarity between the input text and the article text embeddings
     similarity = cosine_similarity(embedding1, embedding2)[0][0]
@@ -73,16 +74,16 @@ def process_text(input_text):
 threshold = 0.5 # Adjust the threshold as needed
 return similar_articles[:5]

-prediction = pipe([text])
-explainer = shap.Explainer(pipe)
-shap_values = explainer([text])
-shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
+# prediction = pipe([text])
+# explainer = shap.Explainer(pipe)
+# shap_values = explainer([text])
+# shap_plot_html = HTML(shap.plots.text(shap_values, display=False)).data
 similar_articles = find_plagiarism(text)

-return processed_result, prob, final_label, shap_plot_html,similar_articles
+return processed_result, prob, final_label,similar_articles

 text_input = gr.Textbox(label="Enter text")
-outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"),gr.Dataframe(label="Similar Articles", headers=["Link", "Similarity"],row_count=5)]
+outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"),gr.Dataframe(label="Similar Articles", headers=["Link", "Similarity"],row_count=5)]
 title = "Group 2- ChatGPT text detection module"
 description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
 The probability is particularly explained by the attention plots through SHAP'''
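
Net effect of the commit: the plagiarism check now builds its embeddings from a plain roberta-base encoder (tokenizer1/model1) instead of the fine-tuned classification model, the SHAP explanation path is commented out, and process_text returns four values matching the four remaining Gradio output components. The switch is presumably needed because RobertaForSequenceClassification returns classification logits rather than a last_hidden_state, so the old embedding lines could not work as written. Below is a minimal, self-contained sketch of the updated similarity step, assuming the same libraries as app.py; the helper names embed and similarity_to_article are illustrative and do not appear in the app.

import torch
from transformers import RobertaTokenizer, RobertaModel
from sklearn.metrics.pairwise import cosine_similarity

tokenizer1 = RobertaTokenizer.from_pretrained("roberta-base")
model1 = RobertaModel.from_pretrained("roberta-base")
model1.eval()

def embed(text):
    # Tokenize, then mean-pool the last hidden state into one vector per input.
    encoding = tokenizer1(text, max_length=512, truncation=True, padding=True, return_tensors="pt")
    with torch.no_grad():
        # last_hidden_state: (batch, seq_len, hidden) -> (batch, hidden) after averaging over tokens
        return model1(**encoding).last_hidden_state.mean(dim=1)

def similarity_to_article(input_text, article_text):
    # Cosine similarity between the two mean-pooled embeddings, as in find_plagiarism.
    emb1 = embed(input_text).numpy()
    emb2 = embed(article_text).numpy()
    return float(cosine_similarity(emb1, emb2)[0][0])

print(similarity_to_article("The cat sat on the mat.", "A cat was sitting on the mat."))

In app.py the resulting score is compared against threshold = 0.5 (marked adjustable in the code) to decide whether a retrieved article counts as similar.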