thugCodeNinja committed on
Commit
2211ff7
·
verified ·
1 Parent(s): 1525307

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -11,8 +11,6 @@ model_dir = 'temp'
11
  tokenizer = RobertaTokenizer.from_pretrained(model_dir)
12
  model = RobertaForSequenceClassification.from_pretrained(model_dir)
13
  #pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
14
- tokenizer1 = RobertaTokenizer.from_pretrained('roberta-base')
15
- model1 = RobertaModel.from_pretrained('roberta-base')
16
  pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
17
  def process_text(input_text):
18
  if input_text:
@@ -64,15 +62,15 @@ def process_text(input_text):
64
 
65
  # Calculate embeddings using the model
66
  with torch.no_grad():
67
- embedding1 = model1(**encoding1).last_hidden_state.mean(dim=1)
68
- embedding2 = model1(**encoding2).last_hidden_state.mean(dim=1)
69
 
70
  # Calculate cosine similarity between the input text and the article text embeddings
71
  similarity = cosine_similarity(embedding1, embedding2)[0][0]
72
- similar_articles.append({'Link': link, 'Similarity': similarity})
 
73
  similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
74
  threshold = 0.5 # Adjust the threshold as needed
75
- similar_articles = [article for article in similar_articles if article['Similarity'] > threshold]
76
  return similar_articles[:5]
77
 
78
  prediction = pipe([text])
@@ -84,7 +82,7 @@ def process_text(input_text):
84
  return processed_result, prob, final_label, shap_plot_html,similar_articles
85
 
86
  text_input = gr.Textbox(label="Enter text")
87
- outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"),gr.Dataframe(label="Similar Articles", headers=["Title", "Link"],row_count=5)]
88
  title = "Group 2- ChatGPT text detection module"
89
  description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
90
  The probability is particularly explained by the attention plots through SHAP'''
 
11
  tokenizer = RobertaTokenizer.from_pretrained(model_dir)
12
  model = RobertaForSequenceClassification.from_pretrained(model_dir)
13
  #pipe = pipeline("text-classification", model="thugCodeNinja/robertatemp")
 
 
14
  pipe = pipeline("text-classification",model=model,tokenizer=tokenizer)
15
  def process_text(input_text):
16
  if input_text:
 
62
 
63
  # Calculate embeddings using the model
64
  with torch.no_grad():
65
+ embedding1 = model(**encoding1).last_hidden_state.mean(dim=1)
66
+ embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
67
 
68
  # Calculate cosine similarity between the input text and the article text embeddings
69
  similarity = cosine_similarity(embedding1, embedding2)[0][0]
70
+ if similarity > threshold:
71
+ similar_articles.append({'Link': link, 'Similarity': similarity})
72
  similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
73
  threshold = 0.5 # Adjust the threshold as needed
 
74
  return similar_articles[:5]
75
 
76
  prediction = pipe([text])
 
82
  return processed_result, prob, final_label, shap_plot_html,similar_articles
83
 
84
  text_input = gr.Textbox(label="Enter text")
85
+ outputs = [gr.Textbox(label="Processed text"), gr.Textbox(label="Probability"), gr.Textbox(label="Label"), gr.HTML(label="SHAP Plot"),gr.Dataframe(label="Similar Articles", headers=["Link", "Similarity"],row_count=5)]
86
  title = "Group 2- ChatGPT text detection module"
87
  description = '''Please upload text files and text input responsibly and await the explainable results. The approach in place includes finetuning a Roberta model for text classification.Once the classifications are done the decision is exaplined thorugh the SHAP text plot.
88
  The probability is particularly explained by the attention plots through SHAP'''