thugCodeNinja committed on
Commit
82271e0
·
verified ·
1 Parent(s): 6aaf7b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -39,16 +39,16 @@ def process_text(input_text):
39
  except Exception as e:
40
  return {'error': str(e)}
41
  def get_article_text(url):
42
- try:
43
- response = requests.get(url)
44
- if response.status_code == 200:
45
- soup = BeautifulSoup(response.content, 'html.parser')
46
  # Extract text from the article content (you may need to adjust this based on the website's structure)
47
- article_text = ' '.join([p.get_text() for p in soup.find_all('p')])
48
  return article_text
49
- except Exception as e:
50
- print(f"An error occurred: {e}")
51
- return ''
52
  def find_plagiarism(text):
53
  search_results = search(text)
54
  if 'items' not in search_results:
@@ -68,12 +68,12 @@ def process_text(input_text):
68
  embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
69
 
70
  # Calculate cosine similarity between the input text and the article text embeddings
71
- similarity = cosine_similarity(embedding1, embedding2)[0][0]
72
- similar_articles.append({'Link': link, 'Similarity': similarity})
73
- similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
74
- threshold = 0.5 # Adjust the threshold as needed
75
- similar_articles = [article for article in similar_articles if article['Similarity'] > threshold]
76
- return similar_articles[:5]
77
 
78
  prediction = pipe([text])
79
  explainer = shap.Explainer(pipe)
 
39
  except Exception as e:
40
  return {'error': str(e)}
41
  def get_article_text(url):
42
+ try:
43
+ response = requests.get(url)
44
+ if response.status_code == 200:
45
+ soup = BeautifulSoup(response.content, 'html.parser')
46
  # Extract text from the article content (you may need to adjust this based on the website's structure)
47
+ article_text = ' '.join([p.get_text() for p in soup.find_all('p')])
48
  return article_text
49
+ except Exception as e:
50
+ print(f"An error occurred: {e}")
51
+ return ''
52
  def find_plagiarism(text):
53
  search_results = search(text)
54
  if 'items' not in search_results:
 
68
  embedding2 = model(**encoding2).last_hidden_state.mean(dim=1)
69
 
70
  # Calculate cosine similarity between the input text and the article text embeddings
71
+ similarity = cosine_similarity(embedding1, embedding2)[0][0]
72
+ similar_articles.append({'Link': link, 'Similarity': similarity})
73
+ similar_articles = sorted(similar_articles, key=lambda x: x['Similarity'], reverse=True)
74
+ threshold = 0.5 # Adjust the threshold as needed
75
+ similar_articles = [article for article in similar_articles if article['Similarity'] > threshold]
76
+ return similar_articles[:5]
77
 
78
  prediction = pipe([text])
79
  explainer = shap.Explainer(pipe)