thealper2 committed on
Commit
88a843a
·
verified ·
1 Parent(s): 7b697a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -0
app.py CHANGED
@@ -8,6 +8,30 @@ from nltk import sent_tokenize
8
  import uvicorn
9
  from fastapi import FastAPI
10
  from pydantic import BaseModel, Field
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  class AspectSentimentPipeline(Pipeline):
13
  def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
@@ -28,6 +52,7 @@ class AspectSentimentPipeline(Pipeline):
28
  main_results = []
29
  main_aspects = []
30
  for sentence in sentences:
 
31
  aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
32
  for aspect in aspects:
33
  main_aspects.append(aspect)
 
8
  import uvicorn
9
  from fastapi import FastAPI
10
  from pydantic import BaseModel, Field
11
+ import re
12
+ import emoji
13
+
14
# Stop words loaded once at import time, one entry per line of the file
# (presumably Turkish, judging by the ".tr" in the file name -- confirm).
# The `with` block closes the file handle; a frozenset makes the per-token
# membership test in preprocess_text O(1) instead of a linear list scan.
with open('stop-words.tr.txt', 'r', encoding="UTF8") as _stop_words_file:
    stop_words = frozenset(line.strip() for line in _stop_words_file)
15
+
16
def preprocess_text(text):
    """Clean a raw sentence before it is passed to aspect extraction.

    The steps run in this order: remove URLs, hashtag tokens, e-mail
    addresses, HTML tags and digits; turn periods into spaces; drop the
    remaining punctuation and emoji; filter stop words; detach a trailing
    question suffix and trailing "de"/"da"; finally drop one-character
    tokens.

    Args:
        text: Raw input string.

    Returns:
        The cleaned text, with tokens separated by single spaces.
    """
    # "http\S+" already covers both http:// and https:// links, so a single
    # pass replaces the four overlapping URL substitutions.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"www\S+", "", text)

    # Drop hashtag tokens. split()/join also collapses every whitespace run
    # (newlines included) into a single space, so no separate newline
    # replacement is needed afterwards.
    text = " ".join(word for word in text.split() if not word.startswith("#"))

    # The character class was missing its opening "[", so the pattern could
    # never match a real address.
    text = re.sub(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", "", text)
    text = re.sub(r"<[^>]+>", "", text)
    text = re.sub(r"[0-9]+", "", text)

    # Replaces a period (plus any directly following "x" characters) with a
    # space. NOTE(review): the "x*" part looks unintentional -- confirm.
    text = re.sub(r"\.x*", " ", text)

    # Keep only word characters, whitespace, "+", "-" and "_".
    text = re.sub(r"[^\w\s\+\-_]", "", text)
    text = emoji.replace_emoji(text)

    text = " ".join(word for word in text.split() if word.lower() not in stop_words)

    # Detach a question suffix, but only when it ends the whole text.
    # NOTE(review): the original also ran a per-word substitution that
    # replaced each match with itself (a no-op). If every word was meant to
    # be split, as in the "de"/"da" step below, this needs to become a
    # per-word substitution -- confirm the intent before changing it.
    text = re.sub(r"(mısın|misin|musun|müsün)$", r" \1", text)

    # Detach a trailing "de"/"da" from every word.
    text = " ".join(re.sub(r"(de|da)$", r" \1", word) for word in text.split())

    # Discard tokens that are a single character long.
    return " ".join(token for token in text.split() if len(token) > 1)


# The pipeline loop visible in this diff calls `preprocess(sentence)`, but no
# module-level `preprocess` is defined in the visible code; this alias lets
# that call resolve to the function above.
preprocess = preprocess_text
35
 
36
  class AspectSentimentPipeline(Pipeline):
37
  def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
 
52
  main_results = []
53
  main_aspects = []
54
  for sentence in sentences:
55
+ sentence = preprocess(sentence)
56
  aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
57
  for aspect in aspects:
58
  main_aspects.append(aspect)