Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,30 @@ from nltk import sent_tokenize
|
|
8 |
import uvicorn
|
9 |
from fastapi import FastAPI
|
10 |
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
class AspectSentimentPipeline(Pipeline):
|
13 |
def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
|
@@ -28,6 +52,7 @@ class AspectSentimentPipeline(Pipeline):
|
|
28 |
main_results = []
|
29 |
main_aspects = []
|
30 |
for sentence in sentences:
|
|
|
31 |
aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
|
32 |
for aspect in aspects:
|
33 |
main_aspects.append(aspect)
|
|
|
8 |
import uvicorn
|
9 |
from fastapi import FastAPI
|
10 |
from pydantic import BaseModel, Field
|
11 |
+
import re
|
12 |
+
import emoji
|
13 |
+
|
14 |
+
# Stop-word list consulted by preprocess_text: one entry per line of the file,
# each stripped of surrounding whitespace (presumably Turkish, judging by the
# ".tr" in the file name -- confirm). The file is opened in a context manager
# so the handle is closed as soon as the list has been built.
with open('stop-words.tr.txt', 'r', encoding="UTF8") as stop_words_file:
    stop_words = [x.strip() for x in stop_words_file.read().split('\n')]
|
15 |
+
|
16 |
+
def preprocess_text(text):
    """Clean a raw sentence and return the normalized string.

    The steps run in this order:

    1. Remove links (anything starting with ``http`` or ``www`` up to the
       next whitespace).
    2. Drop hashtag tokens and collapse all whitespace to single spaces.
    3. Remove e-mail addresses, HTML tags and digits.
    4. Replace each dot (plus any ``x`` characters right after it) with a
       space, then delete every character that is not a word character,
       whitespace, ``+``, ``-`` or ``_``.
    5. Strip emoji via ``emoji.replace_emoji``.
    6. Drop tokens whose lowercase form is in the module-level ``stop_words``.
    7. Detach a trailing question suffix (mısın/misin/musun/müsün) at the
       very end of the text, and a trailing ``de``/``da`` on every token.
    8. Drop tokens of length one or less.

    Args:
        text: The input string.

    Returns:
        The cleaned text, tokens separated by single spaces.

    NOTE(review): the visible call site in this diff invokes
    ``preprocess(sentence)``, not ``preprocess_text`` -- confirm that name
    resolves, otherwise the call raises NameError.
    """
    # One pass over "http\S+" already covers http:// and https:// links, so
    # the additional URL substitutions the original carried were no-ops.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"www\S+", "", text)

    # split()/join removes hashtag tokens and also normalizes every run of
    # whitespace (newlines included) to a single space.
    text = " ".join(word for word in text.split() if not word.startswith("#"))

    # The character class needs its opening "[": without it the pattern only
    # matched the literal text "a-zA-Z0-9..." and never a real address.
    text = re.sub(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", "", text)
    text = re.sub(r"<[^>]+>", "", text)
    text = re.sub(r"[0-9]+", "", text)

    # NOTE(review): the "x*" part looks unusual; kept exactly as written.
    text = re.sub(r"\.x*", " ", text)
    text = re.sub(r"[^\w\s\+\-_]", "", text)
    text = emoji.replace_emoji(text)

    text = " ".join(
        word for word in text.split() if word.lower() not in stop_words
    )

    # "$" anchors to the end of the whole text, so only a suffix on the final
    # token is separated here (same effect as the original expression).
    text = re.sub(r"(mısın|misin|musun|müsün)$", r" \1", text)

    # Applied per token, so every word ending in de/da is split.
    text = " ".join(re.sub(r"(de|da)$", r" \1", word) for word in text.split())

    return " ".join(token for token in text.split() if len(token) > 1)
|
35 |
|
36 |
class AspectSentimentPipeline(Pipeline):
|
37 |
def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
|
|
|
52 |
main_results = []
|
53 |
main_aspects = []
|
54 |
for sentence in sentences:
|
55 |
+
sentence = preprocess(sentence)
|
56 |
aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
|
57 |
for aspect in aspects:
|
58 |
main_aspects.append(aspect)
|