Alper Karaca committed
Commit 873b671 · Parent: 9d8b1b7

Initial commit

Files changed (3):
  1. Dockerfile +27 -0
  2. app.py +155 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use the official Python 3.10 image
+ FROM python:3.10
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy requirements.txt into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install the Python dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory and put its local bin on PATH
+ ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's app directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ # Start the FastAPI app with uvicorn on port 8000 (note: Hugging Face Spaces expects port 7860 by default)
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
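
For local testing, the image can be built and run with the standard Docker CLI. The commands below are illustrative and not part of this commit; the tag name aspect-sentiment-api is arbitrary, and the port mapping uses 8000 because that is what the CMD above binds:

  docker build -t aspect-sentiment-api .
  docker run -p 8000:8000 aspect-sentiment-api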
app.py ADDED
@@ -0,0 +1,155 @@
+ import torch
+ import uvicorn
+ import nltk
+ from nltk import sent_tokenize
+ from transformers import BertTokenizerFast, BertTokenizer, BertForTokenClassification, BertForSequenceClassification, Pipeline
+ from fastapi import FastAPI
+ from pydantic import BaseModel, Field
+
+ # sent_tokenize needs the punkt data; fetch it once at startup.
+ nltk.download("punkt", quiet=True)
+
+ class AspectSentimentPipeline(Pipeline):
+     # Two-stage pipeline: extract aspect terms, then classify sentiment per aspect.
+     def __init__(self, aspect_extraction_model, aspect_extraction_tokenizer, aspect_sentiment_model, aspect_sentiment_tokenizer, device):
+         super().__init__(aspect_extraction_model, aspect_extraction_tokenizer)
+         self.aspect_extraction_model = aspect_extraction_model
+         self.aspect_extraction_tokenizer = aspect_extraction_tokenizer
+         self.aspect_sentiment_model = aspect_sentiment_model
+         self.aspect_sentiment_tokenizer = aspect_sentiment_tokenizer
+         self.device = device
+
+     def _sanitize_parameters(self, **kwargs):
+         return {}, {}, {}
+
+     def preprocess(self, inputs):
+         # Split the review into sentences; aspects are extracted per sentence.
+         return sent_tokenize(inputs)
+
+     def _forward(self, sentences):
+         main_results = []
+         main_aspects = []
+         for sentence in sentences:
+             aspects = self.extract_aspects(sentence, self.aspect_extraction_model, self.aspect_extraction_tokenizer, self.device)
+             for aspect in aspects:
+                 main_aspects.append(aspect)
+                 sentiment = self.predict_sentiment(sentence, aspect)
+                 main_results.append({"aspect": aspect, "sentiment": sentiment})
+
+         return {"entity_list": main_aspects, "results": main_results}
+
+     def postprocess(self, model_outputs):
+         return model_outputs
+
+     def predict_sentiment(self, sentence, aspect):
+         # Sentence-pair classification: (aspect, sentence) -> Negative/Neutral/Positive.
+         inputs = self.aspect_sentiment_tokenizer(aspect, sentence, return_tensors="pt").to(self.device)
+         self.aspect_sentiment_model.to(self.device)
+         self.aspect_sentiment_model.eval()
+
+         with torch.no_grad():
+             outputs = self.aspect_sentiment_model(**inputs)
+             logits = outputs.logits
+
+         sentiment = torch.argmax(logits, dim=-1).item()
+         sentiment_label = self.aspect_sentiment_model.config.id2label[sentiment]
+         sentiment_id_to_label = {
+             "LABEL_0": "Negative",
+             "LABEL_1": "Neutral",
+             "LABEL_2": "Positive"
+         }
+
+         return sentiment_id_to_label[sentiment_label]
+
+     def align_word_predictions(self, tokens, predictions):
+         # Merge WordPiece continuations ("##...") back into whole words, keeping
+         # the prediction of each word's first sub-token.
+         aligned_tokens = []
+         aligned_predictions = []
+         for token, prediction in zip(tokens, predictions):
+             if not token.startswith("##"):
+                 aligned_tokens.append(token)
+                 aligned_predictions.append(prediction)
+             else:
+                 aligned_tokens[-1] = aligned_tokens[-1] + token[2:]
+         return aligned_tokens, aligned_predictions
+
+     def extract_aspects(self, review, aspect_extraction_model, aspect_extraction_tokenizer, device):
+         inputs = self.aspect_extraction_tokenizer(review, return_offsets_mapping=True, padding="max_length", truncation=True, max_length=64, return_tensors="pt").to(device)
+         self.aspect_extraction_model.to(device)
+         self.aspect_extraction_model.eval()
+         ids = inputs["input_ids"].to(device)
+         mask = inputs["attention_mask"].to(device)
+
+         with torch.no_grad():
+             outputs = self.aspect_extraction_model(ids, attention_mask=mask)
+             logits = outputs[0]
+
+         active_logits = logits.view(-1, self.aspect_extraction_model.num_labels)
+         flattened_predictions = torch.argmax(active_logits, dim=1)
+
+         tokens = self.aspect_extraction_tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
+         ids_to_labels = {0: "O", 1: "B-A", 2: "I-A"}
+         token_predictions = [ids_to_labels[i] for i in flattened_predictions.cpu().numpy()]
+
+         # Drop special tokens before re-assembling words.
+         filtered_tokens = [token for token in tokens if token not in ["[PAD]", "[CLS]", "[SEP]"]]
+         filtered_predictions = [pred for token, pred in zip(tokens, token_predictions) if token not in ["[PAD]", "[CLS]", "[SEP]"]]
+
+         aligned_tokens, aligned_predictions = self.align_word_predictions(filtered_tokens, filtered_predictions)
+
+         # Group BIO tags into aspect phrases: "B-A" starts a phrase, "I-A" extends it.
+         aspects = []
+         current_aspect = []
+
+         for token, prediction in zip(aligned_tokens, aligned_predictions):
+             if prediction == "B-A":
+                 if current_aspect:
+                     aspects.append(" ".join(current_aspect))
+                     current_aspect = []
+                 current_aspect.append(token)
+             elif prediction == "I-A":
+                 if current_aspect:
+                     current_aspect.append(token)
+             else:
+                 if current_aspect:
+                     aspects.append(" ".join(current_aspect))
+                     current_aspect = []
+
+         if current_aspect:
+             aspects.append(" ".join(current_aspect))
+
+         return aspects
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ aspect_extraction_model = BertForTokenClassification.from_pretrained("thealper2/aspect-extraction-model")
+ aspect_extraction_tokenizer = BertTokenizerFast.from_pretrained("thealper2/aspect-extraction-model")
+
+ aspect_sentiment_model = BertForSequenceClassification.from_pretrained("thealper2/aspect-sentiment-model")
+ aspect_sentiment_tokenizer = BertTokenizer.from_pretrained("thealper2/aspect-sentiment-model")
+
+ pipeline = AspectSentimentPipeline(
+     aspect_extraction_model=aspect_extraction_model,
+     aspect_extraction_tokenizer=aspect_extraction_tokenizer,
+     aspect_sentiment_model=aspect_sentiment_model,
+     aspect_sentiment_tokenizer=aspect_sentiment_tokenizer,
+     device=device
+ )
+
+ app = FastAPI()
+
+ class Item(BaseModel):
+     text: str = Field(..., example="""Fiber 100mb SuperOnline kullanıcısıyım yaklaşık 2 haftadır @Twitch @Kick_Turkey gibi canlı yayın platformlarında 360p yayın izlerken donmalar yaşıyoruz. Başka hiç bir operatörler bu sorunu yaşamazken ben parasını verip alamadığım hizmeti neden ödeyeyim ? @Turkcell """)
+
+ @app.get("/", tags=["Home"])
+ def api_home():
+     return {"detail": "Welcome to FastAPI!"}
+
+ @app.post("/predict/", response_model=dict)
+ async def predict(item: Item):
+     result = pipeline(item.text)
+     return result
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
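
Once the container (or app.py directly) is running, the /predict/ endpoint can be smoke-tested with the requests package already listed in requirements.txt. A minimal sketch, assuming the service is reachable on localhost:8000; the sample review and the printed output shape are illustrative, and since the bundled example text is Turkish the models presumably expect Turkish input:

  import requests

  # Hypothetical smoke test for the /predict/ endpoint defined above;
  # assumes the server is running locally on port 8000.
  payload = {"text": "Kargo çok hızlıydı ama ürünün kalitesi beklediğimden kötüydü."}
  response = requests.post("http://localhost:8000/predict/", json=payload)
  response.raise_for_status()
  print(response.json())  # {"entity_list": [...], "results": [{"aspect": ..., "sentiment": ...}]}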
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ uvicorn
+ requests
+ transformers
+ torch
+ pydantic
+ nltk