File size: 2,849 Bytes
08db26d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ac8a66
 
08db26d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ac8a66
08db26d
 
 
5ac8a66
08db26d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import time
import openai
import random
from transformers import pipeline


class RandomAnalyser:
    """Baseline analyser that ignores its input and guesses a sentiment label."""

    def __init__(self):
        # The label set every guess is drawn from.
        self.LABELS = ['negative', 'neutral', 'positive']

    def predict(self, X: list):
        """Return a list with one randomly chosen label per element of ``X``.

        The elements themselves are never inspected; only their count matters.
        """
        guesses = []
        for _ in X:
            guesses.append(random.choice(self.LABELS))
        return guesses


class RoBERTaAnalyser:
    """Sentiment analyser backed by a ``transformers`` sentiment-analysis pipeline."""

    def __init__(self):
        # Builds the pipeline for the StackOverflow RoBERTa sentiment checkpoint.
        self.analyser = pipeline(task="sentiment-analysis", model="Cloudy1225/stackoverflow-roberta-base-sentiment")

    def predict(self, X: list):
        """Return the top predicted label for every text in ``X``, in order.

        Each text is normalised with :meth:`preprocess` and classified on its
        own; the ``'label'`` field of the first returned prediction is kept.
        """
        return [
            self.analyser(RoBERTaAnalyser.preprocess(text))[0]['label']
            for text in X
        ]

    @staticmethod
    def preprocess(text):
        """Replace user mentions and links with fixed placeholders.

        The text is split on single spaces. A token longer than one character
        that starts with ``@`` becomes ``'@user'``; a token that starts with
        ``http`` becomes ``'http'``. The tokens are re-joined with single
        spaces and leading/trailing whitespace is stripped.
        """
        def placeholder(token):
            if len(token) > 1 and token.startswith('@'):
                return '@user'
            if token.startswith('http'):
                return 'http'
            return token

        tokens = text.split(' ')
        return ' '.join(map(placeholder, tokens)).strip()


class ChatGPTAnalyser:
    """Sentiment analyser that asks an OpenAI chat model to label each sentence.

    Every sentence is wrapped in a fixed prompt and sent as a single user
    message; the model's reply is returned stripped and lower-cased.
    """

    def __init__(self):
        # Optional proxy setup, kept from the original for reference:
        # import os
        # os.environ["http_proxy"] = "http://127.0.0.1:10080"
        # os.environ["https_proxy"] = "http://127.0.0.1:10080"
        self.MODEL = "gpt-3.5-turbo"
        # Placeholder API keys; one is drawn at random for every request.
        self.KEYs = [
            "key1",
            "key2",
        ]
        self.TASK_NAME = 'Sentiment Classification'
        self.TASK_DEFINITION = 'Given the sentence, assign a sentiment label from [negative, neutral, positive].'
        self.OUT_FORMAT = 'Return label only without any other text.'
        # The f-string fills in the task text now; the escaped "{{}}" leaves a
        # "{}" slot that is filled with the sentence via str.format() later.
        self.PROMPT_PREFIX = f"Please perform {self.TASK_NAME} task.{self.TASK_DEFINITION}{self.OUT_FORMAT}\nSentence:\n{{}}\nLabel:"

    def predict(self, X: list, retry_delay: float = 60, max_retries: "int | None" = None) -> list:
        """Return the model's label for every sentence in ``X``, in order.

        Args:
            X: Sentences to classify.
            retry_delay: Seconds to sleep after a retryable API error before
                trying again (default 60).
            max_retries: Maximum number of retries per sentence after the
                first failed attempt. ``None`` (the default) retries
                indefinitely.

        Returns:
            One stripped, lower-cased reply string per input sentence.

        Raises:
            The last retryable API error once ``max_retries`` is exhausted;
            any non-retryable error immediately.
        """
        return [self._classify(sentence, retry_delay, max_retries) for sentence in X]

    def _classify(self, sentence, retry_delay, max_retries):
        """Label one sentence, sleeping and retrying on retryable API errors."""
        failures = 0
        while True:
            try:
                return self._request_label(sentence).strip().lower()
            # Evaluated only when an error actually occurs.
            except self._retryable_errors():
                failures += 1
                if max_retries is not None and failures > max_retries:
                    raise
                time.sleep(retry_delay)

    def _request_label(self, sentence):
        """Send one chat-completion request and return the raw reply text."""
        # A key is drawn per attempt, so a retry is not tied to the key that
        # was just throttled.
        openai.api_key = random.choice(self.KEYs)
        response = openai.ChatCompletion.create(
            model=self.MODEL,
            messages=[{"role": "user", "content": self.PROMPT_PREFIX.format(sentence)}],
            temperature=0,
            n=1,
            stop=None,
        )
        return response.choices[0].message.content

    @staticmethod
    def _retryable_errors():
        """Return the exception classes that trigger a wait-and-retry."""
        # NOTE(review): the legacy SDK that provides openai.ChatCompletion is
        # believed to keep these classes in the ``openai.error`` module, while
        # newer releases expose them on the package itself; look in whichever
        # is present. Confirm against the installed SDK version.
        errors = getattr(openai, "error", openai)
        return (errors.RateLimitError, errors.APIError)