File size: 14,184 Bytes
8d79c29
77a0774
5b50796
 
 
 
 
 
 
e58377a
20e25d2
4fbf7fa
 
da18a88
 
 
4fbf7fa
 
 
d8d4f16
c4d75ea
 
 
 
 
 
 
 
5b50796
d8d4f16
 
 
 
 
 
 
5b50796
d8d4f16
 
 
 
 
 
 
 
 
 
4fbf7fa
c4d75ea
 
 
 
 
 
 
b726416
e58377a
 
ce3343c
 
 
 
 
 
e58377a
 
 
 
 
 
 
 
 
 
3e002ee
 
 
 
c0ba949
 
e58377a
 
 
 
 
455262f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a91b6e
e58377a
 
455262f
 
 
 
 
 
 
e58377a
 
c0ba949
e58377a
93bbf6a
c0ba949
064bce5
e58377a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0ba949
 
e58377a
c852ed8
e58377a
455262f
 
 
4fbf7fa
 
 
e58377a
c0ba949
 
e58377a
c0ba949
e58377a
c0ba949
e58377a
c0ba949
820534b
e58377a
 
c0ba949
e58377a
 
c4d75ea
c0ba949
 
e58377a
 
 
 
 
 
 
c0ba949
ea92796
e58377a
 
c0ba949
34771b3
d3eaecf
 
7c1e4e2
1ebd803
 
 
 
 
 
 
e58377a
1ebd803
 
 
 
a7402b5
1ebd803
c4d75ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89c18ff
c4d75ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ebd803
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea92796
5a9d9de
 
 
 
 
ea92796
 
5a9d9de
ea92796
 
 
 
 
1ebd803
a7402b5
c4d75ea
455262f
c4d75ea
 
 
 
c0ba949
8b2be8a
c0ba949
 
8b2be8a
c4d75ea
8b2be8a
c4d75ea
a7402b5
4fbf7fa
8b2be8a
 
 
c91e004
8b2be8a
 
 
 
 
 
 
 
 
4fbf7fa
ea92796
f9b5f97
1ebd803
 
 
 
 
 
c4d75ea
 
 
 
 
1ebd803
5a9d9de
1ebd803
 
c4d75ea
 
 
 
 
 
 
 
 
 
 
 
1ebd803
c4d75ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
import warnings
import numpy as np
import pandas as pd
import os
import json
import random
import gradio as gr
import torch
from sklearn.preprocessing import OneHotEncoder
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForCausalLM, pipeline
from deap import base, creator, tools, algorithms
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
import ssl
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
from gensim import corpora
from gensim.models import LdaModel
from gensim.utils import simple_preprocess
from neuralcoref import NeuralCoref

# NLTK data download
# NOTE(review): the block below swaps the stdlib's default HTTPS context
# factory for the unverified one, which turns off certificate verification
# for every stdlib HTTPS request in this process, not just the NLTK
# downloads. Confirm this workaround is still required.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # This Python build has no unverified-context helper; keep the default.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Fetch the NLTK resources at import time; quiet=True suppresses the
# download progress output.
nltk.download('words', quiet=True)
nltk.download('vader_lexicon', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('maxent_ne_chunker', quiet=True)

# Set NLTK data path
# Appended after the downloads above, so it only widens where later resource
# lookups search; it does not change where the downloads were written.
nltk.data.path.append('/home/user/nltk_data')

# Silence FutureWarning messages raised from huggingface_hub.file_download.
warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub.file_download')

# Load spaCy model
# `nlp` is the shared pipeline used by extract_entities and analyze_context.
nlp = en_core_web_sm.load()

# Add NeuralCoref to spaCy pipeline
# analyze_context reads `doc._.coref_resolved`, which presumably comes from
# this component.
# NOTE(review): passing a component instance plus `name=` to add_pipe looks
# like the spaCy 2.x API; confirm the pinned spaCy version.
coref = NeuralCoref(nlp.vocab)
nlp.add_pipe(coref, name='neuralcoref')

# Initialize Example Dataset (For Emotion Prediction)
# Two parallel lists of 21 entries each: data['context'][i] is a sample
# sentence and data['emotion'][i] is its label. In the code visible here the
# resulting frame only feeds the one-hot encoder and the categorical targets
# built right after it; predict_emotion uses the pretrained classifier instead.
data = {
    'context': [
        'I am overjoyed', 'I am deeply saddened', 'I am seething with rage', 'I am exhilarated', 'I am tranquil',
        'I am brimming with joy', 'I am grieving profoundly', 'I am at peace', 'I am frustrated beyond measure',
        'I am determined to succeed', 'I feel resentment burning within me', 'I am feeling glorious and triumphant',
        'I am motivated and inspired', 'I am utterly surprised', 'I am gripped by fear', 'I am trusting and open',
        'I feel a sense of disgust', 'I am optimistic and hopeful', 'I am pessimistic and gloomy', 'I feel bored and listless',
        'I am envious and jealous'
    ],
    'emotion': [
        'joy', 'sadness', 'anger', 'joy', 'calmness', 'joy', 'grief', 'calmness', 'anger',
        'determination', 'resentment', 'glory', 'motivation', 'surprise', 'fear', 'trust',
        'disgust', 'optimism', 'pessimism', 'boredom', 'envy'
    ]
}
# One row per sample, with columns 'context' and 'emotion'.
df = pd.DataFrame(data)

# One-hot encode the context column into a sparse matrix. The keyword that
# requests sparse output was renamed between scikit-learn releases, so try the
# newer spelling first and fall back to the older one on TypeError.
try:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=True)
except TypeError:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=True)
contexts_encoded = encoder.fit_transform(df[['context']])

# Integer codes for the emotion labels, plus the index of distinct labels
# those codes point into (both taken from one Categorical).
_emotion_categorical = pd.Categorical(df['emotion'])
emotions_target = _emotion_categorical.codes
emotion_classes = _emotion_categorical.categories

# Emotion-classification model and its tokenizer. They are only placeholders
# here; load_models() fills them in on first use.
emotion_prediction_model = None
emotion_prediction_tokenizer = None

# Causal language model and tokenizer used for response generation. They are
# also populated lazily by load_models().
response_model = None
response_tokenizer = None

def load_models():
    """Load the emotion classifier and the response generator on first use.

    Populates the module-level ``emotion_prediction_model``,
    ``emotion_prediction_tokenizer``, ``response_model`` and
    ``response_tokenizer``. When both models are already set the call is a
    no-op; if either one is still ``None``, all four objects are (re)loaded.
    """
    global emotion_prediction_model, emotion_prediction_tokenizer, response_model, response_tokenizer

    # Nothing to do once both models are in place.
    if emotion_prediction_model is not None and response_model is not None:
        return

    emotion_checkpoint = "bhadresh-savani/distilbert-base-uncased-emotion"
    emotion_prediction_model = AutoModelForSequenceClassification.from_pretrained(emotion_checkpoint)
    emotion_prediction_tokenizer = AutoTokenizer.from_pretrained(emotion_checkpoint)

    response_model_name = "gpt2-xl"
    response_tokenizer = AutoTokenizer.from_pretrained(response_model_name)
    response_model = AutoModelForCausalLM.from_pretrained(response_model_name)
    # Reuse the end-of-sequence token for padding so that tokenizer calls
    # with padding=True have a pad token to work with.
    response_tokenizer.pad_token = response_tokenizer.eos_token

# Enhanced Emotional States
# Mutable state of the assistant: for every emotion, its share of the overall
# mood ('percentage', the initial values sum to 100), a short description used
# as 'motivation', and an 'intensity' score.
emotions = {
    'joy': {'percentage': 20, 'motivation': 'positive and uplifting', 'intensity': 8},
    'sadness': {'percentage': 15, 'motivation': 'reflective and introspective', 'intensity': 6},
    'anger': {'percentage': 15, 'motivation': 'passionate and driven', 'intensity': 7},
    'fear': {'percentage': 10, 'motivation': 'cautious and protective', 'intensity': 5},
    'love': {'percentage': 15, 'motivation': 'affectionate and caring', 'intensity': 7},
    'surprise': {'percentage': 10, 'motivation': 'curious and intrigued', 'intensity': 6},
    'neutral': {'percentage': 15, 'motivation': 'balanced and composed', 'intensity': 4},
}

total_percentage = 100
# JSON file used by load_historical_data / save_historical_data by default.
emotion_history_file = 'emotion_history.json'
# Running log of {'user': ..., 'response': ...} exchanges. A `global`
# statement has no effect at module level, so the name is simply assigned.
conversation_history = []
# Upper bound on the number of exchanges kept in conversation_history.
max_history_length = 1000

def load_historical_data(file_path=emotion_history_file):
    """Read previously saved history from a JSON file.

    Args:
        file_path: Path of the JSON file to read; defaults to
            ``emotion_history_file``.

    Returns:
        The decoded JSON content, or an empty list when no file exists at
        ``file_path``.
    """
    # A missing file just means there is no history yet.
    if not os.path.exists(file_path):
        return []
    with open(file_path, 'r') as history_file:
        return json.load(history_file)

def save_historical_data(historical_data, file_path=emotion_history_file):
    """Write ``historical_data`` to ``file_path`` as JSON, replacing the file.

    Args:
        historical_data: JSON-serialisable object to store.
        file_path: Destination path; defaults to ``emotion_history_file``.
    """
    with open(file_path, mode='w') as history_file:
        json.dump(historical_data, history_file)

# Load any previously saved history at import time; this is an empty list
# when the default history file does not exist yet.
emotion_history = load_historical_data()

def update_emotion(emotion, percentage, intensity):
    """Adjust one emotion and rescale all percentages to sum to 100.

    Args:
        emotion: Key into the module-level ``emotions`` dict.
        percentage: Amount added to that emotion's current percentage.
        intensity: New intensity value, stored as given.
    """
    target = emotions[emotion]
    target['percentage'] += percentage
    target['intensity'] = intensity

    # Normalize percentages
    total = sum(state['percentage'] for state in emotions.values())
    for state in emotions.values():
        state['percentage'] = (state['percentage'] / total) * 100

def normalize_context(context):
    """Return *context* lower-cased with surrounding whitespace removed."""
    lowered = context.lower()
    return lowered.strip()

# DEAP types used by the genetic search in evolve_emotions.
# The three weights line up with the tuple returned by evaluate():
# (total_diff, intensity_range, emotion_balance). In DEAP a negative weight
# marks an objective to minimise, and the magnitude is its relative priority.
creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -0.5, -0.2))
# An individual is a plain list of genes that carries a FitnessMulti.
creator.create("Individual", list, fitness=creator.FitnessMulti)

def evaluate(individual):
    """Score one individual for the genetic search.

    The first ``len(emotions)`` genes are read as emotion percentages and the
    remaining genes as intensities.

    Args:
        individual: Sequence of numeric genes laid out as described above.

    Returns:
        A 3-tuple ``(total_diff, intensity_range, emotion_balance)``: the
        distance of the percentage sum from 100, the spread of the
        intensities, and the spread of the percentages.
    """
    split = len(emotions)
    percentages, intensity_genes = individual[:split], individual[split:]

    return (
        abs(100 - sum(percentages)),
        max(intensity_genes) - min(intensity_genes),
        max(percentages) - min(percentages),
    )

def evolve_emotions():
    """Search for a balanced set of emotion percentages and intensities.

    Runs a (mu + lambda) evolutionary algorithm with NSGA-II selection over
    individuals made of ``len(emotions)`` percentage genes (initialised from
    uniform(0, 100)) followed by ``len(emotions)`` intensity genes
    (initialised from uniform(0, 10)), scored by ``evaluate``.

    The function does not modify the module-level ``emotions`` dict; the
    best genes are handed back to the caller instead of being discarded.

    Returns:
        tuple: ``(emotion_values, intensities)`` taken from the best
        individual of the final population, each a list of
        ``len(emotions)`` floats. Gaussian mutation is unbounded, so the
        values are not guaranteed to stay inside their initial ranges.
    """
    n_emotions = len(emotions)

    toolbox = base.Toolbox()
    toolbox.register("attr_float", random.uniform, 0, 100)
    toolbox.register("attr_intensity", random.uniform, 0, 10)
    # One pass (n=1) over the generator tuple yields the percentage genes
    # first and the intensity genes second. The original registered this
    # line twice, leaving an unclosed parenthesis (SyntaxError).
    toolbox.register("individual", tools.initCycle, creator.Individual,
                     (toolbox.attr_float,) * n_emotions +
                     (toolbox.attr_intensity,) * n_emotions, n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
    toolbox.register("select", tools.selNSGA2)
    toolbox.register("evaluate", evaluate)

    population = toolbox.population(n=100)
    algorithms.eaMuPlusLambda(population, toolbox, mu=50, lambda_=100, cxpb=0.7, mutpb=0.2, ngen=50,
                              stats=None, halloffame=None, verbose=False)

    best_individual = tools.selBest(population, k=1)[0]
    emotion_values = best_individual[:n_emotions]
    intensities = best_individual[n_emotions:]
    return emotion_values, intensities

def predict_emotion(context):
    """Classify *context* into one of six emotion labels.

    Args:
        context: Input text; it is truncated to 512 tokens before scoring.

    Returns:
        One of "sadness", "joy", "love", "anger", "fear" or "surprise".
    """
    load_models()

    # Class index i of the classifier output is mapped to label_names[i].
    # NOTE(review): the order is assumed to match the checkpoint's id2label
    # mapping; confirm against the model config.
    label_names = ("sadness", "joy", "love", "anger", "fear", "surprise")

    encoded = emotion_prediction_tokenizer(context, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = emotion_prediction_model(**encoded).logits

    scores = torch.nn.functional.softmax(logits, dim=-1)
    best_index = torch.argmax(scores, dim=-1).item()
    return label_names[best_index]

def sentiment_analysis(text):
    """Return the polarity scores NLTK's SentimentIntensityAnalyzer gives *text*.

    A new analyzer is constructed on every call.
    """
    return SentimentIntensityAnalyzer().polarity_scores(text)

def extract_entities(text):
    """Run the spaCy pipeline on *text* and collect several linguistic views.

    Args:
        text: Raw input text.

    Returns:
        dict with the keys
        "named_entities" (list of ``(text, label)`` pairs),
        "noun_phrases" (list of noun-chunk strings),
        "key_phrases" (result of textacy's TextRank with ``topn=5``),
        "dependencies" (list of ``(token, dependency, head)`` triples) and
        "pos_tags" (list of ``(token, pos)`` pairs).
    """
    parsed = nlp(text)

    entity_pairs = [(span.text, span.label_) for span in parsed.ents]
    phrases = [phrase.text for phrase in parsed.noun_chunks]

    # textacy is imported here rather than at module level, so it is only
    # required once this function is actually called.
    from textacy.extract import keyterms as kt

    return {
        "named_entities": entity_pairs,
        "noun_phrases": phrases,
        "key_phrases": kt.textrank(parsed, normalize="lemma", topn=5),
        "dependencies": [(tok.text, tok.dep_, tok.head.text) for tok in parsed],
        "pos_tags": [(tok.text, tok.pos_) for tok in parsed],
    }

def analyze_context(text):
    """Resolve coreferences in *text* and extract LDA topics from the result.

    Args:
        text: Raw input text.

    Returns:
        dict with
        "resolved_text": the ``doc._.coref_resolved`` string, and
        "topics": the list returned by ``LdaModel.print_topics()``, or an
        empty list when preprocessing leaves no tokens to model.
    """
    doc = nlp(text)

    # Coreference resolution
    resolved_text = doc._.coref_resolved

    # Topic modeling
    processed_text = simple_preprocess(resolved_text)
    if not processed_text:
        # Empty or very short input produces an empty vocabulary, and gensim
        # refuses to train LDA on it (ValueError). Report "no topics" instead
        # of failing the whole request.
        return {
            "resolved_text": resolved_text,
            "topics": []
        }

    # The resolved text is treated as a single-document corpus.
    dictionary = corpora.Dictionary([processed_text])
    corpus = [dictionary.doc2bow(processed_text)]

    # Fixed random_state keeps the topics reproducible for the same input.
    lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=3, random_state=42)
    topics = lda_model.print_topics()

    return {
        "resolved_text": resolved_text,
        "topics": topics
    }

def analyze_text_complexity(text):
    """Compute simple size and sentiment statistics for *text* via TextBlob.

    Returns:
        dict with 'word_count', 'sentence_count', 'average_sentence_length'
        (words per sentence, 0 when there are no sentences), 'polarity' and
        'subjectivity'.
    """
    blob = TextBlob(text)
    word_total = len(blob.words)
    sentence_total = len(blob.sentences)

    if sentence_total > 0:
        words_per_sentence = word_total / sentence_total
    else:
        words_per_sentence = 0

    mood = blob.sentiment
    return {
        'word_count': word_total,
        'sentence_count': sentence_total,
        'average_sentence_length': words_per_sentence,
        'polarity': mood.polarity,
        'subjectivity': mood.subjectivity
    }

def get_ai_emotion(input_text):
    """Pick the assistant's emotion for *input_text* and look up its state.

    The emotion is whatever ``predict_emotion`` returns for the text; its
    percentage and intensity are read from the module-level ``emotions``.

    Returns:
        tuple: ``(ai_emotion, ai_emotion_percentage, ai_emotion_intensity)``.
    """
    ai_emotion = predict_emotion(input_text)
    state = emotions[ai_emotion]
    return ai_emotion, state['percentage'], state['intensity']

def generate_emotion_visualization(ai_emotion, ai_emotion_percentage, ai_emotion_intensity):
    """Render the current emotion percentages as a bar chart image.

    Args:
        ai_emotion: Name of the active emotion, shown in the chart title.
        ai_emotion_percentage: Percentage of the active emotion, shown in the
            chart title.
        ai_emotion_intensity: Not used by the chart; kept so the signature
            stays compatible with existing callers.

    Returns:
        The path of the written PNG ('emotional_state.png'), or ``None`` when
        the chart could not be produced.
    """
    emotion_visualization_path = 'emotional_state.png'
    try:
        fig = plt.figure(figsize=(8, 6))
        try:
            emotions_df = pd.DataFrame(
                [(e, d['percentage'], d['intensity']) for e, d in emotions.items()],
                columns=['emotion', 'percentage', 'intensity'])
            sns.barplot(x='emotion', y='percentage', data=emotions_df)
            plt.title(f'Current Emotional State: {ai_emotion.capitalize()} ({ai_emotion_percentage:.2f}%)')
            plt.xlabel('Emotion')
            plt.ylabel('Percentage')
            plt.xticks(rotation=90)
            plt.savefig(emotion_visualization_path)
        finally:
            # Close this figure even when plotting or saving fails; otherwise
            # every failed request would leave an open figure behind.
            plt.close(fig)
    except Exception as e:
        # Best effort: the chart is optional, so report the problem and let
        # the caller continue without an image.
        print(f"Error generating emotion visualization: {e}")
        emotion_visualization_path = None
    return emotion_visualization_path

def generate_response(ai_emotion, input_text, entities, context_analysis):
    """Generate a reply to *input_text* with the causal language model.

    Args:
        ai_emotion: Emotion name woven into the prompt; 'anger' and 'joy'
            also change the sampling temperature.
        input_text: The user's message.
        entities: Mapping with a 'named_entities' entry (as produced by
            ``extract_entities``).
        context_analysis: Mapping with a 'topics' entry (as produced by
            ``analyze_context``).

    Returns:
        The newly generated text, stripped of surrounding whitespace. The
        prompt itself is not included in the returned string.
    """
    load_models()
    prompt = (
        f"As an AI assistant, I am currently feeling {ai_emotion}. My response will reflect this emotional state. "
        f"The input text contains the following entities: {entities['named_entities']}. "
        f"The main topics are: {context_analysis['topics']}. "
        f"Considering this context, here's my response to '{input_text}': "
    )

    # The model can only attend to a fixed number of positions (1024 for
    # GPT-2). Prompt tokens plus generated tokens must fit inside it, so the
    # prompt is truncated to whatever is left after reserving the generation
    # budget. Truncating at 8192 let over-long prompts through to the model.
    max_new_tokens = 400
    context_window = getattr(response_model.config, "max_position_embeddings", 1024)
    max_prompt_tokens = max(1, context_window - max_new_tokens)

    inputs = response_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True,
                                max_length=max_prompt_tokens)

    # Sampling temperature per emotion; anything else uses the default.
    temperature = {'anger': 0.9, 'joy': 0.5}.get(ai_emotion, 0.7)

    with torch.no_grad():
        response_ids = response_model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            # max_new_tokens counts generated tokens only. max_length also
            # counted the prompt, so long prompts got little or no output.
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=temperature,
            pad_token_id=response_tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; keep only the continuation
    # so the reply does not echo the instruction text back to the user.
    prompt_length = inputs.input_ids.shape[-1]
    generated_ids = response_ids[0][prompt_length:]
    response = response_tokenizer.decode(generated_ids, skip_special_tokens=True)

    return response.strip()

def interactive_interface(input_text):
    """Run the full analysis pipeline for one user message.

    Predicts the emotion, scores sentiment and text complexity, renders the
    emotion chart, extracts entities and topics, generates a reply, and
    records the exchange in ``conversation_history``.

    Args:
        input_text: The user's message.

    Returns:
        dict with the keys "emotion", "sentiment", "entities",
        "context_analysis", "text_complexity", "ai_emotion",
        "ai_emotion_percentage", "ai_emotion_intensity",
        "emotion_visualization" (image path or ``None``) and "response".
    """
    predicted_emotion = predict_emotion(input_text)
    sentiment_scores = sentiment_analysis(input_text)
    text_complexity = analyze_text_complexity(input_text)
    # get_ai_emotion calls predict_emotion again internally, so the
    # classifier runs twice per request.
    ai_emotion, ai_emotion_percentage, ai_emotion_intensity = get_ai_emotion(input_text)
    emotion_visualization = generate_emotion_visualization(ai_emotion, ai_emotion_percentage, ai_emotion_intensity)

    entities = extract_entities(input_text)
    context_analysis = analyze_context(input_text)

    response = generate_response(ai_emotion, input_text, entities, context_analysis)

    # Keep the history bounded by dropping the oldest exchange.
    conversation_history.append({'user': input_text, 'response': response})
    if len(conversation_history) > max_history_length:
        conversation_history.pop(0)

    # The original `return` was indented by a single space, which is an
    # IndentationError; it belongs at function-body level.
    return {
        "emotion": predicted_emotion,
        "sentiment": sentiment_scores,
        "entities": entities,
        "context_analysis": context_analysis,
        "text_complexity": text_complexity,
        "ai_emotion": ai_emotion,
        "ai_emotion_percentage": ai_emotion_percentage,
        "ai_emotion_intensity": ai_emotion_intensity,
        "emotion_visualization": emotion_visualization,
        "response": response
    }

# Gradio interface
def gradio_interface(input_text):
    """Adapt ``interactive_interface`` to the two Gradio outputs.

    Args:
        input_text: Text entered in the Gradio textbox.

    Returns:
        tuple: a multi-line text summary of the analysis, and the path of
        the emotion chart image (or ``None`` if none was produced).
    """
    result = interactive_interface(input_text)

    # One report line per field; joined with newlines, with no trailing one.
    report_lines = [
        f"Predicted Emotion: {result['emotion']}",
        f"Sentiment: {result['sentiment']}",
        f"AI Emotion: {result['ai_emotion']} ({result['ai_emotion_percentage']:.2f}%, Intensity: {result['ai_emotion_intensity']:.2f})",
        f"Entities: {result['entities']}",
        f"Context Analysis: {result['context_analysis']}",
        f"Text Complexity: {result['text_complexity']}",
        f"AI Response: {result['response']}",
    ]

    return "\n".join(report_lines), result['emotion_visualization']

# Gradio UI: a single text input wired to gradio_interface, whose two return
# values fill the text output (analysis summary) and the image output (path
# of the emotion chart on disk).
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs=["text", gr.Image(type="filepath")],
    title="Enhanced AI Assistant",
    description="Enter your text to interact with the AI assistant."
)

# Launch the app only when the file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()