File size: 4,771 Bytes
ea290b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from gtts import gTTS
import os
import re
import random

# Enable Dark Mode and Custom CSS
# Injected as raw HTML, so unsafe_allow_html=True is required.
# NOTE(review): .css-1d391kg looks like an auto-generated Streamlit class
# name; these change between Streamlit versions — confirm it still matches.
st.markdown(
    """
    <style>
        body {
            background-color: #121212;
            color: white;
        }
        .css-1d391kg {
            background-color: #333;
        }
        .stButton > button {
            background-color: #6200ee;
            color: white;
        }
        .stTextInput input {
            background-color: #333;
            color: white;
        }
    </style>
    """,
    unsafe_allow_html=True,
)

# Load models and datasets
# NOTE(review): this runs at module level, and Streamlit re-executes the
# whole script on every interaction — a 7B model reload per rerun is very
# expensive; consider st.cache_resource. Confirm intended behavior.
try:
    code_llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/CodeLlama-7B-Python")
    code_llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7B-Python")
except Exception as e:
    # Keep the app alive on load failure; generate_response() checks for
    # None and falls back to a "model not loaded" reply.
    st.error(f"Error loading model: {e}")
    code_llama_model = None
    code_llama_tokenizer = None

try:
    wordlist_dataset = load_dataset("Canstralian/Wordlists")
except Exception as e:
    # Wordlist features are disabled when this stays None.
    st.error(f"Error loading Wordlist dataset: {e}")
    wordlist_dataset = None

# Initialize chat history storage
# Seed the session with a greeting so the chat pane is never empty.
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you?"}]

# Function to validate the prompt using regular expressions
def validate_prompt(prompt: str) -> bool:
    """
    Validates if the input prompt is not empty and meets some basic format rules.
    Args:
        prompt (str): The input prompt to be validated.
    Returns:
        bool: True if the prompt is valid, False otherwise.
    """
    # Improved validation: Allow alphanumeric characters, spaces, and punctuation
    if re.match(r'^[A-Za-z0-9\s\.,;!?(){}[\]]+$', prompt):
        return True
    return False

# Function to convert text to speech
def text_to_speech(text: str) -> None:
    """
    Converts text to speech using gTTS and plays it back in the app.

    Args:
        text (str): The text to be converted to speech. Empty text is a no-op.
    """
    # Guard: gTTS raises on empty input; skip silently instead.
    if not text:
        return
    try:
        tts = gTTS(text, lang='en')
        tts.save("response.mp3")
        # Stream the audio to the browser via Streamlit. The previous
        # os.system("mpg321 response.mp3") played audio on the *server*
        # process and failed on hosts without mpg321 installed.
        st.audio("response.mp3")
    except Exception as e:
        # Best-effort: speech failure must not break the chat flow.
        st.error(f"Error generating speech: {e}")

# Display chat history
# Streamlit reruns the whole script per interaction, so the full stored
# conversation is re-rendered here every time.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Function to generate chatbot response
def generate_response(prompt: str) -> str:
    """
    Generates a response from the assistant based on the user input.
    Args:
        prompt (str): The user's input prompt.
    Returns:
        str: The generated response from the assistant.
    """
    lowered = prompt.lower()

    # Guard first: without a loaded model we can only apologize.
    if not (code_llama_model and code_llama_tokenizer):
        response = "Model not loaded. Please try again later."
    elif "python" in lowered:
        # Route code-related queries through the Code Llama model.
        encoded = code_llama_tokenizer(prompt, return_tensors="pt")
        generated = code_llama_model.generate(**encoded, max_length=150, num_return_sequences=1)
        response = code_llama_tokenizer.decode(generated[0], skip_special_tokens=True)
    else:
        response = "I'm here to assist with your queries."

    # Keyword-specific replies override whatever was produced above.
    if "osint" in lowered:
        response = "OSINT data analysis coming soon!"
    elif "wordlist" in lowered and wordlist_dataset:
        # Pull one random entry from the loaded Wordlist dataset.
        entry = random.choice(wordlist_dataset["train"])["text"]
        response = f"Here's a random wordlist entry: {entry}"

    return response

# User input handling
# chat_input returns None until the user submits, so the walrus guard
# skips this whole section on ordinary reruns.
if prompt := st.chat_input():
    # Validate user input
    if validate_prompt(prompt):
        # Persist then echo the user's turn.
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

        # Generate and display response with smooth animations
        with st.chat_message("assistant"):
            with st.spinner("Assistant is typing..."):
                response = generate_response(prompt)
                st.write(response)

        # Text-to-Speech integration for the assistant's response
        text_to_speech(response)

        # Store the assistant's response so it survives the next rerun.
        st.session_state.messages.append({"role": "assistant", "content": response})
    else:
        st.warning("Invalid input. Please ensure your input contains only valid characters.")

# User Feedback Section
# NOTE(review): st.selectbox returns the first option by default, so
# `feedback` is always truthy and the success banner shows on every run —
# confirm whether a placeholder/submit step is intended.
# NOTE(review): the option and icon strings appear to be mojibake
# (mis-decoded emoji); verify the file's encoding and restore the intended
# characters — left byte-identical here since they are runtime strings.
feedback = st.selectbox("How was your experience?", ["😊 Excellent", "😐 Okay", "πŸ˜• Poor"])
if feedback:
    st.success(f"Thank you for your feedback: {feedback}", icon="βœ…")