File size: 4,847 Bytes
dd18e32
a182cbf
 
 
 
 
 
dd18e32
a182cbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd18e32
a182cbf
 
 
 
 
 
 
 
dd18e32
a182cbf
 
 
 
 
dd18e32
a182cbf
 
 
dd18e32
a182cbf
 
 
 
 
 
 
 
 
df4557d
 
dd18e32
a182cbf
 
 
 
 
 
 
 
 
 
df4557d
a182cbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd18e32
 
 
df4557d
 
 
 
 
 
 
a182cbf
df4557d
 
a182cbf
 
 
df4557d
a182cbf
df4557d
 
 
 
a182cbf
 
 
df4557d
 
 
a182cbf
 
 
 
 
 
df4557d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Standard library
import os
import random
import re
import subprocess

# Third-party
import streamlit as st
from datasets import load_dataset
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer

# Enable Dark Mode and Custom CSS.
# Injects raw CSS via an HTML <style> tag; unsafe_allow_html is required for
# Streamlit to render it instead of escaping it.
# NOTE(review): ".css-1d391kg" targets an auto-generated Streamlit class name,
# which can change between Streamlit releases — verify after upgrades.
st.markdown(
    """
    <style>
        body {
            background-color: #121212;
            color: white;
        }
        .css-1d391kg {
            background-color: #333;
        }
        .stButton > button {
            background-color: #6200ee;
            color: white;
        }
        .stTextInput input {
            background-color: #333;
            color: white;
        }
    </style>
    """,
    unsafe_allow_html=True,
)

# Load models and datasets.
# Both loads hit the Hugging Face Hub on first run (network + disk cache).
# Broad `except Exception` is deliberate here: this is the app's top-level
# boundary, and failures are surfaced to the user via st.error while the
# globals are set to None so the rest of the app can degrade gracefully.
try:
    code_llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/CodeLlama-7B-Python")
    code_llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7B-Python")
except Exception as e:
    st.error(f"Error loading model: {e}")
    # Sentinel values checked later by generate_response().
    code_llama_model = None
    code_llama_tokenizer = None

try:
    wordlist_dataset = load_dataset("Canstralian/Wordlists")
except Exception as e:
    st.error(f"Error loading Wordlist dataset: {e}")
    wordlist_dataset = None

# Initialize chat history storage.
# st.session_state persists across Streamlit reruns, so this seeds the
# conversation exactly once per browser session.
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you?"}]

# Function to validate the prompt using regular expressions
def validate_prompt(prompt: str) -> bool:
    """
    Validates that the input prompt is non-blank and contains only an
    allowed set of characters.

    The allowed set covers letters, digits, whitespace, and common
    punctuation (including apostrophes, quotes, colons and hyphens, which
    the previous pattern rejected for ordinary prompts like "What's that?").

    Args:
        prompt (str): The input prompt to be validated.
    Returns:
        bool: True if the prompt is valid, False otherwise.
    """
    # Reject empty or whitespace-only input explicitly (the docstring promised
    # "not empty", but a bare \s+ match used to slip through).
    if not prompt or not prompt.strip():
        return False
    # fullmatch anchors both ends; the class is a superset of the old one,
    # so every previously-valid prompt remains valid.
    return bool(re.fullmatch(r"[A-Za-z0-9\s.,;:!?'\"@#&%$^*+=<>/\\|~`(){}\[\]_-]+", prompt))

# Function to convert text to speech
def text_to_speech(text: str) -> None:
    """
    Converts text to speech using gTTS, saves it as "response.mp3", and
    plays it with the external `mpg321` player.

    Args:
        text (str): The text to be converted to speech.

    Side effects:
        Writes "response.mp3" to the working directory and spawns mpg321.
        Errors are reported via st.error rather than raised.
    """
    try:
        tts = gTTS(text, lang='en')
        tts.save("response.mp3")
        # subprocess.run with an argument list avoids invoking a shell
        # (safer and more controllable than os.system). mpg321 must be on PATH.
        subprocess.run(["mpg321", "response.mp3"], check=False)
    except Exception as e:
        st.error(f"Error generating speech: {e}")

# Function to generate chatbot response
def generate_response(prompt: str) -> str:
    """
    Generates a response from the assistant based on the user input.

    Keyword routing, in priority order:
      1. "osint"    -> placeholder OSINT message
      2. "wordlist" -> a random entry from the wordlist dataset (if loaded)
      3. "python"   -> Code Llama text generation (if the model loaded)
      4. otherwise  -> a generic fallback message

    Args:
        prompt (str): The user's input prompt.
    Returns:
        str: The generated response from the assistant.
    """
    lowered = prompt.lower()

    # Check the cheap keyword overrides FIRST. Previously these ran after the
    # model call and discarded its output, so an "osint"/"wordlist" prompt
    # containing "python" paid for a full generation it never used.
    if "osint" in lowered:
        # Respond with dataset-based OSINT information (placeholder for now).
        return "OSINT data analysis coming soon!"
    if "wordlist" in lowered and wordlist_dataset:
        # Fetch and display a random entry from the Wordlist dataset.
        wordlist_entry = random.choice(wordlist_dataset["train"])["text"]
        return f"Here's a random wordlist entry: {wordlist_entry}"

    if code_llama_model and code_llama_tokenizer:
        if "python" in lowered:
            # Use the Code Llama model for code-related queries.
            # NOTE(review): decode() includes the prompt tokens in the output;
            # confirm whether echoing the prompt back is intended.
            inputs = code_llama_tokenizer(prompt, return_tensors="pt")
            outputs = code_llama_model.generate(**inputs, max_length=150, num_return_sequences=1)
            return code_llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return "I'm here to assist with your queries."

    return "Model not loaded. Please try again later."

# Display chat history using native Streamlit components.
# Roles without a label (anything other than user/assistant) are skipped.
_role_labels = {"user": "You", "assistant": "Assistant"}
for entry in st.session_state.messages:
    label = _role_labels.get(entry["role"])
    if label is not None:
        st.markdown(f"**{label}:** {entry['content']}")

# User input handling.
# Streamlit reruns the script on every interaction; `prompt` is truthy only
# when the text input is non-empty on this run.
prompt = st.text_input("Your message:", key="chat_input")
if prompt:
    # Validate user input before touching the model or the history.
    if validate_prompt(prompt):
        # Persist the user turn, then echo it immediately.
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.markdown(f"**You:** {prompt}")

        # Generate assistant response (model inference can be slow, hence the spinner).
        with st.spinner("Assistant is typing..."):
            response = generate_response(prompt)
            st.markdown(f"**Assistant:** {response}")

        # Store the assistant's response so it survives the next rerun.
        st.session_state.messages.append({"role": "assistant", "content": response})

        # Text-to-Speech integration for the assistant's response
        # (writes response.mp3 and plays it server-side).
        text_to_speech(response)
    else:
        st.warning("Invalid input. Please ensure your input contains only valid characters.")

# User Feedback Section.
# NOTE(review): st.selectbox always returns the first option by default, so
# `feedback` is always truthy and the thank-you message shows on every run —
# confirm whether a placeholder/None default was intended.
feedback = st.selectbox("How was your experience?", ["😊 Excellent", "😐 Okay", "πŸ˜• Poor"])
if feedback:
    st.success(f"Thank you for your feedback: {feedback}", icon="βœ…")