import os
import random
import re

import streamlit as st
from datasets import load_dataset
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer
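
# Dark-theme styling. Note: ".css-1d391kg" is an auto-generated Streamlit
# class name, so this selector may need updating after a Streamlit upgrade.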
st.markdown(
    """
    <style>
    body {
        background-color: #121212;
        color: white;
    }
    .css-1d391kg {
        background-color: #333;
    }
    .stButton > button {
        background-color: #6200ee;
        color: white;
    }
    .stTextInput input {
        background-color: #333;
        color: white;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
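

# Cache the checkpoint loads so the model and tokenizer are created once per
# process rather than on every Streamlit rerun; reloading a 7B model on each
# interaction would be prohibitively slow.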
@st.cache_resource
def load_code_llama():
    """Load the Code Llama model and tokenizer once and reuse them across reruns."""
    model = AutoModelForCausalLM.from_pretrained("meta-llama/CodeLlama-7B-Python")
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7B-Python")
    return model, tokenizer


try:
    code_llama_model, code_llama_tokenizer = load_code_llama()
except Exception as e:
    st.error(f"Error loading model: {e}")
    code_llama_model = None
    code_llama_tokenizer = None
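
# The wordlist dataset is optional: features that depend on it are skipped
# when loading fails (see generate_response below).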
try:
    wordlist_dataset = load_dataset("Canstralian/Wordlists")
except Exception as e:
    st.error(f"Error loading Wordlist dataset: {e}")
    wordlist_dataset = None
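
# Streamlit reruns the whole script on every interaction, so the chat history
# must live in st.session_state to survive reruns.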
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you?"}]


def validate_prompt(prompt: str) -> bool:
    """
    Validates that the prompt is non-empty and contains only allowed characters.

    Args:
        prompt (str): The input prompt to be validated.

    Returns:
        bool: True if the prompt is valid, False otherwise.
    """
    # The + quantifier requires at least one character, so empty input fails.
    return bool(re.match(r'^[A-Za-z0-9\s\.,;!?(){}[\]]+$', prompt))


def text_to_speech(text: str) -> None:
    """
    Converts text to speech using gTTS and plays it back in the app.

    Args:
        text (str): The text to be converted to speech.
    """
    try:
        tts = gTTS(text, lang='en')
        tts.save("response.mp3")
        # Play the clip in the user's browser. Shelling out to a server-side
        # player (e.g. os.system("mpg321 response.mp3")) would only be audible
        # on the machine running Streamlit, not to remote users.
        st.audio("response.mp3")
    except Exception as e:
        st.error(f"Error generating speech: {e}")


def generate_response(prompt: str) -> str:
    """
    Generates a response from the assistant based on the user input.

    Args:
        prompt (str): The user's input prompt.

    Returns:
        str: The generated response from the assistant.
    """
    lowered = prompt.lower()

    # Handle keyword-routed intents first, so an expensive model generation is
    # never run only to have its result discarded.
    if "osint" in lowered:
        return "OSINT data analysis coming soon!"
    if "wordlist" in lowered and wordlist_dataset:
        wordlist_entry = random.choice(wordlist_dataset["train"])["text"]
        return f"Here's a random wordlist entry: {wordlist_entry}"

    if code_llama_model and code_llama_tokenizer:
        if "python" in lowered:
            inputs = code_llama_tokenizer(prompt, return_tensors="pt")
            outputs = code_llama_model.generate(**inputs, max_length=150, num_return_sequences=1)
            return code_llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return "I'm here to assist with your queries."
    return "Model not loaded. Please try again later."
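

# Replay the stored conversation so earlier turns remain visible after reruns.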
for message in st.session_state.messages:
    if message["role"] == "user":
        st.markdown(f"**You:** {message['content']}")
    elif message["role"] == "assistant":
        st.markdown(f"**Assistant:** {message['content']}")
prompt = st.text_input("Your message:", key="chat_input")
if prompt:
    if validate_prompt(prompt):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.markdown(f"**You:** {prompt}")

        with st.spinner("Assistant is typing..."):
            response = generate_response(prompt)
            st.markdown(f"**Assistant:** {response}")

        st.session_state.messages.append({"role": "assistant", "content": response})
        text_to_speech(response)
    else:
        st.warning("Invalid input. Please ensure your input contains only valid characters.")
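
# index=None keeps the selectbox unselected on first render, so the thank-you
# message appears only after the user actually picks an option.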
feedback = st.selectbox("How was your experience?", ["Excellent", "Okay", "Poor"], index=None)
if feedback:
    st.success(f"Thank you for your feedback: {feedback}", icon="✅")