# KASOTI / app.py — Streamlit 20-questions guessing game with voice input.
# (Origin: Hugging Face Space "iisadia/KASOTI", commit 4a0eb2e; the web-UI
# header lines "raw / history / blame / 22.7 kB" were scrape residue.)
import streamlit as st
import time
import requests
from streamlit.components.v1 import html
import os
from dotenv import load_dotenv
import numpy as np
import torchaudio
from audio_recorder_streamlit import audio_recorder
import torch
from io import BytesIO
import hashlib
from transformers import pipeline
# Load Whisper model (cached)
@st.cache_resource
def load_model():
    """Return the Whisper-base ASR pipeline, built once and cached across reruns."""
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
    )
# Audio processing function
def process_audio(audio_bytes):
    """Convert raw recorded bytes into the mono 16 kHz dict Whisper expects.

    Args:
        audio_bytes: encoded audio as returned by the recorder widget.

    Returns:
        {"raw": 1-D float numpy array, "sampling_rate": 16000}
    """
    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
    # Downmix any multi-channel recording to a single channel.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Whisper operates on 16 kHz audio; resample only when needed.
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = resampler(waveform)
    return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
# Voice input component
# Updated voice_input function
def voice_input(key, prompt_text, default_text=""):
    """Render a text input paired with a microphone recorder.

    New recordings are transcribed with Whisper, stored under
    st.session_state[f"text_{key}"], and a rerun is triggered so the UI
    refreshes. Returns the stored text value.
    """
    # Initialize session state keys if they don't exist
    if f"text_{key}" not in st.session_state:
        st.session_state[f"text_{key}"] = default_text
    col1, col2 = st.columns([4, 1])
    with col1:
        # Create the text input with the current session state value
        text_value = st.text_input(prompt_text, value=st.session_state[f"text_{key}"], key=f"text_input_{key}")
    with col2:
        audio_bytes = audio_recorder(
            pause_threshold=0.8,
            text="🎤 Speak",
            recording_color="#e8b622",
            neutral_color="#6aa36f",
            key=f"recorder_{key}"
        )
    # Process audio if new recording is available
    if audio_bytes:
        # Hash the raw bytes so the same clip is not re-transcribed on every rerun.
        current_hash = hashlib.md5(audio_bytes).hexdigest()
        if f"last_audio_hash_{key}" not in st.session_state or current_hash != st.session_state[f"last_audio_hash_{key}"]:
            st.session_state[f"last_audio_hash_{key}"] = current_hash
            try:
                audio_input = process_audio(audio_bytes)
                whisper = load_model()
                transcribed_text = whisper(audio_input)["text"]
                # Update the session state value (this happens before widget creation)
                st.session_state[f"text_{key}"] = transcribed_text
                st.rerun()
            except Exception as e:
                st.error(f"Error in voice input: {str(e)}")
    # Return the current text value (from either manual input or voice)
    # NOTE(review): `text_value` (the widget's live value, stored under the
    # separate key f"text_input_{key}") is never read back into f"text_{key}",
    # so manually typed text may not reach this return value; likewise, once
    # the keyed widget holds its own state, updating f"text_{key}" before a
    # rerun may not refresh what the widget displays. Verify both paths
    # against the Streamlit widget-state lifecycle before changing this.
    return st.session_state[f"text_{key}"]
# Enhanced Custom CSS with modern design
def inject_custom_css():
    """Inject the app-wide stylesheet: fonts, gradient titles, card layouts,
    button styling, progress bar, and entrance animations."""
    stylesheet = """
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
        @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css');
        * {
            font-family: 'Inter', sans-serif;
        }
        body {
            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
        }
        .title {
            font-size: 2.8rem !important;
            font-weight: 800 !important;
            background: linear-gradient(45deg, #6C63FF, #3B82F6);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            text-align: center;
            margin: 1rem 0;
            letter-spacing: -1px;
        }
        .subtitle {
            font-size: 1.1rem !important;
            text-align: center;
            color: #64748B !important;
            margin-bottom: 2.5rem;
            animation: fadeInSlide 1s ease;
        }
        .question-box {
            background: white;
            border-radius: 20px;
            padding: 2rem;
            margin: 1.5rem 0;
            box-shadow: 0 10px 25px rgba(0,0,0,0.08);
            border: 1px solid #e2e8f0;
            position: relative;
            transition: transform 0.2s ease;
            color: black;
        }
        .question-box:hover {
            transform: translateY(-3px);
        }
        .question-box::before {
            content: "🕹️";
            position: absolute;
            left: -15px;
            top: -15px;
            background: white;
            border-radius: 50%;
            padding: 8px;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
            font-size: 1.2rem;
        }
        .input-box {
            background: white;
            border-radius: 12px;
            padding: 1.5rem;
            margin: 1rem 0;
            box-shadow: 0 4px 6px rgba(0,0,0,0.05);
        }
        .stTextInput input {
            border: 2px solid #e2e8f0 !important;
            border-radius: 10px !important;
            padding: 12px 16px !important;
            transition: all 0.3s ease !important;
        }
        .stTextInput input:focus {
            border-color: #6C63FF !important;
            box-shadow: 0 0 0 3px rgba(108, 99, 255, 0.2) !important;
        }
        button {
            background: linear-gradient(45deg, #6C63FF, #3B82F6) !important;
            color: white !important;
            border: none !important;
            border-radius: 10px !important;
            padding: 12px 24px !important;
            font-weight: 600 !important;
            transition: all 0.3s ease !important;
        }
        button:hover {
            transform: translateY(-2px);
            box-shadow: 0 5px 15px rgba(108, 99, 255, 0.3) !important;
        }
        .final-reveal {
            animation: fadeInUp 1s ease;
            font-size: 2.8rem;
            background: linear-gradient(45deg, #6C63FF, #3B82F6);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            text-align: center;
            margin: 2rem 0;
            font-weight: 800;
        }
        .help-chat {
            background: rgba(255,255,255,0.9);
            backdrop-filter: blur(10px);
            border-radius: 15px;
            padding: 1rem;
            margin: 1rem 0;
            box-shadow: 0 8px 30px rgba(0,0,0,0.12);
        }
        @keyframes fadeInSlide {
            0% { opacity: 0; transform: translateY(20px); }
            100% { opacity: 1; transform: translateY(0); }
        }
        @keyframes fadeInUp {
            0% { opacity: 0; transform: translateY(30px); }
            100% { opacity: 1; transform: translateY(0); }
        }
        .progress-bar {
            height: 6px;
            background: #e2e8f0;
            border-radius: 3px;
            margin: 1.5rem 0;
            overflow: hidden;
        }
        .progress-fill {
            height: 100%;
            background: linear-gradient(90deg, #6C63FF, #3B82F6);
            transition: width 0.5s ease;
        }
        .question-count {
            color: #6C63FF;
            font-weight: 600;
            font-size: 0.9rem;
            margin-bottom: 0.5rem;
        }
    </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
# Confetti animation (enhanced)
def show_confetti():
    """Render a celebratory multi-burst confetti animation via canvas-confetti."""
    confetti_script = """
    <canvas id="confetti-canvas" class="confetti"></canvas>
    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/confetti.browser.min.js"></script>
    <script>
    const count = 200;
    const defaults = {
        origin: { y: 0.7 },
        zIndex: 1050
    };
    function fire(particleRatio, opts) {
        confetti(Object.assign({}, defaults, opts, {
            particleCount: Math.floor(count * particleRatio)
        }));
    }
    fire(0.25, { spread: 26, startVelocity: 55 });
    fire(0.2, { spread: 60 });
    fire(0.35, { spread: 100, decay: 0.91, scalar: 0.8 });
    fire(0.1, { spread: 120, startVelocity: 25, decay: 0.92, scalar: 1.2 });
    fire(0.1, { spread: 120, startVelocity: 45 });
    </script>
    """
    html(confetti_script)
# Enhanced AI question generation for guessing game using Llama model
def ask_llama(conversation_history, category, is_final_guess=False):
    """Ask the Groq-hosted Llama model for the next question or final guess.

    Args:
        conversation_history: list of {"role", "content"} chat messages so far.
        category: "person", "place" or "object".
        is_final_guess: when True, request only the final guess text.

    Returns:
        The model's reply text, or "Could not generate question" on any
        failure (the underlying error is surfaced via st.error).
    """
    api_url = "https://api.groq.com/openai/v1/chat/completions"
    # SECURITY FIX: the Groq API key was hard-coded (and therefore leaked) in
    # source control. Read it from the environment instead; the module already
    # imports python-dotenv, so a .env file can supply GROQ_API_KEY — confirm
    # load_dotenv() is called at startup.
    headers = {
        "Authorization": f"Bearer {os.getenv('GROQ_API_KEY', '')}",
        "Content-Type": "application/json"
    }
    system_prompt = f"""You're playing 20 questions to guess a {category}. Follow these rules:
1. Ask strategic, non-repeating yes/no questions that narrow down possibilities
2. Consider all previous answers carefully before asking next question
3. If you're very confident (80%+ sure), respond with "Final Guess: [your guess]"
4. For places: ask about continent, climate, famous landmarks, country, city or population
5. For people: ask about fictional or real, profession, gender, alive/dead, nationality, or fame
6. For objects: ask about size, color, usage, material, or where it's found
7. Never repeat questions and always make progress toward guessing"""
    if is_final_guess:
        prompt = f"""Based on these answers about a {category}, provide ONLY your final guess with no extra text:
{conversation_history}"""
    else:
        prompt = "Ask your next strategic yes/no question that will best narrow down the possibilities."
    messages = [
        {"role": "system", "content": system_prompt},
        *conversation_history,
        {"role": "user", "content": prompt}
    ]
    data = {
        "model": "llama-3.3-70b-versatile",
        "messages": messages,
        # Slightly lower temperature for the final guess to favour precision.
        "temperature": 0.7 if is_final_guess else 0.8,
        "max_tokens": 100
    }
    try:
        # ROBUSTNESS FIX: a timeout keeps a hung request from freezing the UI.
        response = requests.post(api_url, headers=headers, json=data, timeout=30)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"Error calling Llama API: {str(e)}")
        return "Could not generate question"
# Help AI assistant backed by the Mistral chat-completions API.
# SECURITY FIX: the Mistral API key was hard-coded (and therefore leaked) in
# source control; read it from the environment instead (a .env file can supply
# it via the module-level python-dotenv import — confirm load_dotenv() runs).
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")

def ask_help_agent(query):
    """Answer a player's help question via the Mistral chat API.

    Replays the stored help_conversation history (if any) so the assistant
    keeps context, then appends `query` as the newest user turn.

    Returns:
        The assistant's reply text, or a human-readable error string on
        HTTP or unexpected failure (this function never raises).
    """
    try:
        # Prepare Mistral API request
        url = "https://api.mistral.ai/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json"
        }
        system_message = "You are a friendly Chatbot."
        # Build message history from prior help-chat turns.
        messages = [{"role": "system", "content": system_message}]
        if "help_conversation" in st.session_state:
            for msg in st.session_state.help_conversation:
                if msg.get("query"):
                    messages.append({"role": "user", "content": msg["query"]})
                if msg.get("response"):
                    messages.append({"role": "assistant", "content": msg["response"]})
        # Add current user query
        messages.append({"role": "user", "content": query})
        # API payload
        payload = {
            "model": "mistral-tiny",
            "messages": messages,
            "temperature": 0.7,
            "top_p": 0.95
        }
        # ROBUSTNESS FIX: a timeout keeps a hung request from freezing the UI.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]
        else:
            return f"API Error {response.status_code}: {response.text}"
    except Exception as e:
        return f"Error in help agent: {str(e)}"
# Import transformers and cache the help agent for performance
@st.cache_resource
def get_help_agent():
    """Build (once, cached) a local BlenderBot conversational pipeline.

    NOTE(review): this helper is never called anywhere in this file — the
    help chat goes through ask_help_agent() and the Mistral API instead.
    Presumably kept as an offline fallback; confirm before removing.
    NOTE(review): the "conversational" pipeline task was dropped in recent
    transformers releases — verify it exists in the pinned version.
    """
    from transformers import pipeline
    # Using BlenderBot 400M Distill as the public conversational model (used elsewhere)
    return pipeline("conversational", model="facebook/blenderbot-400M-distill")
# Main game logic with enhanced UI
def main():
    """Drive the KASOTI game UI through its session-state machine.

    States stored in st.session_state.game_state:
        "start" -> "gameplay" -> ("confirm_guess") -> "result"

    CONSISTENCY FIX: every st.experimental_rerun() call was replaced with
    st.rerun() — voice_input() already uses st.rerun(), and the experimental
    alias has been removed from modern Streamlit.
    """
    inject_custom_css()
    st.markdown('<div class="title">KASOTI</div>', unsafe_allow_html=True)
    st.markdown('<div class="subtitle">AI-Powered Guessing Game Challenge</div>', unsafe_allow_html=True)

    # One-time session initialisation.
    if 'game_state' not in st.session_state:
        st.session_state.game_state = "start"
        st.session_state.questions = []
        st.session_state.current_q = 0
        st.session_state.answers = []
        st.session_state.conversation_history = []
        st.session_state.category = None
        st.session_state.final_guess = None
        st.session_state.help_conversation = []  # separate history for help agent

    # Start screen with enhanced layout
    if st.session_state.game_state == "start":
        with st.container():
            st.markdown("""
            <div class="question-box">
                <h3 style="color: #6C63FF; margin-bottom: 1.5rem;">🎮 Welcome to KASOTI</h3>
                <p style="line-height: 1.6; color: #64748B;">
                    Think of something and I'll try to guess it in 20 questions or less!<br>
                    Choose from these categories:
                </p>
                <div style="display: grid; gap: 1rem; margin: 2rem 0;">
                    <div style="padding: 1.5rem; background: #f8f9fa; border-radius: 12px;">
                        <h4 style="margin: 0; color: #6C63FF;">🧑 Person</h4>
                        <p style="margin: 0.5rem 0 0; color: #64748B;">Celebrity, fictional character, historical figure</p>
                    </div>
                    <div style="padding: 1.5rem; background: #f8f9fa; border-radius: 12px;">
                        <h4 style="margin: 0; color: #6C63FF;">🌍 Place</h4>
                        <p style="margin: 0.5rem 0 0; color: #64748B;">City, country, landmark, geographical location</p>
                    </div>
                    <div style="padding: 1.5rem; background: #f8f9fa; border-radius: 12px;">
                        <h4 style="margin: 0; color: #6C63FF;">🎯 Object</h4>
                        <p style="margin: 0.5rem 0 0; color: #64748B;">Everyday item, tool, vehicle, or concept</p>
                    </div>
                </div>
            </div>
            """, unsafe_allow_html=True)

            with st.form("start_form"):
                # Voice-or-text input for the category choice.
                category_input = voice_input("category", "Enter category (person/place/object):").strip().lower()
                if st.form_submit_button("Start Game"):
                    if not category_input:
                        st.error("Please enter a category!")
                    elif category_input not in ["person", "place", "object"]:
                        st.error("Please enter either 'person', 'place', or 'object'!")
                    else:
                        st.session_state.category = category_input
                        first_question = ask_llama([
                            {"role": "user", "content": "Ask your first strategic yes/no question."}
                        ], category_input)
                        st.session_state.questions = [first_question]
                        st.session_state.conversation_history = [
                            {"role": "assistant", "content": first_question}
                        ]
                        st.session_state.game_state = "gameplay"
                        st.rerun()

    # Gameplay screen with voice answer input
    elif st.session_state.game_state == "gameplay":
        with st.container():
            # Progress header: question counter plus a filled progress bar.
            progress = (st.session_state.current_q + 1) / 20
            st.markdown(f"""
            <div class="question-count">QUESTION {st.session_state.current_q + 1} OF 20</div>
            <div class="progress-bar">
                <div class="progress-fill" style="width: {progress * 100}%"></div>
            </div>
            """, unsafe_allow_html=True)

            current_question = st.session_state.questions[st.session_state.current_q]
            st.markdown(f'''
            <div class="question-box">
                <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1.5rem;">
                    <div style="background: #6C63FF; width: 40px; height: 40px; border-radius: 50%;
                                display: flex; align-items: center; justify-content: center; color: white;">
                        <i class="fas fa-robot"></i>
                    </div>
                    <h3 style="margin: 0; color: #1E293B;">AI Question</h3>
                </div>
                <p style="font-size: 1.1rem; line-height: 1.6; color: #1E293B;">{current_question}</p>
            </div>
            ''', unsafe_allow_html=True)

            # The model may volunteer a final guess mid-stream; jump to
            # the confirmation screen instead of asking for an answer.
            if "Final Guess:" in current_question:
                st.session_state.final_guess = current_question.split("Final Guess:")[1].strip()
                st.session_state.game_state = "confirm_guess"
                st.rerun()

            with st.form("answer_form"):
                # Voice-or-text input for the yes/no/both answer.
                answer_input = voice_input(f"answer_{st.session_state.current_q}",
                                           "Your answer (yes/no/both):").strip().lower()
                if st.form_submit_button("Submit"):
                    if answer_input not in ["yes", "no", "both"]:
                        st.error("Please answer with 'yes', 'no', or 'both'!")
                    else:
                        st.session_state.answers.append(answer_input)
                        st.session_state.conversation_history.append(
                            {"role": "user", "content": answer_input}
                        )
                        next_response = ask_llama(
                            st.session_state.conversation_history,
                            st.session_state.category
                        )
                        if "Final Guess:" in next_response:
                            st.session_state.final_guess = next_response.split("Final Guess:")[1].strip()
                            st.session_state.game_state = "confirm_guess"
                        else:
                            st.session_state.questions.append(next_response)
                            st.session_state.conversation_history.append(
                                {"role": "assistant", "content": next_response}
                            )
                            st.session_state.current_q += 1
                            # Out of questions: force the AI to guess.
                            if st.session_state.current_q >= 20:
                                st.session_state.game_state = "result"
                        st.rerun()

            # Help assistant with voice input
            with st.expander("Need Help? Chat with AI Assistant"):
                help_query = voice_input("help_query", "Enter your help query:")
                if st.button("Send", key="send_help"):
                    if help_query:
                        help_response = ask_help_agent(help_query)
                        st.session_state.help_conversation.append({"query": help_query, "response": help_response})
                    else:
                        st.error("Please enter a query!")
                if st.session_state.help_conversation:
                    for msg in st.session_state.help_conversation:
                        st.markdown(f"**You:** {msg['query']}")
                        st.markdown(f"**Help Assistant:** {msg['response']}")

    # Guess confirmation with voice input
    elif st.session_state.game_state == "confirm_guess":
        st.markdown(f'''
        <div class="question-box">
            <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1.5rem;">
                <div style="background: #6C63FF; width: 40px; height: 40px; border-radius: 50%;
                            display: flex; align-items: center; justify-content: center; color: white;">
                    <i class="fas fa-lightbulb"></i>
                </div>
                <h3 style="margin: 0; color: #1E293B;">AI's Final Guess</h3>
            </div>
            <p style="font-size: 1.2rem; line-height: 1.6; color: #1E293B;">
                Is it <strong style="color: #6C63FF;">{st.session_state.final_guess}</strong>?
            </p>
        </div>
        ''', unsafe_allow_html=True)

        with st.form("confirm_form"):
            confirm_input = voice_input("confirm_input",
                                        "Type your answer (yes/no/both):").strip().lower()
            if st.form_submit_button("Submit"):
                if confirm_input not in ["yes", "no", "both"]:
                    st.error("Please answer with 'yes', 'no', or 'both'!")
                else:
                    if confirm_input == "yes":
                        st.session_state.game_state = "result"
                        # (dead st.stop() after the rerun was removed —
                        # st.rerun() already aborts this script run.)
                        st.rerun()
                    else:
                        # BUG FIX: clear the rejected guess so the result
                        # screen cannot later display this stale value.
                        st.session_state.final_guess = None
                        st.session_state.conversation_history.append(
                            {"role": "user", "content": "no"}
                        )
                        st.session_state.game_state = "gameplay"
                        next_response = ask_llama(
                            st.session_state.conversation_history,
                            st.session_state.category
                        )
                        st.session_state.questions.append(next_response)
                        st.session_state.conversation_history.append(
                            {"role": "assistant", "content": next_response}
                        )
                        st.session_state.current_q += 1
                        st.rerun()

    # Result screen
    elif st.session_state.game_state == "result":
        # If we arrived here without a confirmed guess (20 questions used up),
        # ask the model for its best final answer from the full Q/A history.
        if not st.session_state.final_guess:
            qa_history = "\n".join(
                [f"Q{i+1}: {q}\nA: {a}"
                 for i, (q, a) in enumerate(zip(st.session_state.questions, st.session_state.answers))]
            )
            final_guess = ask_llama(
                [{"role": "user", "content": qa_history}],
                st.session_state.category,
                is_final_guess=True
            )
            st.session_state.final_guess = final_guess.split("Final Guess:")[-1].strip()

        show_confetti()
        st.markdown('<div class="final-reveal">🎉 It\'s...</div>', unsafe_allow_html=True)
        time.sleep(1)  # small dramatic pause before the reveal
        st.markdown(f'<div class="final-reveal" style="font-size:3.5rem;color:#6C63FF;">{st.session_state.final_guess}</div>',
                    unsafe_allow_html=True)
        st.markdown(f"<p style='text-align:center; color:#64748B;'>Guessed in {len(st.session_state.questions)} questions</p>",
                    unsafe_allow_html=True)
        if st.button("Play Again", key="play_again"):
            st.session_state.clear()
            st.rerun()
# Streamlit re-executes this module top-to-bottom on every rerun.
if __name__ == "__main__":
    main()