import streamlit as st import time import requests from streamlit.components.v1 import html import os from dotenv import load_dotenv # New imports for voice input import torchaudio import numpy as np import torch from io import BytesIO import hashlib from audio_recorder_streamlit import audio_recorder from transformers import pipeline from datetime import datetime ###################################### # Voice Input Helper Functions ###################################### @st.cache_resource def load_voice_model(): # Loading the Whisper model (which automatically detects both English and Urdu) return pipeline("automatic-speech-recognition", model="openai/whisper-base") def process_audio(audio_bytes): waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes)) if waveform.shape[0] > 1: # Convert stereo to mono waveform = torch.mean(waveform, dim=0, keepdim=True) if sample_rate != 16000: # Resample to 16kHz if needed resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000) waveform = resampler(waveform) return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000} def get_voice_transcription(state_key, input_container): """Display audio recorder for a given key. If new audio is recorded, transcribe it and update the session state. """ if state_key not in st.session_state: st.session_state[state_key] = "" # Create a unique key for the recorder widget recorder_key = f"{state_key}_audio_{hash(input_container) if input_container else ''}" # Use columns to place mic button inside input field col1, col2 = input_container.columns([0.85, 0.15]) # Audio recorder with custom styling audio_bytes = audio_recorder(key=recorder_key, pause_threshold=1.5, # Shorter pause for quick responses text="", recording_color="#e8b62c", neutral_color="#6aa36f", icon_name="microphone", icon_size="1.5em") if audio_bytes: current_hash = hashlib.md5(audio_bytes).hexdigest() last_hash_key = state_key + "_last_hash" if st.session_state.get(last_hash_key, "") != current_hash: st.session_state[last_hash_key] = current_hash # Show processing indicator processing_placeholder = input_container.empty() start_time = datetime.now() processing_placeholder.markdown( f"
" f" Processing speech..." f"
", unsafe_allow_html=True ) try: audio_input = process_audio(audio_bytes) whisper = load_voice_model() # Measure processing time processing_start = time.time() transcribed_text = whisper(audio_input)["text"] processing_time = time.time() - processing_start # For short responses (yes/no/both), use a simpler model if available if len(transcribed_text.split()) <= 2: transcribed_text = transcribed_text.lower().strip() st.info(f"📝 Transcribed: {transcribed_text}") # Show processing time feedback processing_placeholder.markdown( f"
" f" Processed in {processing_time:.1f}s" f"
", unsafe_allow_html=True ) # Append (or set) new transcription st.session_state[state_key] = transcribed_text st.experimental_rerun() except Exception as e: st.error(f"Voice input error: {str(e)}") processing_placeholder.empty() return st.session_state[state_key] ###################################### # Existing Game Helper Functions ###################################### @st.cache_resource def get_help_agent(): from transformers import pipeline # Using BlenderBot 400M Distill as the public conversational model (used elsewhere) return pipeline("conversational", model="facebook/blenderbot-400M-distill") def inject_custom_css(): st.markdown(""" """, unsafe_allow_html=True) def show_confetti(): html(""" """) def ask_llama(conversation_history, category, is_final_guess=False): api_url = "https://api.groq.com/openai/v1/chat/completions" headers = { "Authorization": "Bearer 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", "Content-Type": "application/json" } system_prompt = f"""You're playing 20 questions to guess a {category}. Follow these rules: 1. Ask strategic, non-repeating yes/no questions that narrow down possibilities 2. Consider all previous answers carefully before asking next question 3. If you're very confident (80%+ sure), respond with "Final Guess: [your guess]" 4. For places: ask about continent, climate, famous landmarks, country, city or population 5. For people: ask about fictional or real, profession, gender, alive/dead, nationality, or fame 6. For objects: ask about size, color, usage, material, or where it's found 7. Never repeat questions and always make progress toward guessing""" if is_final_guess: prompt = f"""Based on these answers about a {category}, provide ONLY your final guess with no extra text: {conversation_history}""" else: prompt = "Ask your next strategic yes/no question that will best narrow down the possibilities." messages = [ {"role": "system", "content": system_prompt}, *conversation_history, {"role": "user", "content": prompt} ] data = { "model": "llama-3.3-70b-versatile", "messages": messages, "temperature": 0.7 if is_final_guess else 0.8, "max_tokens": 100 } try: response = requests.post(api_url, headers=headers, json=data) response.raise_for_status() return response.json()["choices"][0]["message"]["content"] except Exception as e: st.error(f"Error calling Llama API: {str(e)}") return "Could not generate question" MISTRAL_API_KEY = "wm5eLl09b9I9cOxR3E9n5rrRr1CRQQjn" def ask_help_agent(query): try: url = "https://api.mistral.ai/v1/chat/completions" headers = { "Authorization": f"Bearer {MISTRAL_API_KEY}", "Content-Type": "application/json" } system_message = "You are a friendly Chatbot." messages = [{"role": "system", "content": system_message}] if "help_conversation" in st.session_state: for msg in st.session_state.help_conversation: if msg.get("query"): messages.append({"role": "user", "content": msg["query"]}) if msg.get("response"): messages.append({"role": "assistant", "content": msg["response"]}) messages.append({"role": "user", "content": query}) payload = { "model": "mistral-tiny", "messages": messages, "temperature": 0.7, "top_p": 0.95 } response = requests.post(url, headers=headers, json=payload) if response.status_code == 200: result = response.json() return result["choices"][0]["message"]["content"] else: return f"API Error {response.status_code}: {response.text}" except Exception as e: return f"Error in help agent: {str(e)}" ###################################### # Main Game Logic with Voice Integration ###################################### def main(): inject_custom_css() st.markdown('
KASOTI
', unsafe_allow_html=True) st.markdown('
AI-Powered Guessing Game Challenge
', unsafe_allow_html=True) if 'game_state' not in st.session_state: st.session_state.game_state = "start" st.session_state.questions = [] st.session_state.current_q = 0 st.session_state.answers = [] st.session_state.conversation_history = [] st.session_state.category = None st.session_state.final_guess = None st.session_state.help_conversation = [] # separate history for help agent # Start screen with enhanced layout if st.session_state.game_state == "start": with st.container(): st.markdown("""

🎮 Welcome to KASOTI

Think of something and I'll try to guess it in 20 questions or less!
Choose from these categories:

🧑 Person

Celebrity, fictional character, historical figure

🌍 Place

City, country, landmark, geographical location

đŸŽ¯ Object

Everyday item, tool, vehicle, or concept

""", unsafe_allow_html=True) with st.form("start_form"): # --- Voice Input for Category --- st.markdown("#### Speak your category (person/place/object)") category_container = st.empty() category_input = category_container.text_input( "Enter category (person/place/object):", key="category_input" ) # Get voice transcription and update the input field voice_category = get_voice_transcription("voice_category", category_container) if voice_category and voice_category != category_input: category_container.text_input( "Enter category (person/place/object):", value=voice_category.strip(), key="category_input_updated" ) if st.form_submit_button("Start Game"): final_category = st.session_state.get("voice_category", "").strip() or category_input.strip().lower() if not final_category: st.error("Please enter a category!") elif final_category not in ["person", "place", "object"]: st.error("Please enter either 'person', 'place', or 'object'!") else: st.session_state.category = final_category first_question = ask_llama([ {"role": "user", "content": "Ask your first strategic yes/no question."} ], final_category) st.session_state.questions = [first_question] st.session_state.conversation_history = [ {"role": "assistant", "content": first_question} ] st.session_state.game_state = "gameplay" st.experimental_rerun() # Gameplay screen with progress bar elif st.session_state.game_state == "gameplay": with st.container(): progress = (st.session_state.current_q + 1) / 20 st.markdown(f"""
QUESTION {st.session_state.current_q + 1} OF 20
""", unsafe_allow_html=True) current_question = st.session_state.questions[st.session_state.current_q] st.markdown(f'''

AI Question

{current_question}

''', unsafe_allow_html=True) if "Final Guess:" in current_question: st.session_state.final_guess = current_question.split("Final Guess:")[1].strip() st.session_state.game_state = "confirm_guess" st.experimental_rerun() with st.form("answer_form"): # --- Voice Input for Answer --- st.markdown("#### Speak your answer (yes/no/both)") answer_container = st.empty() answer_input = answer_container.text_input( "Your answer (yes/no/both):", key=f"answer_{st.session_state.current_q}" ) # Get voice transcription and update the input field voice_answer = get_voice_transcription("voice_answer", answer_container) if voice_answer and voice_answer != answer_input: answer_container.text_input( "Your answer (yes/no/both):", value=voice_answer.strip(), key=f"answer_updated_{st.session_state.current_q}" ) if st.form_submit_button("Submit"): final_answer = st.session_state.get("voice_answer", "").strip().lower() or answer_input.strip().lower() if final_answer not in ["yes", "no", "both"]: st.error("Please answer with 'yes', 'no', or 'both'!") else: st.session_state.answers.append(final_answer) st.session_state.conversation_history.append( {"role": "user", "content": final_answer} ) next_response = ask_llama( st.session_state.conversation_history, st.session_state.category ) if "Final Guess:" in next_response: st.session_state.final_guess = next_response.split("Final Guess:")[1].strip() st.session_state.game_state = "confirm_guess" else: st.session_state.questions.append(next_response) st.session_state.conversation_history.append( {"role": "assistant", "content": next_response} ) st.session_state.current_q += 1 if st.session_state.current_q >= 20: st.session_state.game_state = "result" st.experimental_rerun() with st.expander("Need Help? Chat with AI Assistant"): # --- Voice Input for Help Query --- st.markdown("#### Speak your help query") help_container = st.empty() help_query = help_container.text_input( "Enter your help query:", key="help_query" ) # Get voice transcription and update the input field voice_help = get_voice_transcription("voice_help", help_container) if voice_help and voice_help != help_query: help_container.text_input( "Enter your help query:", value=voice_help.strip(), key="help_query_updated" ) if st.button("Send", key="send_help"): final_help_query = st.session_state.get("voice_help", "").strip() or help_query.strip() if final_help_query: help_response = ask_help_agent(final_help_query) st.session_state.help_conversation.append({"query": final_help_query, "response": help_response}) st.experimental_rerun() else: st.error("Please enter a query!") if st.session_state.help_conversation: for msg in st.session_state.help_conversation: st.markdown(f"**You:** {msg['query']}") st.markdown(f"**Help Assistant:** {msg['response']}") elif st.session_state.game_state == "confirm_guess": st.markdown(f'''

AI's Final Guess

Is it {st.session_state.final_guess}?

''', unsafe_allow_html=True) with st.form("confirm_form"): # --- Voice Input for Confirmation --- confirm_container = st.empty() confirm_input = confirm_container.text_input( "Type your answer (yes/no/both):", key="confirm_input" ) # Get voice transcription and update the input field voice_confirm = get_voice_transcription("voice_confirm", confirm_container) if voice_confirm and voice_confirm != confirm_input: confirm_container.text_input( "Type your answer (yes/no/both):", value=voice_confirm.strip(), key="confirm_input_updated" ) if st.form_submit_button("Submit"): final_confirm = st.session_state.get("voice_confirm", "").strip().lower() or confirm_input.strip().lower() if final_confirm not in ["yes", "no", "both"]: st.error("Please answer with 'yes', 'no', or 'both'!") else: if final_confirm == "yes": st.session_state.game_state = "result" st.experimental_rerun() else: st.session_state.conversation_history.append( {"role": "user", "content": "no"} ) st.session_state.game_state = "gameplay" next_response = ask_llama( st.session_state.conversation_history, st.session_state.category ) st.session_state.questions.append(next_response) st.session_state.conversation_history.append( {"role": "assistant", "content": next_response} ) st.session_state.current_q += 1 st.experimental_rerun() elif st.session_state.game_state == "result": if not st.session_state.final_guess: qa_history = "\n".join( [f"Q{i+1}: {q}\nA: {a}" for i, (q, a) in enumerate(zip(st.session_state.questions, st.session_state.answers))] ) final_guess = ask_llama( [{"role": "user", "content": qa_history}], st.session_state.category, is_final_guess=True ) st.session_state.final_guess = final_guess.split("Final Guess:")[-1].strip() show_confetti() st.markdown(f'
🎉 It\'s...
', unsafe_allow_html=True) time.sleep(1) st.markdown(f'
{st.session_state.final_guess}
', unsafe_allow_html=True) st.markdown(f"

Guessed in {len(st.session_state.questions)} questions

", unsafe_allow_html=True) if st.button("Play Again", key="play_again"): st.session_state.clear() st.experimental_rerun() if __name__ == "__main__": main()