# Hugging Face Space: siviku/demo
# Status: Running
# File size: 1,871 Bytes
import os
import tempfile

import streamlit as st
import whisper
from gtts import gTTS
from transformers import pipeline

# Speech-to-text: the Whisper model is loaded through a cached factory.
@st.cache_resource
def load_whisper():
    """Load and return the Whisper "base" speech-recognition model.

    Decorated with ``st.cache_resource`` so the loaded model object is
    reused rather than rebuilt on every script rerun.
    """
    model = whisper.load_model("base")
    return model


asr_model = load_whisper()

# Text generation: instruction-tuned Falcon model behind a transformers
# pipeline.
# NOTE(review): falcon-7b-instruct is a ~7B-parameter model; this app was
# described as targeting the free Hugging Face GPU tier — confirm the Space's
# hardware can actually hold it.
@st.cache_resource
def load_llm():
    """Build and return the text-generation pipeline used to answer questions.

    Generation samples (``do_sample=True``) at temperature 0.7 and is capped
    at 100 new tokens per call. The same checkpoint name is used for both
    the model and the tokenizer.
    """
    model_id = "tiiuae/falcon-7b-instruct"
    generation_options = {
        "max_new_tokens": 100,
        "do_sample": True,
        "temperature": 0.7,
    }
    return pipeline(
        "text-generation",
        model=model_id,
        tokenizer=model_id,
        **generation_options,
    )


llm = load_llm()

# Convert text to speech using gTTS
def speak(text, filename="response.mp3"):
    """Synthesize *text* with gTTS and play it in the Streamlit page.

    The audio is saved to *filename*, read back, passed to ``st.audio`` as
    MP3 bytes, and the file is then deleted.

    Args:
        text: The text to read aloud. ``None``, empty, or whitespace-only
            text is skipped because there is nothing to synthesize.
        filename: Scratch path for the generated MP3. It is removed before
            the function returns, including when an error is raised.

    Returns:
        None.
    """
    if not text or not text.strip():
        # Nothing to say; gTTS refuses empty input, so skip quietly instead
        # of crashing the page.
        return

    try:
        gTTS(text).save(filename)
        # The context manager closes the handle before the file is deleted
        # (an open handle leaks and blocks deletion on Windows).
        with open(filename, "rb") as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/mp3")
    finally:
        # Clean up even if synthesis or playback raised. The file may not
        # exist when gTTS failed before writing it.
        if os.path.exists(filename):
            os.remove(filename)

# UI: page setup and collection of the child's question, typed or spoken.
st.set_page_config(page_title="AI Learning Buddy", page_icon="🧸")
st.title("🧸 AI Learning Buddy for Kids (4–7)")

input_type = st.radio("Choose how to ask your question:", ["Type", "Speak"])

# Default to an empty question so later code never sees an undefined name
# when "Speak" is selected but no file has been uploaded yet.
user_input = ""

if input_type == "Type":
    user_input = st.text_input("Ask something fun or educational:")
else:
    audio = st.file_uploader("Upload a voice file (wav/mp3)", type=["wav", "mp3"])
    if audio:
        # Write the upload to a unique temp file, keeping its real extension,
        # so concurrent sessions cannot overwrite each other's audio.
        suffix = os.path.splitext(audio.name)[1] or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # getvalue() returns the whole upload regardless of the buffer's
            # current read position.
            tmp.write(audio.getvalue())
            temp_path = tmp.name
        try:
            result = asr_model.transcribe(temp_path)
        finally:
            # Remove the temp file even when transcription fails.
            os.remove(temp_path)
        user_input = result["text"]
        st.success(f"You said: {user_input}")

# Generate, display, and speak an answer once the button is pressed and a
# non-empty question is available.
if st.button("Ask the Buddy") and user_input:
    prompt = f"You are a fun and friendly teacher for a 5-year-old. Question: {user_input} Answer:"
    generated = llm(prompt)[0]["generated_text"]
    # Keep only what follows the final "Answer:" marker; the generated text
    # is expected to include the prompt itself.
    answer = generated.split("Answer:")[-1].strip()
    st.markdown(f"**AI Buddy says:** {answer}")
    # The model may produce nothing after the marker; skip speech in that
    # case rather than handing empty text to the text-to-speech step.
    if answer:
        speak(answer)