import asyncio

import ollama


async def generate_stream(query: str):
    """Generates streamed responses from Ollama using LLaMA 3 or Mistral."""
    try:
        stream = ollama.chat(
            model="llama3.2",  # Change to 'mistral' if needed
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        # Stream the response in real-time, yielding one token chunk at a time
        for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                yield chunk["message"]["content"]
            # Give control back to the event loop between chunks
            await asyncio.sleep(0)
    except Exception as e:
        yield f"⚠️ Error: {str(e)}"
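To see the streaming behavior end to end, here is a minimal consumption sketch: it drives the `generate_stream` async generator defined above with `asyncio.run` and prints tokens as they arrive. The prompt string and the `main` wrapper are illustrative placeholders, not part of the original code.

import asyncio


async def main():
    # Hypothetical example prompt; replace with any user query
    async for token in generate_stream("Why is the sky blue?"):
        print(token, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(main())

Note that `ollama.chat(..., stream=True)` itself returns a synchronous iterator; the `await asyncio.sleep(0)` inside `generate_stream` simply yields control back to the event loop between chunks so other coroutines are not starved while tokens stream in.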