import asyncio

from ollama import AsyncClient


async def generate_stream(query: str):
    """Generates streamed responses from Ollama using LLaMA 3."""
    try:
        # Use the async client so streaming does not block the event loop,
        # unlike the synchronous ollama.chat() call.
        stream = await AsyncClient().chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        # Stream output without unnecessary delay
        async for chunk in stream:
            content = chunk["message"]["content"]
            if content:
                yield content  # ✅ No sleep needed between chunks
    except Exception as e:
        yield f"⚠️ Error: {e}"
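
# --- Usage sketch (not part of the original snippet) ---
# A minimal way to consume generate_stream() from a script, assuming the
# Ollama server is running locally and the "llama3.2" model is already pulled.
# The prompt string below is only an illustrative placeholder.
async def main() -> None:
    async for token in generate_stream("Explain streaming responses in one sentence."):
        print(token, end="", flush=True)  # print tokens as they arrive
    print()


if __name__ == "__main__":
    asyncio.run(main())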