import asyncio

import ollama


async def generate_stream(query: str):
    """Generates streamed responses from Ollama using LLaMA 3 or Mistral."""
    try:
        stream = ollama.chat(
            model="llama3.2",  # Change to 'mistral' if needed
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        # Stream the response in real-time, yielding one token chunk at a time
        for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                yield chunk["message"]["content"]
            # Give control back to the event loop between chunks
            await asyncio.sleep(0)
    except Exception as e:
        yield f"⚠️ Error: {str(e)}"
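To see the streaming behavior end to end, here is a minimal consumption sketch: it drives the `generate_stream` async generator defined above with `asyncio.run` and prints tokens as they arrive. The prompt string and the `main` wrapper are illustrative placeholders, not part of the original code.

import asyncio


async def main():
    # Hypothetical example prompt; replace with any user query
    async for token in generate_stream("Why is the sky blue?"):
        print(token, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(main())

Note that `ollama.chat(..., stream=True)` itself returns a synchronous iterator; the `await asyncio.sleep(0)` inside `generate_stream` simply yields control back to the event loop between chunks so other coroutines are not starved while tokens stream in.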