import asyncio
import ollama

async def generate_stream(query: str):
    """Generates streamed responses from Ollama using LLaMA 3 or Mistral."""
    try:
        stream = ollama.chat(
            model="llama3.2",  # Change to 'mistral' if needed
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        # Stream the response in real time
        for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                yield chunk["message"]["content"]
            await asyncio.sleep(0)  # Yield control so the event loop stays responsive
    except Exception as e:
        yield f"⚠️ Error: {str(e)}"
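Because generate_stream is an async generator, it is consumed with async for inside a coroutine. A minimal usage sketch, assuming the generate_stream definition above (the main coroutine and the prompt string are illustrative, not part of the original):

import asyncio

async def main():
    # Print tokens as they arrive, without buffering the whole response
    async for token in generate_stream("Explain streaming in one sentence."):
        print(token, end="", flush=True)
    print()

asyncio.run(main())

Printing with end="" and flush=True keeps the partial response visible as each chunk arrives, which is the point of streaming the output rather than waiting for the full completion.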