File size: 640 Bytes
8b883c8
4fb1c18
e4f5d4a
8b883c8
4fb1c18
 
 
de06a50
4fb1c18
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import asyncio
import ollama

async def generate_stream(query: str):
    """Stream incremental response text from a local Ollama model.

    Args:
        query: The user prompt to send as a single-turn chat message.

    Yields:
        str: Successive content fragments of the model's reply, or a single
        "⚠️ Error: ..." string if the request fails.
    """
    try:
        # BUG FIX: ollama.chat() is synchronous — iterating its stream inside
        # an async generator blocks the whole event loop while waiting for
        # each network chunk. The previous `await asyncio.sleep(0)` only
        # yielded control *between* chunks, not during the blocking wait.
        # AsyncClient awaits each chunk properly, so other coroutines run.
        client = ollama.AsyncClient()
        stream = await client.chat(
            model="llama3.2",  # Change to 'mistral' if needed
            messages=[{"role": "user", "content": query}],
            stream=True,
        )

        # Stream the response in real-time as fragments arrive.
        async for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                yield chunk["message"]["content"]

    except Exception as e:
        # Best-effort error reporting: surface the failure to the consumer
        # as a final stream item rather than raising mid-stream.
        yield f"⚠️ Error: {str(e)}"