llm_host/utils.py
import asyncio
import ollama


async def generate_stream(query: str):
    """Generate streamed responses from Ollama using LLaMA 3 or Mistral."""
    try:
        # With stream=True, ollama.chat returns a generator of partial
        # response chunks instead of a single completed message.
        stream = ollama.chat(
            model="llama3.2",  # Change to 'mistral' if needed
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        # Relay each chunk's text as soon as it arrives.
        for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                yield chunk["message"]["content"]
            # Hand control back to the event loop so this blocking
            # iteration does not starve other coroutines.
            await asyncio.sleep(0)
    except Exception as e:
        yield f"⚠️ Error: {e}"