# Spaces: Running
# Running
# (NOTE: the two lines above are non-code residue — presumably Hugging Face
# Spaces UI status text captured in a copy/paste; commented out so the file parses.)
# Standard library
import asyncio
import time
from typing import List

# Third-party
import ollama
def cosine_similarity(embedding_0, embedding_1):
    """Return the cosine similarity between two equal-length numeric vectors.

    Args:
        embedding_0: First embedding vector (sequence of numbers).
        embedding_1: Second embedding vector (sequence of numbers).

    Returns:
        float in [-1.0, 1.0]. Returns 0.0 when either vector has zero
        magnitude, avoiding a ZeroDivisionError for degenerate embeddings.
    """
    # dot(a, b) / (||a|| * ||b||)
    dot = sum(a * b for a, b in zip(embedding_0, embedding_1))
    norm_0 = sum(a * a for a in embedding_0) ** 0.5
    norm_1 = sum(b * b for b in embedding_1) ** 0.5
    if norm_0 == 0.0 or norm_1 == 0.0:
        return 0.0
    return dot / (norm_0 * norm_1)
def generate_embedding(model, text: str, model_type: str) -> List[float]:
    """Generate an embedding vector for *text* with the given model.

    Args:
        model: Embedding model handle or name (backend-specific).
        text: Input text to embed.
        model_type: Identifier selecting how *model* is invoked —
            presumably distinguishes backends; verify against callers.

    Returns:
        List[float] embedding vector once implemented.

    NOTE(review): unimplemented stub — it currently falls through and
    returns None, which does not satisfy the declared List[float]
    return type. TODO: implement before use.
    """
    pass
async def generate_stream(query: str):
    """Stream responses from Ollama with automatic retries.

    Yields each content fragment of the chat response as Ollama produces
    it. On failure, retries up to ``max_retries`` times with a fixed
    delay between attempts; after the final failure it yields a
    user-facing error string instead of raising.

    Args:
        query: The user prompt, sent as a single chat message.

    Yields:
        str: Successive chunks of the model's response text.
    """
    max_retries = 5  # total attempts before giving up
    delay = 3        # seconds to wait between attempts

    for attempt in range(max_retries):
        try:
            stream = ollama.chat(
                model="mistral",  # Use your preferred model
                messages=[{"role": "user", "content": query}],
                stream=True,
            )
            for chunk in stream:
                if "message" in chunk and "content" in chunk["message"]:
                    yield chunk["message"]["content"]
                # Yield control to the event loop between chunks so other
                # tasks stay responsive while the (blocking) client iterates.
                await asyncio.sleep(0)
            return
        except Exception as e:
            # NOTE(review): if the failure happens mid-stream, a retry
            # restarts the response from the beginning, so the consumer
            # may see duplicated text — confirm this is acceptable.
            print(f"❌ Ollama connection failed (Attempt {attempt+1}/{max_retries}): {str(e)}")
            if attempt < max_retries - 1:
                # BUG FIX: time.sleep() here would block the entire event
                # loop inside this coroutine; await asyncio.sleep() instead.
                await asyncio.sleep(delay)
            else:
                yield "⚠️ Error: Could not connect to Ollama after multiple attempts."