# Spaces: Running
# Running
# (NOTE: the two lines above are non-code residue — presumably Hugging Face
# Spaces UI status text captured in a copy/paste; commented out so the file parses.)
# Standard library
import asyncio
import time
from typing import List

# Third-party
import ollama
def cosine_similarity(embedding_0, embedding_1):
    """Return the cosine similarity between two equal-length numeric vectors.

    Args:
        embedding_0: First embedding vector (sequence of numbers).
        embedding_1: Second embedding vector (sequence of numbers).

    Returns:
        float in [-1.0, 1.0]. Returns 0.0 when either vector has zero
        magnitude, avoiding a ZeroDivisionError for degenerate embeddings.
    """
    # dot(a, b) / (||a|| * ||b||)
    dot = sum(a * b for a, b in zip(embedding_0, embedding_1))
    norm_0 = sum(a * a for a in embedding_0) ** 0.5
    norm_1 = sum(b * b for b in embedding_1) ** 0.5
    if norm_0 == 0.0 or norm_1 == 0.0:
        return 0.0
    return dot / (norm_0 * norm_1)
def generate_embedding(model, text: str, model_type: str) -> List[float]:
    """Generate an embedding vector for *text* with the given model.

    Args:
        model: Embedding model handle or name (backend-specific).
        text: Input text to embed.
        model_type: Identifier selecting how *model* is invoked —
            presumably distinguishes backends; verify against callers.

    Returns:
        List[float] embedding vector once implemented.

    NOTE(review): unimplemented stub — it currently falls through and
    returns None, which does not satisfy the declared List[float]
    return type. TODO: implement before use.
    """
    pass
async def generate_stream(query: str):
    """Stream responses from Ollama with automatic retries.

    Yields each content fragment of the chat response as Ollama produces
    it. On failure, retries up to ``max_retries`` times with a fixed
    delay between attempts; after the final failure it yields a
    user-facing error string instead of raising.

    Args:
        query: The user prompt, sent as a single chat message.

    Yields:
        str: Successive chunks of the model's response text.
    """
    max_retries = 5  # total attempts before giving up
    delay = 3        # seconds to wait between attempts

    for attempt in range(max_retries):
        try:
            stream = ollama.chat(
                model="mistral",  # Use your preferred model
                messages=[{"role": "user", "content": query}],
                stream=True,
            )
            for chunk in stream:
                if "message" in chunk and "content" in chunk["message"]:
                    yield chunk["message"]["content"]
                # Yield control to the event loop between chunks so other
                # tasks stay responsive while the (blocking) client iterates.
                await asyncio.sleep(0)
            return
        except Exception as e:
            # NOTE(review): if the failure happens mid-stream, a retry
            # restarts the response from the beginning, so the consumer
            # may see duplicated text — confirm this is acceptable.
            print(f"❌ Ollama connection failed (Attempt {attempt+1}/{max_retries}): {str(e)}")
            if attempt < max_retries - 1:
                # BUG FIX: time.sleep() here would block the entire event
                # loop inside this coroutine; await asyncio.sleep() instead.
                await asyncio.sleep(delay)
            else:
                yield "⚠️ Error: Could not connect to Ollama after multiple attempts."