Spaces:
Running
Running
File size: 1,208 Bytes
8b883c8 e4f5d4a 8b883c8 a4ac1ab e4f5d4a 8b883c8 a4ac1ab e4f5d4a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import asyncio
import ollama
from typing import List
import time
def cosine_similarity(embedding_0, embedding_1):
    """Placeholder with no implementation yet.

    Both arguments are accepted and ignored; every call evaluates to
    ``None``.

    NOTE(review): the name suggests this should eventually compute the
    cosine similarity of two embedding vectors -- confirm the intended
    contract before relying on it.
    """
    return None
def generate_embedding(model, text: str, model_type: str) -> List[float]:
    """Placeholder with no implementation yet.

    All three arguments are accepted and ignored.  Although the signature
    is annotated as returning ``List[float]``, the current body produces
    ``None`` on every call.

    NOTE(review): presumably meant to embed ``text`` with ``model``,
    dispatching on ``model_type`` -- confirm once it is implemented.
    """
    return None
async def generate_stream(query: str, *, max_retries: int = 5, delay: float = 3):
    """Stream responses from Ollama with automatic retries.

    Sends ``query`` as a single user message to the ``mistral`` model via
    ``ollama.chat(..., stream=True)`` and yields each chunk's message
    content as it arrives.  If an attempt raises, the failure is printed
    and the request is retried after ``delay`` seconds.

    Args:
        query: Text sent as the user message.
        max_retries: Total number of attempts before giving up.  A value
            of zero or less skips the request and reports failure at once.
        delay: Seconds to wait between failed attempts.

    Yields:
        The ``content`` string of every streamed chunk that carries one.
        When all attempts fail, a single human-readable error string is
        yielded instead of raising.
    """
    for attempt in range(1, max_retries + 1):
        try:
            stream = ollama.chat(
                model="mistral",  # Use your preferred model
                messages=[{"role": "user", "content": query}],
                stream=True,
            )
            # NOTE(review): this is the synchronous client, so waiting for
            # the next chunk still blocks the event loop; the sleep(0)
            # below only hands control back *between* chunks.
            for chunk in stream:
                if "message" in chunk and "content" in chunk["message"]:
                    yield chunk["message"]["content"]
                    await asyncio.sleep(0)
            return
        except Exception as e:
            # NOTE(review): a failure after some chunks were already
            # yielded restarts the chat, so the consumer may see the
            # beginning of the answer again on the next attempt.
            print(f"❌ Ollama connection failed (Attempt {attempt}/{max_retries}): {e}")
            if attempt < max_retries:
                # Non-blocking wait: time.sleep() here would freeze every
                # other task on the event loop for the whole delay.
                await asyncio.sleep(delay)

    # Only reached when no attempt completed successfully.
    yield "⚠️ Error: Could not connect to Ollama after multiple attempts."
|