import asyncio
from typing import List

import ollama


def cosine_similarity(embedding_0: List[float], embedding_1: List[float]) -> float:
    """Cosine similarity between two embedding vectors.

    Minimal pure-Python sketch completing the original stub; assumes
    both vectors are the same length and non-zero.
    """
    dot = sum(a * b for a, b in zip(embedding_0, embedding_1))
    norm_0 = sum(a * a for a in embedding_0) ** 0.5
    norm_1 = sum(b * b for b in embedding_1) ** 0.5
    return dot / (norm_0 * norm_1)


def generate_embedding(model, text: str, model_type: str) -> List[float]:
    """Generate an embedding for `text` with an Ollama model.

    Sketch completing the original stub via the ollama client's
    embeddings endpoint; `model_type` is kept from the original
    signature but unused here.
    """
    response = ollama.embeddings(model=model, prompt=text)
    return response["embedding"]


async def generate_stream(query: str):
    """Stream responses from Ollama in real-time."""
    stream = ollama.chat(
        model="llama3.2",  # Choose your model (mistral, llama2, gemma)
        messages=[{"role": "user", "content": query}],
        stream=True,  # Enable streaming
    )
    # Note: ollama.chat is synchronous; for a fully non-blocking version,
    # use ollama.AsyncClient with `async for` instead.
    for chunk in stream:
        if "message" in chunk and "content" in chunk["message"]:
            yield chunk["message"]["content"]
        await asyncio.sleep(0)  # Yield control back to the event loop
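

# Usage sketch: stream a reply to stdout. Assumes a local Ollama server
# is running and the llama3.2 model has been pulled (`ollama pull llama3.2`).
async def main() -> None:
    async for token in generate_stream("Why is the sky blue?"):
        print(token, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(main())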