File size: 688 Bytes
8b883c8
e4f5d4a
8b883c8
e4f5d4a
 
 
 
 
 
 
8b883c8
 
 
 
 
 
 
 
 
 
 
 
e4f5d4a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import asyncio
import ollama
from typing import List

def cosine_similarity(embedding_0, embedding_1):
    """Return the cosine similarity of two equal-length numeric vectors.

    The result is ``dot(a, b) / (|a| * |b|)``, clamped to ``[-1.0, 1.0]`` so
    floating-point rounding can never push it outside the valid range.

    Args:
        embedding_0: Iterable of numbers (anything ``float()`` accepts).
        embedding_1: Iterable of numbers with the same length as
            ``embedding_0``.

    Returns:
        The similarity as a ``float``. If either vector has zero magnitude
        (including two empty vectors) the similarity is undefined and
        ``0.0`` is returned instead of raising ``ZeroDivisionError``.

    Raises:
        ValueError: If the two embeddings have different lengths.
    """
    import math  # local import keeps this block self-contained

    # Materialize both inputs once so generators work and len() is defined.
    vec_0 = [float(x) for x in embedding_0]
    vec_1 = [float(x) for x in embedding_1]
    if len(vec_0) != len(vec_1):
        raise ValueError(
            f"embedding lengths differ: {len(vec_0)} != {len(vec_1)}"
        )

    dot = math.fsum(a * b for a, b in zip(vec_0, vec_1))
    norm_0 = math.sqrt(math.fsum(a * a for a in vec_0))
    norm_1 = math.sqrt(math.fsum(b * b for b in vec_1))
    if norm_0 == 0.0 or norm_1 == 0.0:
        return 0.0

    # Divide step by step so the product of two tiny norms cannot underflow.
    similarity = (dot / norm_0) / norm_1
    return max(-1.0, min(1.0, similarity))

def generate_embedding(model, text: str, model_type: str) -> List[float]:
    """Placeholder for embedding generation; not implemented yet.

    The body performs no work: ``model``, ``text`` and ``model_type`` are
    all ignored and the call returns ``None``, even though the signature
    advertises ``List[float]``.
    """
    # NOTE(review): callers currently receive None, not a list of floats.
    return None

async def generate_stream(query: str, *, model: str = "llama3.2"):
    """Stream an Ollama chat reply to *query* as it is generated.

    This is an async generator: iterate it with ``async for``.

    Args:
        query: The user message sent as the single chat turn.
        model: Name of the Ollama model to use. Defaults to ``"llama3.2"``,
            the value that was previously hard-coded.

    Yields:
        The ``content`` string of each streamed chunk that carries a
        ``message`` with a ``content`` entry; other chunks are skipped.
    """
    # Use the async client: iterating the synchronous ``ollama.chat`` stream
    # inside a coroutine blocks the event loop while waiting for every chunk.
    stream = await ollama.AsyncClient().chat(
        model=model,
        messages=[{"role": "user", "content": query}],
        stream=True,  # Enable streaming
    )

    # ``async for`` suspends at each network wait, so other tasks keep
    # running without an explicit ``asyncio.sleep(0)``.
    async for chunk in stream:
        if "message" in chunk and "content" in chunk["message"]:
            yield chunk["message"]["content"]