llm_host/utils.py
import asyncio
import json

import ollama


async def generate_stream(query: str):
    """Generates streamed responses from Ollama using LLaMA 3 in JSON format."""
    try:
        stream = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                response_data = json.dumps({"content": chunk["message"]["content"]})
                yield f"data: {response_data}\n\n"  # SSE format
    except Exception as e:
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"
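
# Hedged usage sketch (not part of this file): one way generate_stream could be
# exposed as a Server-Sent Events endpoint, assuming the host app uses FastAPI.
# The app instance, route path, and parameter name below are illustrative
# assumptions, not taken from this repository.
#
#     from fastapi import FastAPI
#     from fastapi.responses import StreamingResponse
#
#     app = FastAPI()
#
#     @app.get("/generate/stream")
#     async def stream_llm(query: str):
#         # "text/event-stream" matches the "data: ...\n\n" frames yielded above.
#         return StreamingResponse(generate_stream(query), media_type="text/event-stream")
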

async def generate_response(query: str):
    """Returns a non-streamed response."""
    try:
        response = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
        )
        return {"content": response["message"]["content"]}
    except Exception as e:
        return {"error": str(e)}
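

# Minimal local smoke test; an illustrative sketch, not part of the original
# module. It assumes a reachable Ollama server with the "llama3.2" model pulled.
if __name__ == "__main__":
    async def _demo() -> None:
        # Consume the SSE-formatted stream frame by frame.
        async for frame in generate_stream("Why is the sky blue?"):
            print(frame, end="")
        # Then fetch a single non-streamed answer.
        print(await generate_response("Summarize that in one sentence."))

    asyncio.run(_demo())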