# Ollama-backed chat helpers: SSE streaming and single-shot responses.
import asyncio
import json

import ollama
async def generate_stream(query: str):
    """Stream chat responses from the llama3.2 model as Server-Sent Events.

    Args:
        query: The user prompt forwarded to the model.

    Yields:
        SSE-formatted strings (``data: {...}\n\n``) whose JSON payload has a
        ``content`` key for each chunk, or a single ``error`` key on failure.
    """
    try:
        # Use the async client: the module-level ollama.chat call is
        # synchronous and would block the event loop for the whole generation.
        client = ollama.AsyncClient()
        stream = await client.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        async for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                response_data = json.dumps({"content": chunk["message"]["content"]})
                yield f"data: {response_data}\n\n"  # SSE format
    except Exception as e:
        # Surface the failure to the SSE consumer instead of killing the stream.
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"
async def generate_response(query: str):
    """Return the complete (non-streamed) chat response for *query*.

    Args:
        query: The user prompt forwarded to the model.

    Returns:
        dict: ``{"content": <model reply>}`` on success, or
        ``{"error": <message>}`` if the chat call raised.
    """
    try:
        # Run the blocking ollama.chat call in a worker thread so the event
        # loop stays responsive while the model generates.
        response = await asyncio.to_thread(
            ollama.chat,
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
        )
        return {"content": response["message"]["content"]}
    except Exception as e:
        return {"error": str(e)}