import json

import ollama


async def generate_stream(query: str):
    """Streams responses from Ollama's llama3.2 model as SSE events with JSON payloads."""
    try:
        stream = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True,
        )
        for chunk in stream:
            if "message" in chunk and "content" in chunk["message"]:
                # Wrap each token in a JSON payload and frame it as an SSE event.
                response_data = json.dumps({"content": chunk["message"]["content"]})
                yield f"data: {response_data}\n\n"  # SSE format
    except Exception as e:
        # Surface errors to the client in the same SSE/JSON framing.
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"


async def generate_response(query: str):
    """Returns the full, non-streamed response as a single dict."""
    try:
        response = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
        )
        return {"content": response["message"]["content"]}
    except Exception as e:
        return {"error": str(e)}
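
# --- Usage sketch (assumption): the SSE framing above suggests these generators
# back a streaming HTTP endpoint. The snippet itself names no web framework; the
# FastAPI app, route paths, and port below are illustrative, not the original wiring. ---
import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


@app.get("/chat/stream")
async def chat_stream(query: str):
    # StreamingResponse iterates the async generator, flushing one SSE event per yield.
    return StreamingResponse(generate_stream(query), media_type="text/event-stream")


@app.get("/chat")
async def chat(query: str):
    # Non-streamed variant: awaits the coroutine and returns one JSON object.
    return await generate_response(query)


if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)

# A client can then consume the stream unbuffered with, e.g.:
#   curl -N "http://127.0.0.1:8000/chat/stream?query=hello"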