import asyncio
import ollama
import json
import signal

shutdown_event = asyncio.Event()

def shutdown_handler(sig, frame):
    """Handles shutdown signals like Ctrl + C."""
    print("\n⛔ Shutdown requested! Stopping API...")
    shutdown_event.set()

# Attach signal handlers for graceful termination
signal.signal(signal.SIGINT, shutdown_handler)
signal.signal(signal.SIGTERM, shutdown_handler)
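
# Note: signal.signal() installs a process-wide handler; because the handler
# only sets a flag, it is safe here. Inside a running event loop,
# loop.add_signal_handler(signal.SIGINT, shutdown_event.set) is the
# asyncio-native alternative (Unix only), and either approach sets the event.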

async def generate_stream(query: str):
    """Generates streamed responses with cancellation support."""
    try:
        stream = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True
        )
        for chunk in stream:
            if shutdown_event.is_set():
                print("⛔ Stopping content generation...")
                break  # Exit loop when shutdown is requested
            if "message" in chunk and "content" in chunk["message"]:
                response_data = json.dumps({"content": chunk["message"]["content"]})
                yield f"data: {response_data}\n\n"
            await asyncio.sleep(0.1)  # Yield control to the event loop between chunks
    except asyncio.CancelledError:
        print("⛔ Stream cancelled by user.")
        raise  # Propagate cancellation
    except Exception as e:
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"

async def generate_response(query: str):
    """Returns a non-streamed response."""
    try:
        response = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}]
        )
        return {"content": response["message"]["content"]}
    except Exception as e:
        return {"error": str(e)}