import asyncio
import ollama
import json
import signal

shutdown_event = asyncio.Event()

def shutdown_handler(sig, frame):
    """Handles shutdown signals like Ctrl + C."""
    print("\n⛔ Shutdown requested! Stopping API...")
    shutdown_event.set()

# Attach signal handlers for graceful termination
signal.signal(signal.SIGINT, shutdown_handler)
signal.signal(signal.SIGTERM, shutdown_handler)
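
# Note: signal.signal() installs a process-wide handler; because the handler
# only sets a flag, it is safe here. Inside a running event loop,
# loop.add_signal_handler(signal.SIGINT, shutdown_event.set) is the
# asyncio-native alternative (Unix only), and either approach sets the event.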

async def generate_stream(query: str):
    """Generates streamed responses with cancellation support."""
    try:
        stream = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}],
            stream=True
        )
        for chunk in stream:
            if shutdown_event.is_set():
                print("⛔ Stopping content generation...")
                break  # Exit loop when shutdown is requested
            if "message" in chunk and "content" in chunk["message"]:
                response_data = json.dumps({"content": chunk["message"]["content"]})
                yield f"data: {response_data}\n\n"
            await asyncio.sleep(0.1)  # Yield control to the event loop between chunks
    except asyncio.CancelledError:
        print("⛔ Stream cancelled by user.")
        raise  # Propagate cancellation
    except Exception as e:
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"

async def generate_response(query: str):
    """Returns a non-streamed response."""
    try:
        response = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": query}]
        )
        return {"content": response["message"]["content"]}
    except Exception as e:
        return {"error": str(e)}