llm_host / gen.py
Add FastAPI application with WebSocket support and authentication
from fastapi import WebSocket
from transformers import pipeline
import asyncio
# Load the model
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
generator = pipeline("text-generation", model=model_name)
async def generate_text_stream(prompt: str, websocket: WebSocket):
    # Simulated streaming: regenerate with a growing token budget and push
    # each intermediate result to the client (replace with actual incremental inference).
    # Note: max_new_tokens bounds only the generated tokens; the original
    # max_length also counted the prompt and would truncate on long inputs.
    for i in range(10):
        chunk = generator(prompt, max_new_tokens=i + 10, do_sample=True)[0]["generated_text"]
        await websocket.send_text(chunk)
        await asyncio.sleep(0.1)  # Simulated delay between chunks
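A minimal sketch of how this helper might be mounted on a FastAPI app. The app object, the /ws/generate route path, and the accept/receive flow are assumptions based on the commit description, not part of this file:

from fastapi import FastAPI, WebSocket, WebSocketDisconnect

app = FastAPI()

@app.websocket("/ws/generate")
async def ws_generate(websocket: WebSocket):
    await websocket.accept()
    try:
        # Hypothetical protocol: client sends one prompt, server streams text back.
        prompt = await websocket.receive_text()
        await generate_text_stream(prompt, websocket)
    except WebSocketDisconnect:
        pass  # Client disconnected mid-stream

To replace the simulation with actual incremental inference, one option is transformers' TextIteratorStreamer, which yields decoded text while generate() runs in a background thread. A sketch under that assumption (the function name generate_text_stream_real and max_new_tokens=256 are illustrative choices, not from the original file):

import asyncio
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

async def generate_text_stream_real(prompt: str, websocket: WebSocket):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt")
    # Run generation off the event loop; the streamer yields text as tokens decode.
    Thread(target=model.generate,
           kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256)).start()
    it = iter(streamer)
    while True:
        # next() blocks between tokens, so hand it to a worker thread
        # instead of stalling the event loop.
        chunk = await asyncio.to_thread(next, it, None)
        if chunk is None:
            break
        await websocket.send_text(chunk)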