from fastapi import WebSocket
from transformers import pipeline
import asyncio

# Load the model
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
generator = pipeline("text-generation", model=model_name)

async def generate_text_stream(prompt: str, websocket: WebSocket):
    for i in range(10):
        # Simulate streaming by regenerating with a growing token budget
        # (replace with real incremental inference, e.g. transformers' TextIteratorStreamer).
        # max_new_tokens counts only generated tokens, unlike the deprecated max_length,
        # which also counts the prompt and can silently truncate generation.
        # Run the blocking pipeline call in a worker thread so the event loop stays free.
        result = await asyncio.to_thread(
            generator, prompt, max_new_tokens=i + 10, do_sample=True
        )
        chunk = result[0]["generated_text"]
        await websocket.send_text(chunk)
        await asyncio.sleep(0.1)  # Simulate per-chunk delay
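
# --- Usage sketch (assumption, not part of the original snippet): wiring the
# streaming helper into a FastAPI app. The app instance and the "/ws/generate"
# route name below are hypothetical, chosen for illustration only.
from fastapi import FastAPI, WebSocketDisconnect

app = FastAPI()

@app.websocket("/ws/generate")
async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()  # complete the WebSocket handshake
    try:
        # Treat the first client message as the generation prompt.
        prompt = await websocket.receive_text()
        await generate_text_stream(prompt, websocket)
    except WebSocketDisconnect:
        pass  # client disconnected mid-stream; nothing to clean up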