from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from vllm import LLM, SamplingParams

app = FastAPI()

# Initialize the model (we'll use a small model for this example)
model = LLM(model="EleutherAI/gpt-neo-125M")


class GenerateRequest(BaseModel):
    prompt: str


@app.post("/generate")
async def generate(request: GenerateRequest):
    try:
        sampling_params = SamplingParams(temperature=0.7, max_tokens=100)
        outputs = model.generate([request.prompt], sampling_params)
        # vLLM returns one RequestOutput per prompt; take the first completion
        return {"generated_text": outputs[0].outputs[0].text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
async def root():
    # Simple health-check endpoint
    return {"message": "vLLM server is running"}