GPT-Neo 125M created in app/main.py
app/main.py +24 -0
app/main.py
ADDED
@@ -0,0 +1,24 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from vllm import LLM, SamplingParams
+
+app = FastAPI()
+
+# Initialize the model (we'll use a small model for this example)
+model = LLM(model="EleutherAI/gpt-neo-125M")
+
+class GenerateRequest(BaseModel):
+    prompt: str
+
+@app.post("/generate")
+async def generate(request: GenerateRequest):
+    try:
+        sampling_params = SamplingParams(temperature=0.7, max_tokens=100)
+        outputs = model.generate([request.prompt], sampling_params)
+        return {"generated_text": outputs[0].outputs[0].text}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/")
+async def root():
+    return {"message": "vLLM server is running"}
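
For reference, a minimal sketch of how the /generate endpoint could be exercised once the app is served. This is illustrative, not part of the commit: it assumes the server is launched locally with `uvicorn app.main:app --port 8000`, and the prompt text is a placeholder.

    # Hypothetical client for the /generate endpoint above (not in this commit).
    # Assumes the server was started with: uvicorn app.main:app --port 8000
    import requests

    resp = requests.post(
        "http://localhost:8000/generate",        # assumed local dev URL
        json={"prompt": "Once upon a time"},     # matches the GenerateRequest schema
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json()["generated_text"])

One design note: vLLM's `LLM.generate` is a blocking call, so even though the handler is declared `async`, concurrent requests will effectively be serialized on the model; for this small demo model that is likely acceptable.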