abhijit2k01 committed on
Commit
4c931c6
·
verified ·
1 Parent(s): 3931e9f

GPT-Neo 125M created in app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +24 -0
app/main.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from vllm import LLM, SamplingParams
4
+
5
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI()

# Load the model once, at import time, so every request reuses the same
# instance (a small model is used for this example).
model = LLM(model="EleutherAI/gpt-neo-125M")
9
+
10
# Request body schema for POST /generate.
class GenerateRequest(BaseModel):
    # Text handed to the model as the prompt to complete.
    prompt: str
12
+
13
@app.post("/generate")
async def generate(request: GenerateRequest):
    """Generate a completion for ``request.prompt`` with the module-level model.

    Sampling is configured with ``temperature=0.7`` and ``max_tokens=100``.

    Returns:
        A dict ``{"generated_text": <text>}`` holding the text of the first
        completion returned for the single submitted prompt.

    Raises:
        HTTPException: status 500, with the underlying error message as
            ``detail``, when sampling setup or generation fails.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        sampling_params = SamplingParams(temperature=0.7, max_tokens=100)
        # NOTE(review): this is a synchronous call inside an async handler, so
        # the event loop appears to be blocked while it runs -- confirm whether
        # it should be offloaded to a worker thread or an async engine.
        outputs = model.generate([request.prompt], sampling_params)
        return {"generated_text": outputs[0].outputs[0].text}
    except Exception as e:
        # Record the full traceback server-side; otherwise only str(e) survives
        # (in the HTTP response) and the failure is hard to diagnose.
        logging.getLogger(__name__).exception("Text generation failed")
        # Chain the cause so the original exception stays attached.
        raise HTTPException(status_code=500, detail=str(e)) from e
21
+
22
# Liveness endpoint: GET / answers with a fixed status message.
@app.get("/")
async def root():
    status = {"message": "vLLM server is running"}
    return status