Spaces:

Wolf369
/

vllm

Runtime error

File size: 484 Bytes

78e119e
8b64a94
690145e
78e119e
 
 
 
8b64a94
 
a2415d5
8b64a94
 
 
 
63464ea
8b64a94
63464ea
a2415d5
63464ea
8b64a94

from fastapi import FastAPI
from typing import List
from vllm import LLM, SamplingParams

# Application instance; the route handler below registers itself on it.
app = FastAPI()


# Holds at most one loaded engine, keyed by model name, so repeated requests
# for the same model reuse it instead of constructing a new LLM on every call.
_llm_cache = {}


def _get_llm(model: str) -> LLM:
    """Return an engine for *model*, constructing it only on a cache miss."""
    llm = _llm_cache.get(model)
    if llm is None:
        # Drop the reference to any previously cached engine before building
        # another one, so the cache never holds more than one engine.
        _llm_cache.clear()
        llm = LLM(model=model)
        _llm_cache[model] = llm
    return llm


@app.get("/llm_inference")
def read_root(
        prompt: str,
        model: str = "meta-llama/Llama-2-7b-hf",
        temperature: float = 0.,
        max_tokens: int = 1024) -> List:
    """Generate a completion for a single prompt.

    Args:
        prompt: Text passed to the model as the only prompt.
        model: Model identifier handed to ``LLM(model=...)``.
        temperature: Forwarded to ``SamplingParams``.
        max_tokens: Forwarded to ``SamplingParams``.

    Returns:
        The value returned by ``llm.generate`` for the one-element prompt list.
    """
    sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)

    # Reuse the cached engine; constructing LLM inside the handler reloaded
    # the model on every request.
    # NOTE(review): the cache is not guarded by a lock. If the server runs
    # this handler on multiple threads, concurrent first requests could each
    # construct an engine -- confirm whether serialisation is needed.
    llm = _get_llm(model)

    # NOTE(review): the generate() result is returned as-is; confirm that the
    # framework can serialise these output objects to JSON.
    return llm.generate([prompt], sampling_params)