Spaces:
Runtime error
Runtime error
import os
from typing import List, Optional

from dotenv import load_dotenv
from fastapi import FastAPI
from vllm import LLM, SamplingParams
# Populate the process environment from a .env file (if present) before the
# token lookup below reads it.
load_dotenv()

# Hugging Face access token. os.environ.get returns None when the variable is
# missing, so this is Optional rather than a guaranteed str.
token: Optional[str] = os.environ.get("HUGGINGFACE_TOKEN")

app = FastAPI()
def read_root(
    prompt: str,
    model: str = "meta-llama/Llama-2-7b-hf",
    temperature: float = 0.,
    max_tokens: int = 1024) -> List:
    """Generate a completion for ``prompt`` with a vLLM model.

    Args:
        prompt: Text submitted to the model as the only prompt in the batch.
        model: Model name or path handed to ``vllm.LLM``.
        temperature: Sampling temperature forwarded to ``SamplingParams``.
        max_tokens: Generation length limit forwarded to ``SamplingParams``.

    Returns:
        Whatever ``LLM.generate`` returns for the one-element prompt list.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # reviewed source; confirm it is registered on `app` elsewhere.

    # LLM.generate has no `token` parameter, so passing it raised TypeError.
    # The Hugging Face download stack reads the token from the environment
    # instead; setdefault keeps any value the deployment already provides.
    if token:
        os.environ.setdefault("HF_TOKEN", token)
        os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", token)

    sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)

    # NOTE(review): the model is constructed on every call, which is
    # presumably expensive; consider reusing one instance per model name once
    # the concurrency model of the caller is confirmed.
    llm = LLM(model=model)
    return llm.generate([prompt], sampling_params)