Spaces:
Runtime error
Runtime error
File size: 603 Bytes
78e119e 8b64a94 690145e 81f3106 78e119e 8b64a94 a2415d5 81f3106 8b64a94 81f3106 63464ea 8b64a94 63464ea a2415d5 63464ea 8b64a94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
from fastapi import FastAPI
from typing import List
from vllm import LLM, SamplingParams
import os
from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment
# before anything below reads them.
load_dotenv()

# Hugging Face access token; this is None when HUGGINGFACE_TOKEN is unset.
token: str = os.getenv("HUGGINGFACE_TOKEN")

# ASGI application object that the route decorators below register on.
app = FastAPI()
# Single-entry cache of the loaded engine, keyed by model name, so the model
# is constructed once instead of on every request.
_llm_cache: dict = {}


def _get_llm(model: str) -> LLM:
    """Return the cached ``LLM`` for *model*, constructing it on first use."""
    if model not in _llm_cache:
        # Drop the reference to any previously loaded model before loading a
        # different one, so at most one engine is kept alive by this cache.
        _llm_cache.clear()
        _llm_cache[model] = LLM(model=model)
    return _llm_cache[model]


@app.get("/llm_inference")
def read_root(
    prompt: str,
    model: str = "meta-llama/Llama-2-7b-hf",
    temperature: float = 0.,
    max_tokens: int = 1024,
) -> List:
    """Generate a completion for ``prompt`` with a vLLM model.

    Args:
        prompt: Text passed to the model as the single generation request.
        model: Model name handed to ``vllm.LLM``.
        temperature: Sampling temperature forwarded to ``SamplingParams``.
        max_tokens: Generation length cap forwarded to ``SamplingParams``.

    Returns:
        The list returned by ``LLM.generate`` for the one-element prompt list.
    """
    # The Hugging Face token is a download credential, not a sampling option:
    # passing it to SamplingParams is rejected as an unknown keyword. Expose it
    # through the environment instead, without overriding a value already set.
    # NOTE(review): relies on huggingface_hub reading HF_TOKEN - confirm for
    # the installed version.
    if token:
        os.environ.setdefault("HF_TOKEN", token)

    sampling_params = SamplingParams(
        temperature=temperature,
        max_tokens=max_tokens,
    )
    llm = _get_llm(model)
    return llm.generate([prompt], sampling_params)
|