Spaces:
Sleeping
Sleeping
File size: 1,482 Bytes
bebe766 98d2712 d75ac70 bebe766 98d2712 25f269c 25d3011 25f269c 98d2712 d75ac70 98d2712 25d3011 bebe766 98d2712 25d3011 98d2712 25d3011 25f269c 25d3011 25f269c 98d2712 bebe766 98d2712 73b6736 98d2712 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
from fastapi import FastAPI
import os
from typing import Union
from custom_llm import CustomLLM
from pydantic import BaseModel
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEndpoint
class ConversationPost(BaseModel):
    """Request body for the POST /conversation endpoint."""

    # Optional tenant identifier; not read by the /conversation handler below.
    tenant: Union[str, None] = None
    # Optional module identifier; not read by the /conversation handler below.
    module: Union[str, None] = None
    # Required user question, interpolated into the ChatML prompt template.
    question: str
# Hugging Face API token must be provided via the HF_API_KEY env var;
# this raises KeyError at import time if it is missing.
API_TOKEN = os.environ['HF_API_KEY']
# langchain_huggingface reads credentials from HUGGINGFACEHUB_API_TOKEN.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = API_TOKEN
# FastAPI application instance; routes are registered on it below.
app = FastAPI()
# ChatML-style prompt (Qwen chat format): a fixed system persona that forces
# answers in Bahasa Indonesia, then the user's `{question}` filled from the
# request body. NOTE(review): "Jonthan" looks like a typo for "Jonathan",
# but it is user-visible model output — confirm with the author before changing.
prompt = PromptTemplate.from_template("""<|im_start|>system
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
""")
# Chain: format the ChatML prompt, then call the HF inference endpoint.
# Fix: the prompt ends each turn with <|im_end|>, but no stop sequence was
# configured, so the model could generate past the assistant's turn (the
# earlier CustomLLM variant bound stop=['<|im_end|>'] for exactly this reason).
# NOTE(review): "Qwen/Qwen-VL-Chat" is a vision-language chat model; for a
# text-only API a text-generation model may be intended — confirm repo_id.
llm = prompt | HuggingFaceEndpoint(
    repo_id="Qwen/Qwen-VL-Chat",
    task="text-generation",
    max_new_tokens=150,
    do_sample=False,  # deterministic (greedy) decoding
    # Stop generating at the ChatML end-of-turn marker.
    stop_sequences=["<|im_end|>"],
)
@app.get("/")
def greet_json():
return {"Hello": "World!"}
@app.post("/conversation")
async def conversation(data : ConversationPost):
return {"output":llm.invoke({"question":data.question})} |