"""FastAPI service that answers questions in Bahasa Indonesia via a Qwen model
hosted on a HuggingFace Inference Endpoint, wired together with LangChain.

Requires the ``HF_API_KEY`` environment variable (raises ``KeyError`` at import
time if unset).
"""
from fastapi import FastAPI
import os
from typing import Union
from custom_llm import CustomLLM  # retained: alternative local backend for this service
from pydantic import BaseModel
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline  # retained: alternative local-pipeline backend
from langchain_huggingface import HuggingFaceEndpoint


class ConversationPost(BaseModel):
    """Request body for POST /conversation.

    Attributes:
        tenant: optional tenant identifier (currently unused by the handler).
        module: optional module identifier (currently unused by the handler).
        question: the user question to forward to the model.
    """
    tenant: Union[str, None] = None
    module: Union[str, None] = None
    question: str


# Fail fast if the HuggingFace token is missing; the endpoint below needs it.
API_TOKEN = os.environ['HF_API_KEY']
os.environ["HUGGINGFACEHUB_API_TOKEN"] = API_TOKEN

app = FastAPI()

# ChatML-style prompt: system turn pins the persona and forces Bahasa Indonesia,
# user turn carries the question, and the template ends mid-assistant-turn so
# the model continues from there.
prompt = PromptTemplate.from_template("""<|im_start|>system
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
""")

# NOTE(review): repo_id names a vision-language model ("Qwen-VL-Chat") while the
# task is plain text-generation — confirm this is the intended checkpoint.
llm = prompt | HuggingFaceEndpoint(
    repo_id="Qwen/Qwen-VL-Chat",
    task="text-generation",
    max_new_tokens=150,
    do_sample=False,
    # Stop at the ChatML end-of-turn marker so the completion does not run
    # past the assistant turn (matches the stop token the prompt format uses).
    stop_sequences=["<|im_end|>"],
)


@app.get("/")
def greet_json():
    """Liveness check: return a static greeting."""
    return {"Hello": "World!"}


@app.post("/conversation")
async def conversation(data: ConversationPost):
    """Answer ``data.question`` with the remote LLM chain.

    Uses ``ainvoke`` so the remote-inference round trip does not block the
    event loop (the previous synchronous ``invoke`` stalled all other requests
    for the duration of the HuggingFace call).
    """
    return {"output": await llm.ainvoke({"question": data.question})}