File size: 1,482 Bytes
bebe766
98d2712
d75ac70
bebe766
98d2712
 
 
 
25f269c
25d3011
25f269c
98d2712
 
d75ac70
 
98d2712
 
 
 
25d3011
 
 
bebe766
98d2712
25d3011
98d2712
 
 
 
25d3011
 
 
 
 
 
 
 
 
 
 
25f269c
25d3011
 
25f269c
 
 
98d2712
bebe766
 
 
98d2712
 
 
73b6736
98d2712
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from fastapi import FastAPI
import os
from typing import Union

from custom_llm import CustomLLM

from pydantic import BaseModel
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEndpoint


class ConversationPost(BaseModel):
    """Request body schema for POST /conversation."""

    # Optional tenant identifier — accepted but unused by the current handler.
    tenant: Union[str, None] = None
    # Optional module name — accepted but unused by the current handler.
    module: Union[str, None] = None
    # The user's question; required, forwarded verbatim into the prompt.
    question: str


# Hugging Face API token must be supplied via the environment; indexing
# (rather than .get) fails fast at startup with a KeyError instead of
# failing later on the first request.
API_TOKEN = os.environ['HF_API_KEY']

# langchain_huggingface picks the token up from this variable.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = API_TOKEN

app = FastAPI()

# ChatML-style prompt: fixed system instruction (answer strictly in Bahasa
# Indonesia) followed by the caller-supplied {question}.
prompt = PromptTemplate.from_template("""<|im_start|>system
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
""")

# Chain: prompt -> hosted Hugging Face inference endpoint.
# NOTE(review): Qwen/Qwen-VL-Chat is a vision-language *chat* model; confirm
# the Inference API actually serves it under the "text-generation" task —
# a text-only instruct model (e.g. Qwen2-*-Instruct) may be the intended repo.
llm = prompt | HuggingFaceEndpoint(
    repo_id="Qwen/Qwen-VL-Chat",
    task="text-generation",
    max_new_tokens=150,   # cap response length
    do_sample=False,      # deterministic (greedy) decoding
)


@app.get("/")
def greet_json():
    """Health-check endpoint: always returns a static greeting payload."""
    return dict(Hello="World!")


@app.post("/conversation")
async def conversation(data : ConversationPost):
    """Run the posted question through the LLM chain and wrap its answer."""
    answer = llm.invoke({"question": data.question})
    return {"output": answer}