# Hugging Face Space app (the "Spaces: Sleeping" header above the code is
# status text scraped from the Space page, not part of the program).
import os
from typing import Union

from fastapi import FastAPI
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint, HuggingFacePipeline
from pydantic import BaseModel

from custom_llm import CustomLLM
class ConversationPost(BaseModel):
    """Request body for the conversation endpoint."""

    # Optional multi-tenancy identifiers; accepted in the payload but not
    # read anywhere in this module.
    tenant: Union[str, None] = None
    module: Union[str, None] = None
    # The user question interpolated into the ChatML prompt template.
    question: str
# Hugging Face Inference API credentials. HF_API_KEY must be set in the
# environment; a missing key fails fast here with a KeyError at startup.
API_TOKEN = os.environ['HF_API_KEY']
os.environ["HUGGINGFACEHUB_API_TOKEN"] = API_TOKEN

app = FastAPI()

# ChatML-style prompt: the system turn pins the assistant persona and forces
# answers in Bahasa Indonesia; {question} is filled in per request.
prompt = PromptTemplate.from_template(
    """<|im_start|>system
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
"""
)
# llm = prompt | HuggingFacePipeline.from_model_id( | |
# model_id="Qwen/Qwen2-1.5B-Instruct", | |
# task="text-generation", | |
# pipeline_kwargs={ | |
# "max_new_tokens": 150, | |
# "return_full_text":False | |
# }, | |
# ) | |
llm = prompt | HuggingFaceEndpoint( | |
repo_id="Qwen/Qwen-VL-Chat", | |
task="text-generation", | |
max_new_tokens=150, | |
do_sample=False, | |
) | |
# llm = prompt | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>']) | |
@app.get("/")
def greet_json():
    """Health-check root endpoint returning a static greeting.

    NOTE(review): the route decorator was missing in the retrieved source
    (likely stripped during extraction); without it this handler is
    unreachable. GET "/" matches the standard HF Space FastAPI template.
    """
    return {"Hello": "World!"}
@app.post("/conversation")
async def conversation(data: ConversationPost):
    """Run the request's question through the LLM chain and return its text.

    NOTE(review): the route decorator was absent in the retrieved source, so
    this handler was unreachable; the path "/conversation" is inferred from
    the function name — confirm against the deployed clients.
    """
    # llm.invoke is a blocking call inside an async handler; it will stall
    # the event loop for the duration of the remote generation.
    # NOTE(review): consider fastapi.concurrency.run_in_threadpool.
    return {"output": llm.invoke({"question": data.question})}