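# FastAPI service for a Hugging Face Space: exposes /chat, /conversation*, and
# /inference routes that forward prompts to hosted Hugging Face Inference API
# models (Meta-Llama-3-8B-Instruct, Qwen1.5-4B-Chat, Qwen2-1.5B-Instruct)
# through LangChain prompt | endpoint chains.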
from fastapi import FastAPI
import os
from typing import Union
from custom_llm import CustomLLM
from pydantic import BaseModel
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEndpoint
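# Request schemas: ConversationPost and InferencePost drive the templated
# routes; LLMPost selects a raw endpoint by model name on /chat.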
class ConversationPost(BaseModel):
    tenant: Union[str, None] = None
    module: Union[str, None] = None
    question: str


class InferencePost(BaseModel):
    question: str
    with_template: Union[str, None] = None


class LLMPost(BaseModel):
    model: str
    question: str
API_TOKEN = os.environ['HF_API_KEY']
os.environ["HUGGINGFACEHUB_API_TOKEN"] = API_TOKEN
app = FastAPI()
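# Prompt templates in each model's native chat markup: ChatML-style
# <|im_start|>/<|im_end|> tags for the Qwen models, and the Llama 3
# header/<|eot_id|> format for Meta-Llama-3-8B-Instruct. Both set the same
# system persona and ask for answers strictly in Bahasa Indonesia.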
prompt_qwen = PromptTemplate.from_template("""<|im_start|>system
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
""")
prompt_llama = PromptTemplate.from_template("""<|start_header_id|>system<|end_header_id|>
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|eot_id|><|start_header_id|>user<|end_header_id|>
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""")
# llm = prompt | HuggingFacePipeline.from_model_id(
# model_id="Qwen/Qwen2-1.5B-Instruct",
# task="text-generation",
# pipeline_kwargs={
# "max_new_tokens": 150,
# "return_full_text":False
# },
# )
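# Hosted text-generation endpoints on the Hugging Face Inference API; the
# token exported to HUGGINGFACEHUB_API_TOKEN above is picked up automatically.
# do_sample=False keeps decoding greedy and deterministic.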
llama = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="text-generation",
    max_new_tokens=4096,
    do_sample=False,
)

qwen = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen1.5-4B-Chat",
    task="text-generation",
    max_new_tokens=4096,
    do_sample=False,
)

qwen2 = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2-1.5B-Instruct",
    task="text-generation",
    max_new_tokens=4096,
    do_sample=False,
)
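# LCEL chains: each pipes the matching prompt template into an endpoint, so
# .invoke({"question": ...}) formats the prompt and calls the model.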
llm = prompt_qwen | qwen
llm2 = prompt_llama | llama
llm3 = prompt_qwen | qwen2
# llm = prompt | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>'])
@app.get("/")
def greet_json():
    return {"Hello": "World!"}
@app.post("/chat")
async def chat(data: LLMPost):
if data.model == 'llama':
return {"data":llama.invoke(data.question)}
elif data.model == 'qwen':
return {"data":qwen.invoke(data.question)}
else:
return {"data":qwen2.invoke(data.question)}
@app.post("/conversation")
async def conversation(data : ConversationPost):
return {"output":llm.invoke({"question":data.question})}
@app.post("/conversation2")
async def conversation2(data : ConversationPost):
return {"output":llm2.invoke({"question":data.question})}
@app.post("/conversation3")
async def conversation3(data : ConversationPost):
return {"output":llm3.invoke({"question":data.question})}
@app.post("/inference")
async def inference(data : InferencePost):
if data.with_template == 'llama':
out = llm2.invoke(data.question)
elif data.with_template == 'qwen':
out = llm.invoke(data.question)
elif data.with_template == 'qwen2':
out = llm3.invoke(data.question)
else:
out = llama.invoke(data.question)
return {"output":out} |
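# Example requests (a sketch; assumes the app is served with uvicorn on
# localhost:7860, the usual port for a Docker-based Space, so adjust host and
# port to your deployment):
#
#   curl -X POST localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"model": "llama", "question": "Apa itu FastAPI?"}'
#
#   curl -X POST localhost:7860/inference \
#        -H "Content-Type: application/json" \
#        -d '{"question": "Apa itu FastAPI?", "with_template": "qwen2"}'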