import os
from typing import List, Optional, Union

from litellm import completion

from lagent.llms import (GPTAPI, INTERNLM2_META, BaseAPIModel,
                         HFTransformerCasualLM, LMDeployClient,
                         LMDeployServer)
from lagent.schema import ModelStatusCode
from lagent.utils.util import filter_suffix

internlm_server = dict(type=LMDeployServer,
                       path='internlm/internlm2_5-7b-chat',
                       model_name='internlm2',
                       meta_template=INTERNLM2_META,
                       top_p=0.8,
                       top_k=1,
                       temperature=0,
                       max_new_tokens=8192,
                       repetition_penalty=1.02,
                       stop_words=['<|im_end|>'])

internlm_client = dict(type=LMDeployClient,
                       model_name='internlm2_5-7b-chat',
                       url='http://127.0.0.1:23333',
                       meta_template=INTERNLM2_META,
                       top_p=0.8,
                       top_k=1,
                       temperature=0,
                       max_new_tokens=8192,
                       repetition_penalty=1.02,
                       stop_words=['<|im_end|>'])

internlm_hf = dict(type=HFTransformerCasualLM,
                   path='internlm/internlm2_5-7b-chat',
                   meta_template=INTERNLM2_META,
                   top_p=0.8,
                   top_k=None,
                   temperature=1e-6,
                   max_new_tokens=8192,
                   repetition_penalty=1.02,
                   stop_words=['<|im_end|>'])

# openai_api_base must be the full chat completions endpoint,
# e.g. https://api.openai.com/v1/chat/completions
gpt4 = dict(type=GPTAPI,
            model_type='gpt-4-turbo',
            key=os.environ.get('OPENAI_API_KEY', 'YOUR OPENAI API KEY'),
            openai_api_base=os.environ.get(
                'OPENAI_API_BASE',
                'https://api.openai.com/v1/chat/completions'),
            )

url = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation'
qwen = dict(type=GPTAPI,
            model_type='qwen-max-longcontext',
            key=os.environ.get('QWEN_API_KEY', 'YOUR QWEN API KEY'),
            openai_api_base=url,
            meta_template=[
                dict(role='system', api_role='system'),
                dict(role='user', api_role='user'),
                dict(role='assistant', api_role='assistant'),
                dict(role='environment', api_role='system')
            ],
            top_p=0.8,
            top_k=1,
            temperature=0,
            max_new_tokens=4096,
            repetition_penalty=1.02,
            stop_words=['<|im_end|>'])

internlm_silicon = dict(type=GPTAPI,
                        model_type='internlm/internlm2_5-7b-chat',
                        key=os.environ.get('SILICON_API_KEY',
                                           'YOUR SILICON API KEY'),
                        openai_api_base='https://api.siliconflow.cn/v1/chat/completions',
                        meta_template=[
                            dict(role='system', api_role='system'),
                            dict(role='user', api_role='user'),
                            dict(role='assistant', api_role='assistant'),
                            dict(role='environment', api_role='system')
                        ],
                        top_p=0.8,
                        top_k=1,
                        temperature=0,
                        max_new_tokens=8192,
                        repetition_penalty=1.02,
                        stop_words=['<|im_end|>'])
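
# All of the dicts above follow lagent's config convention: ``type`` holds the
# model class and every other key is a constructor argument. A minimal,
# illustrative sketch of materialising one of them by hand (the surrounding
# project may use its own factory instead, and the call assumes the matching
# backend server or API key is actually reachable):
#
#     cfg = dict(internlm_client)
#     model_cls = cfg.pop('type')
#     llm = model_cls(**cfg)
#     print(llm.generate('Hello'))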


class litellmCompletion(BaseAPIModel):
    """Chat-completion wrapper that routes requests through LiteLLM.

    Args:
        path (str): placeholder kept for interface compatibility with the
            other lagent model wrappers; it is passed through to
            ``BaseAPIModel`` and is not used by LiteLLM itself.
        model_name (str): model identifier understood by LiteLLM, e.g.
            "command-r" or "deepseek/deepseek-chat". Defaults to
            "command-r".
        **kwargs: generation and API parameters accepted by
            ``BaseAPIModel`` (e.g. ``meta_template``, ``max_new_tokens``,
            ``temperature``, ``stop_words``).
    """

    def __init__(self, path='', model_name='command-r', **kwargs):
        self.model_name = model_name
        super().__init__(path, **kwargs)

    def generate(self,
                 inputs: Union[str, List[str]],
                 do_preprocess: bool = None,
                 skip_special_tokens: bool = False,
                 **kwargs):
        """Return the chat completions in non-stream mode.

        Args:
            inputs (Union[str, List[str]]): input texts to be completed.
            do_preprocess (bool): whether to pre-process the messages.
                Defaults to True, which means chat_template will be applied.
            skip_special_tokens (bool): Whether or not to remove special
                tokens in the decoding. Defaults to False.

        Returns:
            (a list of/batched) text/chat completion
        """
        batched = True
        if isinstance(inputs, str):
            inputs = [inputs]
            batched = False
        gen_params = self.update_gen_params(**kwargs)
        # Send each prompt as its own single-turn conversation so that one
        # completion is returned per input.
        responses = []
        for prompt in inputs:
            messages = [{'role': 'user', 'content': prompt}]
            resp = completion(model=self.model_name, messages=messages)
            responses.append(resp.choices[0].message.content)
        # remove stop_words
        responses = filter_suffix(responses, gen_params.get('stop_words'))
        if batched:
            return responses
        return responses[0]

    def stream_chat(self,
                    inputs: List[dict],
                    stream: bool = True,
                    ignore_eos: bool = False,
                    skip_special_tokens: Optional[bool] = False,
                    timeout: int = 30,
                    **kwargs):
        """Start a new round of conversation and return the chat
        completions in stream mode.

        Args:
            inputs (List[dict]): user's inputs in this round conversation
            stream (bool): return in a streaming format if enabled
            ignore_eos (bool): indicator for ignoring eos
            skip_special_tokens (bool): Whether or not to remove special
                tokens in the decoding. Defaults to False.
            timeout (int): max time to wait for response

        Returns:
            tuple(Status, str, int): status, text/chat completion,
                generated token number
        """
        gen_params = self.update_gen_params(**kwargs)
        # LiteLLM follows the OpenAI naming, so translate max_new_tokens
        # to max_tokens before forwarding the generation parameters.
        max_new_tokens = gen_params.pop('max_new_tokens')
        gen_params.update(max_tokens=max_new_tokens)
        # stop_words are stripped locally below, so do not forward them.
        stop_words = gen_params.pop('stop_words', None) or []
        resp = ''
        finished = False
        messages = self.template_parser._prompt2api(inputs)
        for chunk in completion(model=self.model_name,
                                messages=messages,
                                stream=stream,
                                **gen_params):
            if not chunk.choices[0].delta.content:
                continue
            resp += chunk.choices[0].delta.content
            if not resp:
                continue
            # remove stop_words
            for sw in stop_words:
                if sw in resp:
                    resp = filter_suffix(resp, stop_words)
                    finished = True
                    break
            yield ModelStatusCode.STREAM_ING, resp, None
            if finished:
                break
        yield ModelStatusCode.END, resp, None


litellm_completion = dict(
    type=litellmCompletion,
    # model_name="deepseek/deepseek-chat",
    meta_template=[
        dict(role='system', api_role='system'),
        dict(role='user', api_role='user'),
        dict(role='assistant', api_role='assistant'),
        dict(role='environment', api_role='system')
    ])
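

# A minimal usage sketch, not part of the module's public surface: it assumes
# the credentials LiteLLM needs for the chosen backend are already set in the
# environment, e.g. COHERE_API_KEY for the default "command-r" model.
if __name__ == '__main__':
    cfg = dict(litellm_completion)
    llm = cfg.pop('type')(**cfg)
    # Single string in, single completion string out.
    print(llm.generate('Reply with one short greeting.'))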