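# NOTE(review): the lines below appear to be residue from a web file viewer,
# kept as comments so that the module parses:
# TwT-6's picture
# Upload 2667 files
# 256a159 verified
# raw
# history blame contribute delete
# 4.98 kB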
import json
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union
import requests
from opencompass.registry import MODELS
from opencompass.utils.logging import get_logger
from opencompass.utils.prompt import PromptList
from .base_api import BaseAPIModel
# A prompt is either an OpenCompass ``PromptList`` or a plain string.
PromptType = Union[PromptList, str]
@MODELS.register_module()
class KrGPT(BaseAPIModel):
    """API model wrapper that POSTs chat messages to a KrGPT HTTP endpoint.

    Args:
        path (str): Model identifier forwarded to ``BaseAPIModel``.
            Defaults to 'KrGPT'.
        url (str): URL the chat-completion requests are POSTed to.
        max_seq_len (int): Forwarded to ``BaseAPIModel``. Defaults to 2048.
        meta_template (Dict, optional): Forwarded to ``BaseAPIModel``.
        retry (int): Forwarded to ``BaseAPIModel``; ``_generate`` reads
            ``self.retry`` as the maximum number of attempts per request
            (presumably set by the base class -- verify). Defaults to 2.
        generation_kwargs (Dict, optional): Generation options. Only
            ``max_new_tokens`` is read in this class (default 1024).
            ``None`` is treated as an empty dict.
    """

    is_api: bool = True

    def __init__(
        self,
        path: str = 'KrGPT',
        url: str = 'http://101.69.162.5:9300/v1/chat/completions',
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
        generation_kwargs: Optional[Dict] = None,
    ):
        # Build a fresh dict per instance instead of sharing one mutable
        # default object across every KrGPT instance.
        if generation_kwargs is None:
            generation_kwargs = {}
        super().__init__(
            path=path,
            max_seq_len=max_seq_len,
            meta_template=meta_template,
            retry=retry,
            generation_kwargs=generation_kwargs,
        )
        self.logger = get_logger()
        self.url = url
        self.generation_kwargs = generation_kwargs
        self.max_out_len = self.generation_kwargs.get('max_new_tokens', 1024)

    def generate(self, inputs: List[PromptType], max_out_len: int,
                 **kwargs) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptLists.
                A PromptList should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.
                NOTE: this argument is currently not used; the value
                derived from ``generation_kwargs['max_new_tokens']`` at
                construction time is passed on instead.

        Returns:
            List[str]: A list of generated strings, in input order.
        """
        # Requests are I/O-bound, so they are fanned out over threads.
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [self.max_out_len] * len(inputs)))
        return results

    def _generate(self,
                  input: PromptType,
                  max_out_len: int,
                  temperature: float = 0.0) -> str:
        """Generate a result for a single input.

        Args:
            input (PromptType): A string or PromptList.
                A PromptList should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.
                Not sent to the server by this implementation.
            temperature (float): Sampling temperature. Not sent to the
                server by this implementation.

        Returns:
            str: The generated string, stripped of surrounding whitespace.

        Raises:
            RuntimeError: If no usable response was obtained within
                ``self.retry`` attempts.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            # Translate OpenCompass role names into chat-API role names.
            # Items with any other role are sent without a 'role' key.
            role_map = {
                'HUMAN': 'user',
                'BOT': 'assistant',
                'SYSTEM': 'system',
            }
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                role = role_map.get(item['role'])
                if role is not None:
                    msg['role'] = role
                messages.append(msg)

        # Loop-invariant request pieces are built once.
        header = {'content-type': 'application/json'}
        payload = json.dumps(dict(messages=messages))

        num_attempts = 0
        while num_attempts < self.retry:
            # Count the attempt up front so that every failure path
            # (including the `continue`s below) consumes one retry and the
            # loop is guaranteed to terminate.
            num_attempts += 1
            try:
                raw_response = requests.post(self.url,
                                             headers=header,
                                             data=payload)
            except requests.ConnectionError:
                self.logger.error('Got connection error, retrying...')
                continue
            try:
                response = raw_response.json()
            except requests.JSONDecodeError:
                self.logger.error('JsonDecode error, got %s',
                                  str(raw_response.content))
                continue
            try:
                return response['choices'][0]['message']['content'].strip()
            except (KeyError, IndexError, TypeError, AttributeError):
                # The payload does not have the expected
                # choices[0].message.content shape (e.g. an error object,
                # an empty choices list or a null content).
                self.logger.error('Find error message in response: %s',
                                  str(response))

        raise RuntimeError('Calling KrGPT failed after retrying for '
                           f'{num_attempts} times. Check the logs for '
                           'details.')