Spaces:
Sleeping
Sleeping
File size: 1,846 Bytes
7aa9175 ea874bb 7aa9175 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
from typing import Any, Dict, List, Optional
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
import json
import requests
class CustomLlama3(LLM):
url: str = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
bearer_token: str
max_new_tokens: int = 1024
top_p: float = 0.7
temperature: float = 0.1
def _call(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
if stop is not None:
raise ValueError("stop kwargs are not permitted.")
json_body = {
"inputs": prompt,
"parameters": {"max_new_tokens":self.max_new_tokens, "top_p":self.top_p, "temperature":self.temperature}
}
data = json.dumps(json_body)
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.bearer_token}'
}
response = requests.request("POST", self.url, headers=headers, data=data)
response.raise_for_status()
try:
result=json.loads(response.content.decode("utf-8"))[0]['generated_text']
target_substring = '<|end_header_id|>'
substring_length = len(target_substring)
last_occurrence_index = result.rfind(target_substring)
if last_occurrence_index != -1:
return result[last_occurrence_index + substring_length:].strip()
except:
return response
@property
def _identifying_params(self) -> Dict[str, Any]:
"""Return a dictionary of identifying parameters."""
return {"llmUrl": self.url}
@property
def _llm_type(self) -> str:
return "CustomLlama3" |