import os

from dotenv import load_dotenv
from openai import OpenAI

# Short aliases for SambaNova's full model identifiers.
MODEL_ALIAS = {
    'llama3_8b': 'Meta-Llama-3.1-8B-Instruct',
    'llama3_70b': 'Meta-Llama-3.1-70B-Instruct',
    'llama3_3_70b': 'Meta-Llama-3.3-70B-Instruct',
    'llama3_405b': 'Meta-Llama-3.1-405B-Instruct',
    'llama3_1b': 'Meta-Llama-3.2-1B-Instruct',
    'llama3_3b': 'Meta-Llama-3.2-3B-Instruct',
}
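
# MODEL_ALIAS is not referenced elsewhere in this file; a small resolver such
# as the sketch below is one plausible way to consume it (illustrative helper,
# not part of the original code).
def resolve_model(name):
    """Return the full model name for a short alias; full names pass through."""
    return MODEL_ALIAS.get(name, name)
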
load_dotenv()

# SambaNova's OpenAI-compatible endpoint; related hosts mentioned in the
# original comment: cloud.sambanova.ai and fast-api.snova.ai.
client = OpenAI(
    base_url="https://api.sambanova.ai/v1",
    api_key=os.environ.get("SAMBA_API_KEY"),
)


def call_llama(system_prompt, prompt, model="Meta-Llama-3.1-8B-Instruct", **kwargs):
    """Send one system/user prompt pair and return the full streamed response.

    Extra kwargs are forwarded to the API, e.g.:
        temperature=0.1, top_p=0.1, max_tokens=50
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            stream=True,
            **kwargs,
        )
        # Accumulate the streamed delta chunks into a single string.
        response = ""
        for chunk in completion:
            if chunk.choices:  # some providers send a final chunk with no choices
                response += chunk.choices[0].delta.content or ""
        return response
    except Exception as e:
        print(f"API Error = {e}")
        return ""


def call_llama_chat(messages, model="Meta-Llama-3.1-8B-Instruct", **kwargs):
    """Send a full chat history and return the full streamed response.

    ``messages`` is a list of {"role": ..., "content": ...} dicts. Extra
    kwargs are forwarded to the API, e.g.:
        temperature=0.1, top_p=0.1
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            **kwargs,
        )
        response = ""
        for chunk in completion:
            if chunk.choices:  # guard against a final chunk with no choices
                response += chunk.choices[0].delta.content or ""
        return response
    except Exception as e:
        print(f"API Error = {e}")
        return ""
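

# Minimal usage sketch (assumes SAMBA_API_KEY is set in the environment or in
# a .env file; the prompts and settings below are illustrative, not from the
# original code).
if __name__ == "__main__":
    print(call_llama(
        system_prompt="You are a concise assistant.",
        prompt="Explain vLLM in one sentence.",
        model=MODEL_ALIAS['llama3_8b'],
        temperature=0.1,
        max_tokens=50,
    ))

    history = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Name one open-weight Llama model."},
    ]
    print(call_llama_chat(history, model=MODEL_ALIAS['llama3_70b'], temperature=0.1))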