# paper-slides-summary / sambaAPI.py
# (uploaded by zolicsaki — "Upload 7 files", commit 57d4532 verified)
from dotenv import load_dotenv
from openai import OpenAI
import os
# Shorthand aliases mapping friendly model names to the full SambaNova
# model identifiers accepted by the API's `model` parameter.
MODEL_ALIAS = {'llama3_8b': 'Meta-Llama-3.1-8B-Instruct',
'llama3_70b': 'Meta-Llama-3.1-70B-Instruct',
'llama3_3_70b': 'Meta-Llama-3.3-70B-Instruct',
'llama3_405b': 'Meta-Llama-3.1-405B-Instruct',
'llama3_1b': "Meta-Llama-3.2-1B-Instruct",
'llama3_3b': "Meta-Llama-3.2-3B-Instruct"}
# Load environment variables (notably SAMBA_API_KEY) from a local .env file.
load_dotenv()
# OpenAI-compatible client pointed at SambaNova's hosted inference endpoint.
client = OpenAI(
base_url="https://api.sambanova.ai/v1", # the endpoint IP running on vLLM cloud.sambanova.ai, https://api.sambanova.ai. fast-api.snova.ai
api_key=os.environ.get("SAMBA_API_KEY"),
)
def call_llama(system_prompt, prompt, model="Meta-Llama-3.1-8B-Instruct", **kwargs):
    """
    Send one system+user prompt pair to the SambaNova chat endpoint and
    return the assembled streamed completion text.

    Parameters:
        system_prompt: content for the "system" role message.
        prompt: content for the "user" role message.
        model: full model identifier (see MODEL_ALIAS for shorthands).
        **kwargs: forwarded to chat.completions.create, e.g.
            temperature = 0.1,
            top_p = 0.1
            max_tokens = 50

    Returns:
        The concatenated response text, or "" on any API error.
        Errors are printed rather than raised (deliberate best-effort).
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ],
            stream=True,
            **kwargs,
        )
        # Accumulate chunks in a list and join once (avoids quadratic +=).
        # Guard against stream chunks carrying an empty `choices` list
        # (e.g. a trailing usage-only chunk): unguarded indexing raises
        # IndexError mid-stream, which the except below would catch and
        # silently discard everything received so far.
        parts = []
        for chunk in completion:
            if chunk.choices and chunk.choices[0].delta.content:
                parts.append(chunk.choices[0].delta.content)
        return "".join(parts)
    except Exception as e:
        print('API Error = {}'.format(e))
        return ""
def call_llama_chat(messages, model="Meta-Llama-3.1-8B-Instruct", **kwargs):
    """
    Send a full chat message history to the SambaNova chat endpoint and
    return the assembled streamed completion text.

    Parameters:
        messages: list of {"role": ..., "content": ...} dicts, passed
            through unchanged to chat.completions.create.
        model: full model identifier (see MODEL_ALIAS for shorthands).
        **kwargs: forwarded to chat.completions.create, e.g.
            temperature = 0.1,
            top_p = 0.1

    Returns:
        The concatenated response text, or "" on any API error.
        Errors are printed rather than raised (deliberate best-effort).
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            **kwargs,
        )
        # Accumulate chunks in a list and join once (avoids quadratic +=).
        # Guard against stream chunks carrying an empty `choices` list
        # (e.g. a trailing usage-only chunk): unguarded indexing raises
        # IndexError mid-stream, which the except below would catch and
        # silently discard everything received so far.
        parts = []
        for chunk in completion:
            if chunk.choices and chunk.choices[0].delta.content:
                parts.append(chunk.choices[0].delta.content)
        return "".join(parts)
    except Exception as e:
        print('API Error = {}'.format(e))
        return ""