Spaces:
Paused
Paused
""" | |
LLM implementation using Hugging Face Inference Endpoint with OpenAI compatibility. | |
""" | |
import requests | |
import os | |
import json | |
import logging | |
# Configure logging | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') | |
logger = logging.getLogger(__name__) | |
# Endpoint configuration | |
HF_API_KEY = os.environ.get("HF_API_KEY", "") | |
ENDPOINT_URL = os.environ.get("ENDPOINT_URL", "https://cg01ow7izccjx1b2.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions") | |
# Verify configuration | |
if not HF_API_KEY: | |
logger.warning("HF_API_KEY environment variable not set") | |
if not ENDPOINT_URL: | |
logger.warning("ENDPOINT_URL environment variable not set") | |
def run_llm(prompt, max_tokens=512, temperature=0.7, timeout=60):
    """
    Process input text through the HF Inference Endpoint (OpenAI-compatible chat API).

    Args:
        prompt: Input prompt to process
        max_tokens: Maximum tokens to generate
        temperature: Temperature for sampling
        timeout: Seconds to wait for the HTTP request before aborting.
            Without this, a stalled endpoint would hang the caller forever.

    Returns:
        Generated response text, or an "Error generating response: ..." string
        on failure (the function always returns a string; it never raises).
    """
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json"
    }
    # Format messages in OpenAI chat-completions format
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant for a telecom service. Answer questions clearly and concisely."},
        {"role": "user", "content": prompt}
    ]
    payload = {
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    # Lazy %-formatting: the message is only built if INFO logging is enabled.
    logger.info("Sending request to endpoint: %s...", ENDPOINT_URL[:30])
    try:
        response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        # Network/HTTP failure: include status code and body when the server
        # actually responded, then return the error string per the contract.
        error_msg = f"Error calling endpoint: {str(e)}"
        if hasattr(e, 'response') and e.response is not None:
            error_msg += f" - Status code: {e.response.status_code}, Response: {e.response.text}"
        logger.error(error_msg)
        return f"Error generating response: {str(e)}"
    except (KeyError, IndexError, ValueError) as e:
        # Endpoint returned 2xx but with a non-JSON body or an unexpected
        # shape (missing "choices"/"message"); keep the error-string contract.
        logger.error("Malformed endpoint response: %s", e)
        return f"Error generating response: {str(e)}"