""" LLM implementation using Hugging Face Inference Endpoint with OpenAI compatibility. """ import requests import os import json import logging # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # Endpoint configuration HF_API_KEY = os.environ.get("HF_API_KEY", "") ENDPOINT_URL = os.environ.get("ENDPOINT_URL", "https://cg01ow7izccjx1b2.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions") # Verify configuration if not HF_API_KEY: logger.warning("HF_API_KEY environment variable not set") if not ENDPOINT_URL: logger.warning("ENDPOINT_URL environment variable not set") def run_llm(prompt, max_tokens=512, temperature=0.7): """ Process input text through HF Inference Endpoint. Args: prompt: Input prompt to process max_tokens: Maximum tokens to generate temperature: Temperature for sampling Returns: Generated response text """ headers = { "Authorization": f"Bearer {HF_API_KEY}", "Content-Type": "application/json" } # Format messages in OpenAI format messages = [ {"role": "system", "content": "You are a helpful AI assistant for a telecom service. Answer questions clearly and concisely."}, {"role": "user", "content": prompt} ] payload = { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "messages": messages, "max_tokens": max_tokens, "temperature": temperature } logger.info(f"Sending request to endpoint: {ENDPOINT_URL[:30]}...") try: response = requests.post(ENDPOINT_URL, headers=headers, json=payload) response.raise_for_status() result = response.json() response_text = result["choices"][0]["message"]["content"] return response_text except requests.exceptions.RequestException as e: error_msg = f"Error calling endpoint: {str(e)}" if hasattr(e, 'response') and e.response is not None: error_msg += f" - Status code: {e.response.status_code}, Response: {e.response.text}" logger.error(error_msg) return f"Error generating response: {str(e)}"