"""
LLM client for a Hugging Face Inference Endpoint exposing an OpenAI-compatible chat completions API.
"""
import requests
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Endpoint configuration
HF_API_KEY = os.environ.get("HF_API_KEY", "")
ENDPOINT_URL = os.environ.get("ENDPOINT_URL", "https://cg01ow7izccjx1b2.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions")

# Verify configuration
if not HF_API_KEY:
    logger.warning("HF_API_KEY environment variable not set")
if not ENDPOINT_URL:
    logger.warning("ENDPOINT_URL environment variable not set")


def run_llm(prompt, max_tokens=512, temperature=0.7):
    """
    Process input text through the HF Inference Endpoint.

    Args:
        prompt: Input prompt to process
        max_tokens: Maximum number of tokens to generate
        temperature: Sampling temperature

    Returns:
        Generated response text, or an error message string on failure
    """
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json"
    }

    # Format messages in the OpenAI chat-completions format
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant for a telecom service. Answer questions clearly and concisely."},
        {"role": "user", "content": prompt}
    ]

    payload = {
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    logger.info(f"Sending request to endpoint: {ENDPOINT_URL[:30]}...")
    try:
        # timeout is an added safeguard (value is an assumption; tune for your
        # endpoint's cold-start latency) so a stalled endpoint cannot block
        # this call indefinitely
        response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        # Expected OpenAI-style response shape:
        # {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}
        response_text = result["choices"][0]["message"]["content"]
        return response_text
    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {str(e)}"
        if hasattr(e, 'response') and e.response is not None:
            error_msg += f" - Status code: {e.response.status_code}, Response: {e.response.text}"
        logger.error(error_msg)
        return f"Error generating response: {str(e)}"