"""
LLM implementation using Hugging Face Inference Endpoint with OpenAI compatibility.
"""
import logging
import os

import requests

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Endpoint configuration
HF_API_KEY = os.environ.get("HF_API_KEY", "")
ENDPOINT_URL = os.environ.get(
    "ENDPOINT_URL",
    "https://cg01ow7izccjx1b2.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions",
)

# Verify configuration
if not HF_API_KEY:
    logger.warning("HF_API_KEY environment variable not set")
if not ENDPOINT_URL:
    logger.warning("ENDPOINT_URL environment variable not set")


def run_llm(prompt, max_tokens=512, temperature=0.7):
    """
    Process input text through the HF Inference Endpoint.

    Args:
        prompt: Input prompt to process.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        Generated response text, or an error message if the request fails.
    """
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json",
    }

    # Format messages in the OpenAI chat-completions format
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant for a telecom service. Answer questions clearly and concisely."},
        {"role": "user", "content": prompt},
    ]

    payload = {
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    # Log only a prefix of the URL to avoid leaking the full endpoint address
    logger.info(f"Sending request to endpoint: {ENDPOINT_URL[:30]}...")
    try:
        # A timeout prevents a hung endpoint from blocking the caller indefinitely
        response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {e}"
        # RequestException always carries a .response attribute (possibly None)
        if e.response is not None:
            error_msg += f" - Status code: {e.response.status_code}, Response: {e.response.text}"
        logger.error(error_msg)
        return f"Error generating response: {e}"