"""
LLM client for a Hugging Face Inference Endpoint exposing an OpenAI-compatible chat completions API.
"""
import requests
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Endpoint configuration
HF_API_KEY = os.environ.get("HF_API_KEY", "")
ENDPOINT_URL = os.environ.get("ENDPOINT_URL", "https://cg01ow7izccjx1b2.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions")

# Verify configuration
if not HF_API_KEY:
    logger.warning("HF_API_KEY environment variable not set")
if not ENDPOINT_URL:
    logger.warning("ENDPOINT_URL environment variable not set")


def run_llm(prompt, max_tokens=512, temperature=0.7):
    """
    Process input text through the HF Inference Endpoint.

    Args:
        prompt: Input prompt to process
        max_tokens: Maximum number of tokens to generate
        temperature: Sampling temperature

    Returns:
        Generated response text, or an error message string on failure
    """
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json"
    }

    # Format messages in the OpenAI chat-completions format
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant for a telecom service. Answer questions clearly and concisely."},
        {"role": "user", "content": prompt}
    ]

    payload = {
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    logger.info(f"Sending request to endpoint: {ENDPOINT_URL[:30]}...")
    try:
        # timeout is an added safeguard (value is an assumption; tune for your
        # endpoint's cold-start latency) so a stalled endpoint cannot block
        # this call indefinitely
        response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        # Expected OpenAI-style response shape:
        # {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}
        response_text = result["choices"][0]["message"]["content"]
        return response_text
    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {str(e)}"
        if hasattr(e, 'response') and e.response is not None:
            error_msg += f" - Status code: {e.response.status_code}, Response: {e.response.text}"
        logger.error(error_msg)
        return f"Error generating response: {str(e)}"