"""
LLM implementation using Hugging Face Inference Endpoint with OpenAI compatibility.
"""
import logging
import os

import requests

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Endpoint configuration
HF_API_KEY = os.environ.get("HF_API_KEY", "")
ENDPOINT_URL = os.environ.get(
    "ENDPOINT_URL",
    "https://cg01ow7izccjx1b2.us-east-1.aws.endpoints.huggingface.cloud/v1/chat/completions",
)

# Verify configuration
if not HF_API_KEY:
    logger.warning("HF_API_KEY environment variable not set")
if not ENDPOINT_URL:
    logger.warning("ENDPOINT_URL environment variable not set")


def run_llm(prompt, max_tokens=512, temperature=0.7):
    """
    Process input text through the HF Inference Endpoint.

    Args:
        prompt: Input prompt to process.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        Generated response text, or an error message if the request fails.
    """
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json",
    }

    # Format messages in the OpenAI chat-completions format
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant for a telecom service. Answer questions clearly and concisely."},
        {"role": "user", "content": prompt},
    ]

    payload = {
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    # Log only a prefix of the URL to avoid leaking the full endpoint address
    logger.info(f"Sending request to endpoint: {ENDPOINT_URL[:30]}...")
    try:
        # A timeout prevents a hung endpoint from blocking the caller indefinitely
        response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {e}"
        # RequestException always carries a .response attribute (possibly None)
        if e.response is not None:
            error_msg += f" - Status code: {e.response.status_code}, Response: {e.response.text}"
        logger.error(error_msg)
        return f"Error generating response: {e}"