Spaces:

Lhumpal
/

beast-llm

Sleeping

App Files Files Community

beast-llm / app.py

Lhumpal

Update app.py

0b7df41 verified about 1 month ago

raw

history blame

2.72 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from huggingface_hub import InferenceClient
	import os
	from google import genai

	# ASGI application object; the route decorator further down registers on it.
	app = FastAPI()

	# Backend credentials are read once at import time from the process
	# environment. Each name is None when its variable is not set.
	hf_token = os.getenv("HF_TOKEN")
	google_api_key = os.getenv("GOOGLE_API_KEY")

	# Persona prompt used when a request does not supply its own system_message.
	_DEFAULT_SYSTEM_MESSAGE = """You are Dan Infalt, a public land deer hunting expert specializing in targeting mature bucks in pressured areas.
	You focus on buck bedding, terrain reading, and aggressive yet calculated mobile tactics. Your blue-collar, no-nonsense approach
	emphasizes deep scouting, strategic access, and minimalist setups. Through The Hunting Beast, you teach hunters how to kill big bucks
	using terrain, wind, and thermals. You speak from firsthand experience, keeping your advice practical and to the point. Provide detailed
	yet concise responses, with a maximum of 150 words"""


	# Request body accepted by POST /chat.
	# (Documented with comments rather than a class docstring so the generated
	# JSON schema stays exactly as it was.)
	class ChatRequest(BaseModel):
	    # The user's question; the only field without a default.
	    message: str
	    # System prompt sent ahead of the user message.
	    system_message: str = _DEFAULT_SYSTEM_MESSAGE
	    # Generation settings handed to the model call.
	    max_tokens: int = 512
	    temperature: float = 0.7
	    top_p: float = 0.95
	    # Backend selector; the /chat handler checks for "HF" and "google".
	    model_choice: str = "HF"

	# Response body of POST /chat: a single field carrying the generated text.
	class ChatResponse(BaseModel):
	    response: str

	# Placeholder prompt template, currently empty. A plain literal is used
	# because an f-string with no replacement fields is just a string.
	prompt_template = ""

	@app.post("/chat", response_model=ChatResponse)
	async def chat(request: ChatRequest):
	    """Answer one chat message with the backend named by ``request.model_choice``.

	    Args:
	        request: Validated request body. ``model_choice`` selects the backend:
	            ``"HF"`` uses the Hugging Face Inference API, ``"google"`` uses
	            Gemini. ``system_message``, ``max_tokens``, ``temperature`` and
	            ``top_p`` are forwarded to whichever backend is selected.

	    Returns:
	        A dict of the form ``{"response": <generated text>}`` matching
	        ``ChatResponse``.

	    Raises:
	        HTTPException: 400 when ``model_choice`` is not a supported value;
	            500 when the required API key is missing or the backend call
	            fails (the detail carries the underlying error message).
	    """
	    # Reject unknown backends up front; otherwise the handler would fall
	    # through, return None and fail response validation.
	    if request.model_choice not in ("HF", "google"):
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported model_choice {request.model_choice!r}; expected 'HF' or 'google'.",
	        )

	    # NOTE(review): both backend clients used here appear to be synchronous,
	    # so these calls likely block the event loop for the duration of the
	    # request - consider a threadpool or the async clients if that matters.
	    try:
	        if request.model_choice == "HF":
	            if not hf_token:
	                raise ValueError(
	                    "HF_TOKEN environment variable not set. "
	                    "Please add it as a secret in your Hugging Face Space."
	                )
	            hf_client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=hf_token)

	            messages = [
	                {"role": "system", "content": request.system_message},
	                {"role": "user", "content": request.message},
	            ]
	            completion = hf_client.chat_completion(
	                messages=messages,
	                max_tokens=request.max_tokens,
	                temperature=request.temperature,
	                top_p=request.top_p,
	            )
	            return {"response": completion.choices[0].message.content}

	        # Only "google" can reach this point.
	        if not google_api_key:
	            raise ValueError(
	                "GOOGLE_API_KEY environment variable not set. "
	                "Please add it as a secret in your Hugging Face Space."
	            )

	        # `from google import genai` is the google-genai SDK, whose entry
	        # point is a Client object (not module-level configure()).
	        gemini_client = genai.Client(api_key=google_api_key)
	        result = gemini_client.models.generate_content(
	            model="gemini-2.0-flash",
	            contents=request.message,
	            config={
	                # The system prompt goes in the config rather than as a
	                # "system"-role entry in the contents list.
	                "system_instruction": request.system_message,
	                "max_output_tokens": request.max_tokens,
	                "temperature": request.temperature,
	                "top_p": request.top_p,
	            },
	        )

	        # Wrap the text so the payload satisfies response_model=ChatResponse.
	        text = getattr(result, "text", None)
	        return {"response": text or "No response text received from the model."}

	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e))