import gradio as gr
import torch
import sys
import traceback
import os
from huggingface_hub import login, list_repo_files, hf_hub_download
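# Diagnostic Space for verifying access to the gated Gemma 3 GGUF checkpoint
# (google/gemma-3-27b-it-qat-q4_0-gguf) and running a small generation test
# through llama-cpp-python. A HUGGINGFACE_TOKEN environment variable with
# access to the repo is assumed to be set in the Space's secrets.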
def system_info():
    try:
        import transformers

        result = []
        result.append(f"Python version: {sys.version}")
        result.append(f"PyTorch version: {torch.__version__}")
        result.append(f"Transformers version: {transformers.__version__}")
        # Check GPU availability
        if torch.cuda.is_available():
            result.append(f"GPU available: {torch.cuda.get_device_name(0)}")
            result.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        else:
            result.append("No GPU available")
        result.append("System info test successful!")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"
def list_gemma3_files():
    try:
        result = []
        result.append("Listing files in Gemma 3 repository...")
        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)
        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)
        # List files in the repository
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        result.append(f"Listing files in {model_id}...")
        files = list_repo_files(model_id, token=token)
        result.append("Files found:")
        for file in files:
            result.append(f"- {file}")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"
def test_gemma3():
    try:
        result = []
        result.append("Testing Gemma 3 model...")
        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)
        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)
        # Use Gemma 3 GGUF model
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        # First, list the files to find the correct filename
        result.append(f"Listing files in {model_id} to find the model file...")
        files = list_repo_files(model_id, token=token)
        gguf_files = [f for f in files if f.endswith(".gguf")]
        if not gguf_files:
            result.append("No GGUF files found in the repository!")
            return "\n".join(result)
        model_filename = gguf_files[0]  # Use the first GGUF file found
        result.append(f"Found model file: {model_filename}")
        result.append(f"Downloading {model_id}/{model_filename}...")
        model_path = hf_hub_download(
            repo_id=model_id,
            filename=model_filename,
            token=token,
        )
        result.append(f"Model downloaded to: {model_path}")
        # Load the model
        result.append("Loading model...")
        try:
            import llama_cpp
        except ImportError:
            result.append("llama-cpp-python not installed. Installing now...")
            import subprocess
            subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
            import llama_cpp
        from llama_cpp import Llama
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,       # Context window size
            n_gpu_layers=-1,  # Use all available GPU layers
        )
        # Generate text
        result.append("Generating text...")
        prompt = "Write a short poem about artificial intelligence."
        output = llm(
            prompt,
            max_tokens=100,
            temperature=0.7,
            top_p=0.95,
            echo=False,  # Don't include the prompt in the returned text
        )
        generated_text = output["choices"][0]["text"]
        result.append(f"Generated text: {generated_text}")
        result.append("Gemma 3 test successful!")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"
# Create Gradio interface
with gr.Blocks(title="StaffManager AI Assistant") as demo:
    gr.Markdown("# StaffManager AI Assistant")
    gr.Markdown("Testing the Gemma 3 model for the StaffManager application.")
    with gr.Tab("System Info"):
        with gr.Row():
            with gr.Column():
                info_button = gr.Button("Get System Info")
            with gr.Column():
                info_result = gr.Textbox(label="System Information", lines=10)
        info_button.click(
            fn=system_info,
            inputs=[],
            outputs=[info_result],
        )
    with gr.Tab("List Gemma 3 Files"):
        with gr.Row():
            with gr.Column():
                list_files_button = gr.Button("List Gemma 3 Files")
            with gr.Column():
                list_files_result = gr.Textbox(label="Files in Repository", lines=20)
        list_files_button.click(
            fn=list_gemma3_files,
            inputs=[],
            outputs=[list_files_result],
        )
    with gr.Tab("Gemma 3 Test"):
        with gr.Row():
            with gr.Column():
                gemma_button = gr.Button("Test Gemma 3")
            with gr.Column():
                gemma_result = gr.Textbox(label="Test Results", lines=20)
        gemma_button.click(
            fn=test_gemma3,
            inputs=[],
            outputs=[gemma_result],
        )
    with gr.Tab("About"):
        gr.Markdown("""
        ## About StaffManager AI Assistant

        This Space tests the Gemma 3 model for the StaffManager application.

        - **Gemma 3**: Google's 27B-parameter model in GGUF format for efficient inference

        This model requires authentication with a Hugging Face token that has been
        granted access to the model.
        """)

# Launch the app
demo.launch()
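
# Note: installing llama-cpp-python at runtime inside test_gemma3() is a
# last-resort fallback; on a Hugging Face Space the dependency would normally
# be pinned in requirements.txt so the install happens at build time instead.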