"""Gradio app that builds a `vllm serve` command from form inputs."""

import json

import gradio as gr


def generate_vllm_command(model, task="auto", tokenizer=None, dtype="auto", tensor_parallel_size=1,
                          max_model_len=None, quantization=None, device="auto", **kwargs):
    """Assemble a `vllm serve` command string from the given options."""
    command = ["vllm", "serve"]

    # `vllm serve` takes the model as a positional argument, not via --model.
    if model:
        command.append(model)

    if task != "auto":
        command.extend(["--task", task])

    if tokenizer:
        command.extend(["--tokenizer", tokenizer])

    if dtype != "auto":
        command.extend(["--dtype", dtype])

    if tensor_parallel_size != 1:
        command.extend(["--tensor-parallel-size", str(int(tensor_parallel_size))])

    if max_model_len:
        command.extend(["--max-model-len", str(int(max_model_len))])

    if quantization and quantization != "None":
        command.extend(["--quantization", quantization])

    if device != "auto":
        command.extend(["--device", device])

    # Remaining keyword options: booleans become bare flags, everything else
    # is emitted as "--key value"; None, empty strings, and False are skipped.
    for key, value in kwargs.items():
        if value is None or value == "" or value is False:
            continue
        arg_name = f"--{key.replace('_', '-')}"
        if value is True:
            command.append(arg_name)
        else:
            command.extend([arg_name, str(value)])

    return " ".join(command)


with gr.Blocks(title="vLLM Command Generator") as app:
    gr.Markdown("# vLLM Command Generator")
    gr.Markdown("Fill out the form to generate a `vllm serve` command.")

    with gr.Tab("Basic Settings"):
        model = gr.Textbox(label="Model Path or Name", placeholder="e.g., meta-llama/Llama-2-7b-chat-hf")
        task = gr.Dropdown(
            choices=["auto", "generate", "embedding", "embed", "classify", "score", "reward", "transcription"],
            value="auto",
            label="Task",
        )
        tokenizer = gr.Textbox(label="Tokenizer (optional)", placeholder="Path to tokenizer")
        dtype = gr.Dropdown(
            choices=["auto", "half", "float16", "bfloat16", "float", "float32"],
            value="auto",
            label="Data Type",
        )
        device = gr.Dropdown(
            choices=["auto", "cuda", "neuron", "cpu", "tpu", "xpu", "hpu"],
            value="auto",
            label="Device",
        )

    with gr.Tab("Performance Settings"):
        tensor_parallel_size = gr.Slider(minimum=1, maximum=8, value=1, step=1, label="Tensor Parallel Size")
        max_model_len = gr.Number(label="Max Model Length", precision=0)
        quantization = gr.Dropdown(
            choices=[
                "None", "aqlm", "awq", "deepspeedfp", "tpu_int8", "fp8", "ptpc_fp8", "fbgemm_fp8",
                "modelopt", "nvfp4", "marlin", "bitblas", "gguf", "gptq_marlin_24", "gptq_marlin",
                "gptq_bitblas", "awq_marlin", "gptq", "compressed-tensors", "bitsandbytes",
                "qqq", "hqq", "experts_int8", "neuron_quant", "ipex", "quark", "moe_wna16", "torchao",
            ],
            value="None",
            label="Quantization",
        )
        gpu_memory_utilization = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="GPU Memory Utilization")

    with gr.Tab("Advanced Settings"):
        trust_remote_code = gr.Checkbox(label="Trust Remote Code")
        max_num_batched_tokens = gr.Number(label="Max Number of Batched Tokens", precision=0)
        max_num_seqs = gr.Number(label="Max Number of Sequences", precision=0)
        seed = gr.Number(label="Random Seed", precision=0)
        additional_args = gr.Textbox(
            label="Additional Arguments (JSON format)",
            placeholder='{"arg1": "value1", "arg2": "value2"}',
        )

    output = gr.Textbox(label="Generated Command")

    def process_form(model, task, tokenizer, dtype, device,
                     tensor_parallel_size, max_model_len, quantization,
                     gpu_memory_utilization, trust_remote_code,
                     max_num_batched_tokens, max_num_seqs, seed,
                     additional_args_text):
        """Collect the form values, merge in extra JSON arguments, and build the command."""
        form_data = {
            "model": model,
            "task": task,
            "tokenizer": tokenizer,
            "dtype": dtype,
            "device": device,
            "tensor_parallel_size": tensor_parallel_size,
            "max_model_len": max_model_len,
            "quantization": quantization,
            "gpu_memory_utilization": gpu_memory_utilization,
            "trust_remote_code": trust_remote_code,
            "max_num_batched_tokens": max_num_batched_tokens,
            "max_num_seqs": max_num_seqs,
            "seed": seed,
        }

        # Parse the free-form JSON field, rejecting invalid input early.
        additional_args = {}
        if additional_args_text:
            try:
                additional_args = json.loads(additional_args_text)
            except json.JSONDecodeError:
                return "Error: Additional arguments must be valid JSON"

        # Drop unset fields so only explicitly chosen options reach the command
        # builder; booleans (e.g. trust_remote_code) pass through unchanged and
        # are rendered as bare flags by generate_vllm_command.
        form_data = {k: v for k, v in form_data.items() if v is not None and v != ""}

        form_data.update(additional_args)

        return generate_vllm_command(**form_data)
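
    # Example of the JSON merge (hypothetical input): entering
    # {"enable_prefix_caching": true} in the Additional Arguments box parses to
    # Python True, so the generated command gains the bare --enable-prefix-caching
    # flag (a real vLLM option, used here only for illustration).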

    submit_btn = gr.Button("Generate Command")
    submit_btn.click(
        process_form,
        inputs=[
            model, task, tokenizer, dtype, device,
            tensor_parallel_size, max_model_len, quantization, gpu_memory_utilization,
            trust_remote_code, max_num_batched_tokens, max_num_seqs, seed,
            additional_args,
        ],
        outputs=output,
    )


if __name__ == "__main__":
    app.launch()
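    # Assumption: local-only use. Gradio serves on http://127.0.0.1:7860 by
    # default; pass server_name="0.0.0.0" (a standard gr.Blocks.launch()
    # parameter) to make the app reachable from other machines.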