liuhaotian committed on
Commit
8f4dd40
·
1 Parent(s): 19421d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -522,8 +522,12 @@ def start_controller():
522
  return subprocess.Popen(controller_command)
523
 
524
 
525
- def start_worker(model_path: str):
526
  logger.info(f"Starting the model worker for the model {model_path}")
 
 
 
 
527
  worker_command = [
528
  "python",
529
  "-m",
@@ -535,9 +539,10 @@ def start_worker(model_path: str):
535
  "--model-path",
536
  model_path,
537
  "--model-name",
538
- "llava-v1.5-13b-4bit",
539
- "--load-4bit",
540
  ]
 
 
541
  return subprocess.Popen(worker_command)
542
 
543
 
@@ -586,8 +591,10 @@ if __name__ == "__main__":
586
  logger.info(f"args: {args}")
587
 
588
  model_path = "liuhaotian/llava-v1.5-13b"
 
 
589
 
590
- preload_models(model_path)
591
 
592
  controller_proc = start_controller()
593
  worker_proc = start_worker(model_path)
 
522
  return subprocess.Popen(controller_command)
523
 
524
 
525
+ def start_worker(model_path: str, bits=16):
526
  logger.info(f"Starting the model worker for the model {model_path}")
527
+ model_name = model_path.strip('/').split('/')[-1]
528
+ assert bits in [4, 8, 16], "It can be only loaded with 16-bit, 8-bit, and 4-bit."
529
+ if bits != 16:
530
+ model_name += f'-{bits}bit'
531
  worker_command = [
532
  "python",
533
  "-m",
 
539
  "--model-path",
540
  model_path,
541
  "--model-name",
542
+ model_name,
 
543
  ]
544
+ if bits != 16:
545
+ worker_command += [f'--load-{bits}bit']
546
  return subprocess.Popen(worker_command)
547
 
548
 
 
591
  logger.info(f"args: {args}")
592
 
593
  model_path = "liuhaotian/llava-v1.5-13b"
594
+ bits = 4
595
+ # set bits=4 for T4, bits=8 for A10G (24G), and bits=16 for A100 (40G)
596
 
597
+ preload_models(model_path, bits=bits)
598
 
599
  controller_proc = start_controller()
600
  worker_proc = start_worker(model_path)