liuhaotian committed on
Commit
6c799d8
·
1 Parent(s): ff48163

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -324,7 +324,16 @@ title_markdown = """
324
  # 🌋 LLaVA: Large Language and Vision Assistant
325
  [[Project Page]](https://llava-vl.github.io) [[Paper]](https://arxiv.org/abs/2304.08485) [[Code]](https://github.com/haotian-liu/LLaVA) [[Model]](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)
326
 
327
- ONLY WORKS WITH GPU! 8-bit is used by default to load 13B model in A10G-Large (24G). Set environment `bits` to 4 to load it in T4-medium (15G).
 
 
 
 
 
 
 
 
 
328
  """
329
 
330
  tos_markdown = """
@@ -593,13 +602,13 @@ if __name__ == "__main__":
593
  model_path = "liuhaotian/llava-v1.5-13b"
594
  bits = int(os.getenv("bits", 8))
595
 
596
- preload_models(model_path)
597
 
598
  controller_proc = start_controller()
599
  worker_proc = start_worker(model_path, bits=bits)
600
 
601
  # Wait for worker and controller to start
602
- time.sleep(10)
603
 
604
  try:
605
  start_demo(args)
 
324
  # 🌋 LLaVA: Large Language and Vision Assistant
325
  [[Project Page]](https://llava-vl.github.io) [[Paper]](https://arxiv.org/abs/2304.08485) [[Code]](https://github.com/haotian-liu/LLaVA) [[Model]](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)
326
 
327
+ ONLY WORKS WITH GPU!
328
+
329
+ You can load the model with 8-bit or 4-bit quantization to make it fit on smaller hardware. Set the environment variable `bits` to control the quantization.
330
+
331
+ Recommended configurations:
332
+ | Hardware | Bits |
333
+ |--------------------|----------------|
334
+ | A10G-Large (24G) | 8 (default) |
335
+ | T4-Medium (15G) | 4 |
336
+ | A100-Large (40G) | 16 |
337
  """
338
 
339
  tos_markdown = """
 
602
  model_path = "liuhaotian/llava-v1.5-13b"
603
  bits = int(os.getenv("bits", 8))
604
 
605
+ # preload_models(model_path)
606
 
607
  controller_proc = start_controller()
608
  worker_proc = start_worker(model_path, bits=bits)
609
 
610
  # Wait for worker and controller to start
611
+ # time.sleep(10)
612
 
613
  try:
614
  start_demo(args)