Spaces:

bennoweck
/

NatureLM-audio

Running on Zero

Bomme committed 2 days ago

Commit

af70a95

1 Parent(s): 993b9ab

bring back the config file

Files changed (2) hide show

app.py CHANGED Viewed

@@ -8,26 +8,14 @@ import gradio as gr
 import spaces
 import torch
 from NatureLM.models.NatureLM import NatureLM
 from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
-CONFIG = None
 MODEL: NatureLM = None
-class DummyConfig:
-    def __init__(self):
-        self.generate = {
-            "max_new_tokens": 300,
-            "num_beams": 2,
-            "do_sample": False,
-            "min_length": 1,
-            "temperature": 0.1,
-            "repetition_penalty": 1.0,
-            "length_penalty": 1.0,
-        }
 @spaces.GPU
 def prompt_lm(audios: list[str], messages: list[dict[str, str]]):
     cuda_enabled = torch.cuda.is_available()
@@ -277,8 +265,12 @@ def _long_recording_tab():
     )
-def main(assets_dir: Path, device: str = "cuda"):
-    cfg = DummyConfig()
     model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio")
     model.to(device)
     model.eval()
@@ -335,7 +327,12 @@ if __name__ == "__main__":
         default=Path(__file__).parent / "assets",
         help="Directory containing the assets (favicon, examples, etc.)",
     )
     args = parser.parse_args()
-    main(args.assets_dir)

 import spaces
 import torch
+from NatureLM.config import Config
 from NatureLM.models.NatureLM import NatureLM
 from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
+CONFIG: Config = None
 MODEL: NatureLM = None
 @spaces.GPU
 def prompt_lm(audios: list[str], messages: list[dict[str, str]]):
     cuda_enabled = torch.cuda.is_available()
     )
+def main(
+    assets_dir: Path,
+    cfg_path: str | Path,
+    device: str = "cuda",
+):
+    cfg = Config.from_sources(yaml_file=cfg_path)
     model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio")
     model.to(device)
     model.eval()
         default=Path(__file__).parent / "assets",
         help="Directory containing the assets (favicon, examples, etc.)",
     )
+    parser.add_argument(
+        "--cfg-path",
+        type=str,
+        default=Path(__file__).parent / "configs/inference.yml",
+        help="Path to the config file",
+    )
     args = parser.parse_args()
+    main(args.assets_dir, args.cfg_path)

configs/inference.yml ADDED Viewed

+model:
+  llama_path: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  freeze_beats: True
+  use_audio_Qformer: True
+  max_pooling: False
+  downsample_factor: 8
+  freeze_audio_QFormer: False
+  window_level_Qformer: True
+  num_audio_query_token: 1
+  second_per_window: 0.333333
+  second_stride: 0.333333
+  audio_llama_proj_model: ""
+  freeze_audio_llama_proj: False
+  lora: True
+  lora_rank: 32
+  lora_alpha: 32
+  lora_dropout: 0.1
+  prompt_template: "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+  max_txt_len: 160
+  end_sym: <|end_of_text|>
+  beats_cfg:
+    input_patch_size: 16
+    embed_dim: 512
+    conv_bias: False
+    encoder_layers: 12
+    encoder_embed_dim: 768
+    encoder_ffn_embed_dim: 3072
+    encoder_attention_heads: 12
+    activation_fn: "gelu"
+    layer_wise_gradient_decay_ratio: 0.6
+    layer_norm_first: False
+    deep_norm: True
+    dropout: 0.0
+    attention_dropout: 0.0
+    activation_dropout: 0.0
+    encoder_layerdrop: 0.05
+    dropout_input: 0.0
+    conv_pos: 128
+    conv_pos_groups: 16
+    relative_position_embedding: True
+    num_buckets: 320
+    max_distance: 800
+    gru_rel_pos: True
+    finetuned_model: True
+    predictor_dropout: 0.0
+    predictor_class: 527
+generate:
+  max_new_tokens: 300
+  num_beams: 2
+  do_sample: False
+  min_length: 1
+  temperature: 0.1
+  repetition_penalty: 1.0
+  length_penalty: 1.0