Phil Sobrepena committed on
Commit
9ea24c7
·
1 Parent(s): f47eaa6

docker dependencies

Browse files
Files changed (2) hide show
  1. Dockerfile +18 -10
  2. app.py +0 -128
Dockerfile CHANGED
@@ -4,7 +4,8 @@ WORKDIR /code
4
 
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
- python3.9 \
 
8
  python3-pip \
9
  git \
10
  ffmpeg \
@@ -12,19 +13,25 @@ RUN apt-get update && apt-get install -y \
12
  libxext6 \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
- # Clone MMAudio and install dependencies
16
- RUN git clone https://github.com/hkchengrex/MMAudio.git && \
17
- cd MMAudio && \
18
- # Install PyTorch first as specified in README
19
- pip3 install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 && \
20
- # Install additional dependencies
21
- pip3 install -r requirements.txt && \
22
- # Install MMAudio
23
- pip3 install -e .
24
 
25
  # Set working directory to MMAudio
26
  WORKDIR /code/MMAudio
27
 
 
 
 
 
 
 
 
28
  # Create output directory
29
  RUN mkdir -p output/gradio && chmod 777 output/gradio
30
 
@@ -32,6 +39,7 @@ RUN mkdir -p output/gradio && chmod 777 output/gradio
32
  ENV PYTHONUNBUFFERED=1
33
  ENV GRADIO_SERVER_NAME=0.0.0.0
34
  ENV GRADIO_SERVER_PORT=7860
 
35
 
36
  # Expose Gradio port
37
  EXPOSE 7860
 
4
 
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
+ python3.10 \
8
+ python3.10-distutils \
9
  python3-pip \
10
  git \
11
  ffmpeg \
 
13
  libxext6 \
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
+ # Ensure we're using Python 3.10
17
+ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
18
+
19
+ # Install pip for Python 3.10
20
+ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
21
+
22
+ # Clone MMAudio
23
+ RUN git clone https://github.com/hkchengrex/MMAudio.git
 
24
 
25
  # Set working directory to MMAudio
26
  WORKDIR /code/MMAudio
27
 
28
+ # Install dependencies
29
+ RUN pip3 install --no-cache-dir numpy && \
30
+ pip3 install --no-cache-dir torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 && \
31
+ pip3 install --no-cache-dir colorlog && \
32
+ pip3 install --no-cache-dir -r requirements.txt && \
33
+ pip3 install -e .
34
+
35
  # Create output directory
36
  RUN mkdir -p output/gradio && chmod 777 output/gradio
37
 
 
39
  ENV PYTHONUNBUFFERED=1
40
  ENV GRADIO_SERVER_NAME=0.0.0.0
41
  ENV GRADIO_SERVER_PORT=7860
42
+ ENV PYTHONPATH=/code/MMAudio
43
 
44
  # Expose Gradio port
45
  EXPOSE 7860
app.py DELETED
@@ -1,128 +0,0 @@
1
- import gc
2
- import logging
3
- from datetime import datetime
4
- from fractions import Fraction
5
- from pathlib import Path
6
-
7
- import gradio as gr
8
- import torch
9
- import torchaudio
10
-
11
- from mmaudio.eval_utils import (ModelConfig, VideoInfo, all_model_cfg, generate, load_image,
12
- load_video, make_video, setup_eval_logging)
13
- from mmaudio.model.flow_matching import FlowMatching
14
- from mmaudio.model.networks import MMAudio, get_my_mmaudio
15
- from mmaudio.model.sequence_config import SequenceConfig
16
- from mmaudio.model.utils.features_utils import FeaturesUtils
17
-
18
- # Setup logging
19
- setup_eval_logging()
20
- log = logging.getLogger()
21
-
22
- # Configure device and dtype
23
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
24
- if device == 'cpu':
25
- log.warning('CUDA is not available, running on CPU')
26
- dtype = torch.bfloat16
27
-
28
- # Configure model and paths
29
- model: ModelConfig = all_model_cfg['large_44k_v2']
30
- model.download_if_needed()
31
- output_dir = Path('./output/gradio')
32
- output_dir.mkdir(exist_ok=True, parents=True)
33
-
34
- def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
35
- seq_cfg = model.seq_cfg
36
-
37
- net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
38
- net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
39
- log.info(f'Loaded weights from {model.model_path}')
40
-
41
- feature_utils = FeaturesUtils(tod_vae_ckpt=model.vae_path,
42
- synchformer_ckpt=model.synchformer_ckpt,
43
- enable_conditions=True,
44
- mode=model.mode,
45
- bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
46
- need_vae_encoder=False)
47
- feature_utils = feature_utils.to(device, dtype).eval()
48
-
49
- return net, feature_utils, seq_cfg
50
-
51
- # Load model once at startup
52
- net, feature_utils, seq_cfg = get_model()
53
-
54
- @torch.inference_mode()
55
- def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
56
- cfg_strength: float, duration: float):
57
- try:
58
- rng = torch.Generator(device=device)
59
- if seed >= 0:
60
- rng.manual_seed(seed)
61
- else:
62
- rng.seed()
63
- fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
64
-
65
- video_info = load_video(video, duration)
66
- clip_frames = video_info.clip_frames.unsqueeze(0)
67
- sync_frames = video_info.sync_frames.unsqueeze(0)
68
- duration = video_info.duration_sec
69
-
70
- seq_cfg.duration = duration
71
- net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
72
-
73
- audios = generate(clip_frames, sync_frames, [prompt],
74
- negative_text=[negative_prompt],
75
- feature_utils=feature_utils,
76
- net=net,
77
- fm=fm,
78
- rng=rng,
79
- cfg_strength=cfg_strength)
80
- audio = audios.float().cpu()[0]
81
-
82
- current_time_string = datetime.now().strftime('%Y%m%d_%H%M%S')
83
- video_save_path = output_dir / f'{current_time_string}.mp4'
84
- make_video(video_info, video_save_path, audio, sampling_rate=seq_cfg.sampling_rate)
85
-
86
- gc.collect()
87
- torch.cuda.empty_cache()
88
-
89
- return video_save_path
90
- except Exception as e:
91
- log.error(f"Error in video_to_audio: {str(e)}")
92
- raise gr.Error(f"An error occurred: {str(e)}")
93
-
94
- # Create the Gradio interface
95
- demo = gr.Interface(
96
- fn=video_to_audio,
97
- title="MMAudio — Video-to-Audio Synthesis",
98
- description="""
99
- Generate realistic audio for your videos using MMAudio!
100
-
101
- Project page: [MMAudio](https://hkchengrex.com/MMAudio/)
102
- Code: [GitHub](https://github.com/hkchengrex/MMAudio)
103
-
104
- Note: Processing high-resolution videos (>384px on shorter side) takes longer and doesn't improve results.
105
- """,
106
- inputs=[
107
- gr.Video(label="Upload Video"),
108
- gr.Text(label="Prompt", placeholder="Describe the audio you want to generate..."),
109
- gr.Text(label="Negative prompt", value="music", placeholder="What you don't want in the audio..."),
110
- gr.Number(label="Seed (-1: random)", value=-1, precision=0, minimum=-1),
111
- gr.Number(label="Number of steps", value=25, precision=0, minimum=1),
112
- gr.Slider(label="Guidance Strength", value=4.5, minimum=1, maximum=10, step=0.5),
113
- gr.Slider(label="Duration (seconds)", value=8, minimum=1, maximum=30, step=1),
114
- ],
115
- outputs=gr.Video(label="Generated Result"),
116
- examples=[
117
- ["https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_beach.mp4",
118
- "waves, seagulls", "", 0, 25, 4.5, 10],
119
- ["https://huggingface.co/hkchengrex/MMAudio/resolve/main/examples/sora_serpent.mp4",
120
- "", "music", 0, 25, 4.5, 10],
121
- ],
122
- cache_examples=True,
123
- )
124
-
125
- # Launch the app
126
- if __name__ == "__main__":
127
- demo.launch(server_name="0.0.0.0", server_port=7860)
128
-