#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Download the audio files listed in a spreadsheet into a local directory.

The spreadsheet is read with pandas and must contain an ``audio_url``
column. Each URL is fetched and stored under the output directory at a
location that mirrors the path component of the URL.
"""
import argparse
from urllib.parse import urlparse
from pathlib import Path

import requests
import pandas as pd

from project_settings import project_path


# Seconds to wait for the server before giving up on one download; without
# a timeout a single stalled connection would block the whole batch forever.
REQUEST_TIMEOUT = 60


def get_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with ``audio_file`` (spreadsheet to read) and
        ``output_dir`` (directory the downloads are written under).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--audio_file",
        default="audio.xlsx",
        type=str,
    )
    parser.add_argument(
        "--output_dir",
        default=(project_path / "temp/audio_trim/origin").as_posix(),
        type=str,
    )
    args = parser.parse_args()
    return args


def _local_path(output_dir, audio_url):
    """Map *audio_url* to the file path it is stored at under *output_dir*.

    Raises:
        ValueError: if the URL has no path component, or if its path
            contains ``..`` and could therefore escape *output_dir*.
    """
    # Strip every leading slash: a remainder that still starts with "/"
    # would be treated as absolute and silently discard output_dir.
    relative = urlparse(audio_url).path.lstrip("/")
    if not relative:
        raise ValueError(f"URL has no file path: {audio_url!r}")
    if ".." in Path(relative).parts:
        raise ValueError(f"URL path escapes the output directory: {audio_url!r}")
    return output_dir / relative


def main():
    """Download every audio URL listed in the spreadsheet.

    Rows whose ``audio_url`` cell is empty are skipped.

    Raises:
        requests.RequestException: if a download fails, times out, or the
            server answers with an HTTP error status.
        ValueError: if a URL cannot be mapped to a path inside the output
            directory.
    """
    args = get_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_excel(args.audio_file)

    # Only the URL column is needed; blank cells are read as NaN and skipped.
    for audio_url in df["audio_url"].dropna():
        filename = _local_path(output_dir, audio_url)
        filename.parent.mkdir(parents=True, exist_ok=True)

        resp = requests.get(audio_url, timeout=REQUEST_TIMEOUT)
        # Fail before writing so an HTTP error page is never saved as audio.
        resp.raise_for_status()

        with open(filename.as_posix(), "wb") as f:
            f.write(resp.content)


if __name__ == "__main__":
    main()