Spaces:

tianyaogavin
/

faster-whisper-small

Running

App Files Community

tianyaogavin committed on Apr 24

Commit

25666e3

1 Parent(s): 5b39f9e

aggr voice segment test

Browse files

Files changed (14) hide show

dataset/transcripts/test1_segment_10_aggregated_9_to_15_20250424_111404.json +132 -0
dataset/transcripts/test1_segment_10_aggregated_9_to_16_20250424_111418.json +158 -0
dataset/transcripts/test1_segment_17_aggregated_8_to_9_20250424_105049.json +24 -0
dataset/transcripts/test1_segment_1_20250423_163518.json +168 -0
dataset/transcripts/test1_segment_1_20250423_190011.json +168 -0
dataset/transcripts/test1_segment_1_20250423_190044.json +168 -0
dataset/transcripts/test1_segment_1_20250423_201934.json +168 -0
dataset/transcripts/test1_segment_1_aggregated_0_to_8_20250424_111323.json +166 -0
dataset/transcripts/test1_segment_8_aggregated_7_to_8_20250424_110257.json +32 -0
dataset/transcripts/test1_segment_8_aggregated_7_to_8_20250424_110758.json +32 -0
dataset/transcripts/test1_segment_9_aggregated_8_to_9_20250424_110138.json +32 -0
vad/README.md +32 -5
vad/audio_transcriber.py +53 -5
vad/main.py +168 -3

dataset/transcripts/test1_segment_10_aggregated_9_to_15_20250424_111404.json ADDED Viewed

	@@ -0,0 +1,132 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_10_aggregated_9_to_15.wav",
+  "timestamp": "20250424_111404",
+  "segments": [
+    {
+      "text": "你会学习到如何使用音频数据集",
+      "start_time": 0.0,
+      "end_time": 1.62,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    },
+    {
+      "text": "包括音频数据加载",
+      "start_time": 1.62,
+      "end_time": 3.0,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    },
+    {
+      "text": "音频数据预处理",
+      "start_time": 3.0,
+      "end_time": 3.86,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    },
+    {
+      "text": "以及高效加载大规模音频数据集的流逝加载方法",
+      "start_time": 3.86,
+      "end_time": 7.2,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    },
+    {
+      "text": "完成本单元的学习后",
+      "start_time": 7.2,
+      "end_time": 8.28,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    },
+    {
+      "text": "你会掌握基础的音频相关数",
+      "start_time": 8.28,
+      "end_time": 10.88,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    },
+    {
+      "text": "并且掌握针对不同应用的音频数据处理工具",
+      "start_time": 10.88,
+      "end_time": 14.42,
+      "confidence": 0.862060546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15
+      ]
+    }
+  ]
+}

dataset/transcripts/test1_segment_10_aggregated_9_to_16_20250424_111418.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_10_aggregated_9_to_16.wav",
+  "timestamp": "20250424_111418",
+  "segments": [
+    {
+      "text": "你会学习到如何使用音频数据集",
+      "start_time": 0.0,
+      "end_time": 1.62,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "包括音频数据加载",
+      "start_time": 1.62,
+      "end_time": 3.0,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "音频数据预处理",
+      "start_time": 3.0,
+      "end_time": 3.86,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "以及高效加载大规模音频数据集的流逝加载方法",
+      "start_time": 3.86,
+      "end_time": 7.2,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "完成本单元的学习后",
+      "start_time": 7.2,
+      "end_time": 8.28,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "你会掌握基础的音频相关数",
+      "start_time": 8.28,
+      "end_time": 10.88,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "并且掌握针对不同应用的音频数据处理工具",
+      "start_time": 10.88,
+      "end_time": 14.42,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    },
+    {
+      "text": "本单元的知识会成为后面章节的基础",
+      "start_time": 14.42,
+      "end_time": 16.8,
+      "confidence": 0.8543701171875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16
+      ]
+    }
+  ]
+}

dataset/transcripts/test1_segment_17_aggregated_8_to_9_20250424_105049.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_17_aggregated_8_to_9.wav",
+  "timestamp": "20250424_105049",
+  "segments": [
+    {
+      "text": "本单元的知识会成为后面章节的基础",
+      "start_time": 0.0,
+      "end_time": 2.32,
+      "confidence": 0.657958984375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据处理",
+      "start_time": 2.32,
+      "end_time": 3.56,
+      "confidence": 0.657958984375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    }
+  ]
+}

dataset/transcripts/test1_segment_1_20250423_163518.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_1.wav",
+  "timestamp": "20250423_163518",
+  "segments": [
+    {
+      "text": "第一單元",
+      "start_time": 3.26,
+      "end_time": 3.9,
+      "confidence": 0.546142578125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据处理",
+      "start_time": 4.34,
+      "end_time": 5.74,
+      "confidence": 0.302734375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "单元简介",
+      "start_time": 7.1,
+      "end_time": 7.859999999999999,
+      "confidence": 0.642578125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "所有音频或语音相关的任务都需要使用音频文件",
+      "start_time": 8.8,
+      "end_time": 12.4,
+      "confidence": 0.93402099609375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "在我们深入了解这些任务之前,我们需要了解音频文件的实际内容。",
+      "start_time": 12.8,
+      "end_time": 16.8,
+      "confidence": 0.844482421875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "以及如何利用一瓶文件",
+      "start_time": 17.32,
+      "end_time": 18.72,
+      "confidence": 0.802001953125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本来原将为你介绍的",
+      "start_time": 19.76,
+      "end_time": 21.040000000000003,
+      "confidence": 0.7650146484375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 21.62,
+      "end_time": 25.62,
+      "confidence": 0.87860107421875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "包括波形彩虹绿和冰补涂",
+      "start_time": 26.28,
+      "end_time": 28.080000000000002,
+      "confidence": 0.93768310546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会学习到如何使用音频数语集",
+      "start_time": 28.56,
+      "end_time": 30.36,
+      "confidence": 0.90057373046875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "包括音频数语加载",
+      "start_time": 30.36,
+      "end_time": 31.599999999999998,
+      "confidence": 0.90057373046875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据预处理以及",
+      "start_time": 31.98,
+      "end_time": 33.22,
+      "confidence": 0.597412109375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "高效加载大规模音频数据集的流式加载方法",
+      "start_time": 33.54,
+      "end_time": 36.54,
+      "confidence": 0.76708984375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "完成本大圆的学习后",
+      "start_time": 37.82,
+      "end_time": 38.94,
+      "confidence": 0.88128662109375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会掌握",
+      "start_time": 39.34,
+      "end_time": 40.34,
+      "confidence": 0.375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "基础的音频相关数",
+      "start_time": 40.86,
+      "end_time": 42.38,
+      "confidence": 0.30810546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "并且掌握针对不同应用的音频数据处理工具",
+      "start_time": 43.04,
+      "end_time": 46.6,
+      "confidence": 0.9736175537109375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本大员的支持会成为后面章节的基础",
+      "start_time": 47.5,
+      "end_time": 49.8,
+      "confidence": 0.82470703125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    }
+  ]
+}

dataset/transcripts/test1_segment_1_20250423_190011.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_1.wav",
+  "timestamp": "20250423_190011",
+  "segments": [
+    {
+      "text": "第一单元",
+      "start_time": 3.26,
+      "end_time": 3.9,
+      "confidence": 0.85302734375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据出来",
+      "start_time": 4.34,
+      "end_time": 5.56,
+      "confidence": 0.4482421875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "单元简介",
+      "start_time": 7.1,
+      "end_time": 7.8,
+      "confidence": 0.854736328125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "所有音频或语音相关的任务都需要使用音频文件",
+      "start_time": 8.8,
+      "end_time": 12.4,
+      "confidence": 0.981781005859375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "在我们深入了解这些任务之前",
+      "start_time": 12.8,
+      "end_time": 14.600000000000001,
+      "confidence": 0.8140869140625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "我们需要了解音频文件的实际内容",
+      "start_time": 14.600000000000001,
+      "end_time": 16.78,
+      "confidence": 0.8140869140625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "以及如何利用音频文件",
+      "start_time": 17.32,
+      "end_time": 18.68,
+      "confidence": 0.793212890625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元将为你介绍的",
+      "start_time": 19.76,
+      "end_time": 21.12,
+      "confidence": 0.852783203125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 21.62,
+      "end_time": 25.580000000000002,
+      "confidence": 0.9444580078125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "包括波形,彩虹率和冰普渡",
+      "start_time": 26.28,
+      "end_time": 28.28,
+      "confidence": 0.732666015625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会学习到如何使用音频数据集,包括音频数据加载。",
+      "start_time": 28.56,
+      "end_time": 31.56,
+      "confidence": 0.953521728515625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据处理以及",
+      "start_time": 31.98,
+      "end_time": 33.18,
+      "confidence": 0.685791015625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "高效加载大规模音频数据集的流逝加载方法。",
+      "start_time": 33.54,
+      "end_time": 36.5,
+      "confidence": 0.88739013671875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "完成本单元的学习后",
+      "start_time": 37.82,
+      "end_time": 38.94,
+      "confidence": 0.9327392578125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会掌握",
+      "start_time": 39.34,
+      "end_time": 40.34,
+      "confidence": 0.73193359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "基础的音频相关数",
+      "start_time": 40.86,
+      "end_time": 42.4,
+      "confidence": 0.609619140625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "并且掌握针对不同应用的音频数据处理工具",
+      "start_time": 43.04,
+      "end_time": 46.56,
+      "confidence": 0.96221923828125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元的知识会成为后面章节的基础",
+      "start_time": 47.5,
+      "end_time": 49.86,
+      "confidence": 0.75439453125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    }
+  ]
+}

dataset/transcripts/test1_segment_1_20250423_190044.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_1.wav",
+  "timestamp": "20250423_190044",
+  "segments": [
+    {
+      "text": "第一單元",
+      "start_time": 3.26,
+      "end_time": 3.9,
+      "confidence": 0.546142578125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据处理",
+      "start_time": 4.34,
+      "end_time": 5.74,
+      "confidence": 0.302734375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "单元简介",
+      "start_time": 7.1,
+      "end_time": 7.859999999999999,
+      "confidence": 0.642578125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "所有音频或语音相关的任务都需要使用音频文件",
+      "start_time": 8.8,
+      "end_time": 12.4,
+      "confidence": 0.93402099609375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "在我们深入了解这些任务之前,我们需要了解音频文件的实际内容。",
+      "start_time": 12.8,
+      "end_time": 16.8,
+      "confidence": 0.844482421875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "以及如何利用一瓶文件",
+      "start_time": 17.32,
+      "end_time": 18.72,
+      "confidence": 0.802001953125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本来原将为你介绍的",
+      "start_time": 19.76,
+      "end_time": 21.040000000000003,
+      "confidence": 0.7650146484375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 21.62,
+      "end_time": 25.62,
+      "confidence": 0.87860107421875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "包括波形彩虹绿和冰补涂",
+      "start_time": 26.28,
+      "end_time": 28.080000000000002,
+      "confidence": 0.93768310546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会学习到如何使用音频数语集",
+      "start_time": 28.56,
+      "end_time": 30.36,
+      "confidence": 0.90057373046875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "包括音频数语加载",
+      "start_time": 30.36,
+      "end_time": 31.599999999999998,
+      "confidence": 0.90057373046875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据预处理以及",
+      "start_time": 31.98,
+      "end_time": 33.22,
+      "confidence": 0.597412109375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "高效加载大规模音频数据集的流式加载方法",
+      "start_time": 33.54,
+      "end_time": 36.54,
+      "confidence": 0.76708984375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "完成本大圆的学习后",
+      "start_time": 37.82,
+      "end_time": 38.94,
+      "confidence": 0.88128662109375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会掌握",
+      "start_time": 39.34,
+      "end_time": 40.34,
+      "confidence": 0.375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "基础的音频相关数",
+      "start_time": 40.86,
+      "end_time": 42.38,
+      "confidence": 0.30810546875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "并且掌握针对不同应用的音频数据处理工具",
+      "start_time": 43.04,
+      "end_time": 46.6,
+      "confidence": 0.9736175537109375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本大员的支持会成为后面章节的基础",
+      "start_time": 47.5,
+      "end_time": 49.8,
+      "confidence": 0.82470703125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    }
+  ]
+}

dataset/transcripts/test1_segment_1_20250423_201934.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_1.wav",
+  "timestamp": "20250423_201934",
+  "segments": [
+    {
+      "text": "第一单元",
+      "start_time": 3.26,
+      "end_time": 3.9,
+      "confidence": 0.85302734375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据出来",
+      "start_time": 4.34,
+      "end_time": 5.56,
+      "confidence": 0.4482421875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "单元简介",
+      "start_time": 7.1,
+      "end_time": 7.8,
+      "confidence": 0.854736328125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "所有音频或语音相关的任务都需要使用音频文件",
+      "start_time": 8.8,
+      "end_time": 12.4,
+      "confidence": 0.981781005859375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "在我们深入了解这些任务之前",
+      "start_time": 12.8,
+      "end_time": 14.600000000000001,
+      "confidence": 0.8140869140625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "我们需要了解音频文件的实际内容",
+      "start_time": 14.600000000000001,
+      "end_time": 16.78,
+      "confidence": 0.8140869140625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "以及如何利用音频文件",
+      "start_time": 17.32,
+      "end_time": 18.68,
+      "confidence": 0.793212890625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元将为你介绍的",
+      "start_time": 19.76,
+      "end_time": 21.12,
+      "confidence": 0.852783203125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 21.62,
+      "end_time": 25.580000000000002,
+      "confidence": 0.9444580078125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "包括波形,彩虹率和冰普渡",
+      "start_time": 26.28,
+      "end_time": 28.28,
+      "confidence": 0.732666015625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会学习到如何使用音频数据集,包括音频数据加载。",
+      "start_time": 28.56,
+      "end_time": 31.56,
+      "confidence": 0.953521728515625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "音频数据处理以及",
+      "start_time": 31.98,
+      "end_time": 33.18,
+      "confidence": 0.685791015625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "高效加载大规模音频数据集的流逝加载方法。",
+      "start_time": 33.54,
+      "end_time": 36.5,
+      "confidence": 0.88739013671875,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "完成本单元的学习后",
+      "start_time": 37.82,
+      "end_time": 38.94,
+      "confidence": 0.9327392578125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "你会掌握",
+      "start_time": 39.34,
+      "end_time": 40.34,
+      "confidence": 0.73193359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "基础的音频相关数",
+      "start_time": 40.86,
+      "end_time": 42.4,
+      "confidence": 0.609619140625,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "并且掌握针对不同应用的音频数据处理工具",
+      "start_time": 43.04,
+      "end_time": 46.56,
+      "confidence": 0.96221923828125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    },
+    {
+      "text": "本单元的知识会成为后面章节的基础",
+      "start_time": 47.5,
+      "end_time": 49.86,
+      "confidence": 0.75439453125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null
+    }
+  ]
+}

dataset/transcripts/test1_segment_1_aggregated_0_to_8_20250424_111323.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_1_aggregated_0_to_8.wav",
+  "timestamp": "20250424_111323",
+  "segments": [
+    {
+      "text": "第一单元,音频数据处理单元简介",
+      "start_time": 0.0,
+      "end_time": 2.52,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "所有音频或语音相关的任务都需要使用音频文件",
+      "start_time": 2.52,
+      "end_time": 6.18,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "在我们深入了解这些任务之前",
+      "start_time": 6.18,
+      "end_time": 8.02,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "我们需要了解音频文件的实际内容",
+      "start_time": 8.02,
+      "end_time": 10.18,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "以及如何利用音频文件",
+      "start_time": 10.18,
+      "end_time": 11.52,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "本单元将为你介绍的",
+      "start_time": 11.52,
+      "end_time": 12.700000000000001,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 12.700000000000001,
+      "end_time": 16.9,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    },
+    {
+      "text": "包括波形、采样率和频谱图",
+      "start_time": 16.9,
+      "end_time": 18.68,
+      "confidence": 0.8226318359375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8
+      ]
+    }
+  ]
+}

dataset/transcripts/test1_segment_8_aggregated_7_to_8_20250424_110257.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_8_aggregated_7_to_8.wav",
+  "timestamp": "20250424_110257",
+  "segments": [
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 0.0,
+      "end_time": 3.96,
+      "confidence": 0.9649658203125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        7,
+        8
+      ]
+    },
+    {
+      "text": "包括波形、采样率和频谱图",
+      "start_time": 3.96,
+      "end_time": 5.76,
+      "confidence": 0.9649658203125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        7,
+        8
+      ]
+    }
+  ]
+}

dataset/transcripts/test1_segment_8_aggregated_7_to_8_20250424_110758.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_8_aggregated_7_to_8.wav",
+  "timestamp": "20250424_110758",
+  "segments": [
+    {
+      "text": "本单元将为你介绍与音频数据相关的基本概念",
+      "start_time": 0.0,
+      "end_time": 3.96,
+      "confidence": 0.9649658203125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        7,
+        8
+      ]
+    },
+    {
+      "text": "包括波形、采样率和频谱图",
+      "start_time": 3.96,
+      "end_time": 5.76,
+      "confidence": 0.9649658203125,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        7,
+        8
+      ]
+    }
+  ]
+}

dataset/transcripts/test1_segment_9_aggregated_8_to_9_20250424_110138.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "audio_file": "dataset/audio/segments\\test1_segment_9_aggregated_8_to_9.wav",
+  "timestamp": "20250424_110138",
+  "segments": [
+    {
+      "text": "包括波形、采样率和频谱图。",
+      "start_time": 0.0,
+      "end_time": 1.68,
+      "confidence": 0.812255859375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        8,
+        9
+      ]
+    },
+    {
+      "text": "你会学习到如何使用音频数据集,包括音频数据加载。",
+      "start_time": 1.9000000000000001,
+      "end_time": 4.92,
+      "confidence": 0.812255859375,
+      "verified": false,
+      "verified_text": null,
+      "verification_notes": null,
+      "segment_index": [
+        8,
+        9
+      ]
+    }
+  ]
+}

vad/README.md CHANGED Viewed

@@ -14,7 +14,12 @@
 - 支持批量处理音频片段
 - 保存带时间戳的转录文本
-### 3. 人工验证界面
 - 交互式验证转录结果
 - 支持修改转录文本
 - 添加验证注释
@@ -55,8 +60,7 @@ python vad/main.py process dataset/audio/test1.wav
 这个命令会：
 1. 将音频切割成多个片段
 2. 对每个片段进行转录
-3. 提供交互式界面进行验证
-4. 保存结果到JSON文件
 ### 3. 验证已有的转录结果
@@ -65,6 +69,22 @@ python vad/main.py process dataset/audio/test1.wav
 python vad/main.py verify dataset/transcripts/your_transcript.json
 ```
 ## 参数调整
 可以通过修改 `vad/audio_processor.py` 中的参数来优化切割效果：
@@ -141,12 +161,19 @@ python vad/main.py verify dataset/transcripts/meeting_001_20250422_182233.json
    - 建议使用16kHz采样率
    - 如果是多声道音频会自动转换为单声道
-2. 性能考虑：
    - 转录速度取决于CPU性能和音频长度
    - 较长的音频文件会被自动切割成小片段处理
-3. 后续优化方向：
    - 优化切割策略
    - 添加批量处理功能
    - 改进语义重组算法
    - 添加GUI界面

 - 支持批量处理音频片段
 - 保存带时间戳的转录文本
+### 3. 聚合转录
+- 支持将多个连续片段聚合后进行整体转录
+- 提高语义连贯性和转录准确度
+- 适用于语义上相关的连续语音片段
+### 4. 人工验证界面
 - 交互式验证转录结果
 - 支持修改转录文本
 - 添加验证注释
 这个命令会：
 1. 将音频切割成多个片段
 2. 对每个片段进行转录
+3. 保存结果到JSON文件
 ### 3. 验证已有的转录结果
 python vad/main.py verify dataset/transcripts/your_transcript.json
 ```
+### 4. 聚合转录连续片段
+```bash
+# 在项目根目录下运行
+python vad/main.py aggregate --segments "0,1,2"
+```
+这个命令会：
+1. 自动查找已处理的音频片段
+2. 聚合指定的连续片段（这里是索引为0、1、2的片段）
+3. 对聚合后的音频进行整体转录
+4. 保存结果到JSON文件
+参数说明：
+- `--segments`: 指定要聚合的片段索引，用逗号分隔，索引从0开始（必需）
 ## 参数调整
 可以通过修改 `vad/audio_processor.py` 中的参数来优化切割效果：
    - 建议使用16kHz采样率
    - 如果是多声道音频会自动转换为单声道
+2. 聚合转录建议：
+   - 建议聚合语义上相关的连续片段
+   - 聚合片段不宜过多，建议不超过5个片段
+   - 聚合转录适合解决单独转录时出现的语义断裂问题
+3. 性能考虑：
    - 转录速度取决于CPU性能和音频长度
    - 较长的音频文件会被自动切割成小片段处理
+   - 聚合转录可能需要更多内存和处理时间
+4. 后续优化方向：
    - 优化切割策略
    - 添加批量处理功能
    - 改进语义重组算法
    - 添加GUI界面
+   - 自动检测适合聚合的片段

vad/audio_transcriber.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from faster_whisper import WhisperModel
 from audio_processor import AudioSegment
 import json
-from typing import List, Dict, Optional
 from dataclasses import dataclass
 import os
 from datetime import datetime
 @dataclass
 class TranscriptionResult:
@@ -15,9 +16,10 @@ class TranscriptionResult:
     verified: bool = False
     verified_text: Optional[str] = None
     verification_notes: Optional[str] = None
 class AudioTranscriber:
-    def __init__(self, model: str = "small", device: str = "cuda", compute_type: str = "int8"):
         """
         初始化转录器
@@ -37,7 +39,7 @@ class AudioTranscriber:
         print("Model transcribe...")
         print(f"开始转录音频片段，长度: {len(segment.audio_data)} 采样点 ({len(segment.audio_data)/16000:.2f}秒)")
         segments_generator, info = self.model.transcribe(segment.audio_data,
-                                          beam_size=3,
                                           language="zh")
         print(f"检测到语言: {info.language}, 语言概率: {info.language_probability:.2f}")
         segments = list(segments_generator)
@@ -59,6 +61,50 @@ class AudioTranscriber:
             results.append(result)
         return results
     def save_transcription(self,
                          results: List[TranscriptionResult],
@@ -84,7 +130,8 @@ class AudioTranscriber:
                     "confidence": r.confidence,
                     "verified": r.verified,
                     "verified_text": r.verified_text,
-                    "verification_notes": r.verification_notes
                 }
                 for r in results
             ]
@@ -127,7 +174,8 @@ class AudioTranscriber:
                 confidence=seg["confidence"],
                 verified=seg["verified"],
                 verified_text=seg.get("verified_text"),
-                verification_notes=seg.get("verification_notes")
             )
             results.append(result)

 from faster_whisper import WhisperModel
 from audio_processor import AudioSegment
 import json
+from typing import List, Dict, Optional, Tuple
 from dataclasses import dataclass
 import os
 from datetime import datetime
+import numpy as np
 @dataclass
 class TranscriptionResult:
     verified: bool = False
     verified_text: Optional[str] = None
     verification_notes: Optional[str] = None
+    segment_index: Optional[int] = None  # 添加片段索引字段
 class AudioTranscriber:
+    def __init__(self, model: str = "large", device: str = "cuda", compute_type: str = "int8"):
         """
         初始化转录器
         print("Model transcribe...")
         print(f"开始转录音频片段，长度: {len(segment.audio_data)} 采样点 ({len(segment.audio_data)/16000:.2f}秒)")
         segments_generator, info = self.model.transcribe(segment.audio_data,
+                                          beam_size=5,
                                           language="zh")
         print(f"检测到语言: {info.language}, 语言概率: {info.language_probability:.2f}")
         segments = list(segments_generator)
             results.append(result)
         return results
+    def transcribe_aggregated_segments(self, segments: List[AudioSegment]) -> List[TranscriptionResult]:
+        """
+        将多个连续的音频片段聚合后进行转录，以提高语义准确度
+        Args:
+            segments: 要聚合的连续音频片段列表
+        Returns:
+            转录结果列表
+        """
+        if not segments:
+            return []
+        print(f"开始聚合转录 {len(segments)} 个连续片段...")
+        # 记录第一个片段的开始时间和最后一个片段的结束时间
+        start_time = segments[0].start_time
+        end_time = segments[-1].end_time
+        # 计算总长度并创建合并的音频数据数组
+        total_length = sum(len(segment.audio_data) for segment in segments)
+        aggregated_audio = np.zeros(total_length, dtype=np.float32)
+        # 合并音频数据
+        current_position = 0
+        for segment in segments:
+            segment_length = len(segment.audio_data)
+            aggregated_audio[current_position:current_position + segment_length] = segment.audio_data
+            current_position += segment_length
+        print(f"聚合后音频长度: {len(aggregated_audio)} 采样点 ({len(aggregated_audio)/16000:.2f}秒)")
+        # 创建一个临时的聚合片段对象
+        aggregated_segment = AudioSegment(
+            start_time=start_time,
+            end_time=end_time,
+            audio_data=aggregated_audio,
+            is_speech=True
+        )
+        # 转录聚合后的音频
+        print("开始转录聚合后的音频...")
+        return self.transcribe_segment(aggregated_segment)
     def save_transcription(self,
                          results: List[TranscriptionResult],
                     "confidence": r.confidence,
                     "verified": r.verified,
                     "verified_text": r.verified_text,
+                    "verification_notes": r.verification_notes,
+                    "segment_index": r.segment_index  # 添加片段索引到输出
                 }
                 for r in results
             ]
                 confidence=seg["confidence"],
                 verified=seg["verified"],
                 verified_text=seg.get("verified_text"),
+                verification_notes=seg.get("verification_notes"),
+                segment_index=seg.get("segment_index")  # 加载片段索引
             )
             results.append(result)

vad/main.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import argparse
 from audio_processor import AudioProcessor, AudioSegment
 from audio_transcriber import AudioTranscriber, TranscriptionResult
-from typing import List, Tuple
 import json
 import soundfile as sf
@@ -52,12 +52,90 @@ def transcribe_segments(segment_paths: List[str], original_segments: List[AudioS
         # 转录
         results = transcriber.transcribe_segment(segment)
         all_results.extend(results)
     # 保存转录结果
     output_path = transcriber.save_transcription(all_results, segment_paths[0])
     return output_path
 def verify_transcription(json_path: str):
     """
     交互式验证转录结果
@@ -99,15 +177,67 @@ def verify_transcription(json_path: str):
     transcriber.save_transcription(results, json_path)
     print("\n✅ 验证结果已保存")
 def main():
     parser = argparse.ArgumentParser(description="音频处理和转录工具")
-    parser.add_argument("action", choices=["process", "verify"], help="执行的操作: process(处理音频) 或 verify(验证转录)")
-    parser.add_argument("input_path", help="输入文件路径 (音频文件或JSON文件)")
     args = parser.parse_args()
     try:
         if args.action == "process":
             print(f"处理音频文件: {args.input_path}")
             # 1. 切割音频
             segment_paths, original_segments = process_audio(args.input_path)
@@ -121,8 +251,43 @@ def main():
             print(f"✅ 转录完成，结果保存在: {json_path}")
         elif args.action == "verify":
             verify_transcription(args.input_path)
     except Exception as e:
         print(f"错误: {e}")

 import argparse
 from audio_processor import AudioProcessor, AudioSegment
 from audio_transcriber import AudioTranscriber, TranscriptionResult
+from typing import List, Tuple, Optional
 import json
 import soundfile as sf
         # 转录
         results = transcriber.transcribe_segment(segment)
+        # 设置片段索引
+        for result in results:
+            result.segment_index = i
         all_results.extend(results)
     # 保存转录结果
     output_path = transcriber.save_transcription(all_results, segment_paths[0])
     return output_path
+def transcribe_aggregated_segments(segment_paths: List[str], original_segments: List[AudioSegment],
+                                 segment_indices: List[int]) -> str:
+    """
+    聚合转录指定的连续音频片段并保存结果
+    Args:
+        segment_paths: 所有音频片段的路径列表
+        original_segments: 原始音频片段列表
+        segment_indices: 要聚合的片段索引列表
+    Returns:
+        保存的转录结果文件路径
+    """
+    if not segment_indices:
+        raise ValueError("必须指定至少一个片段索引")
+    # 验证索引有效性
+    for idx in segment_indices:
+        if idx < 0 or idx >= len(segment_paths):
+            raise ValueError(f"无效的片段索引: {idx}，有效范围: 0-{len(segment_paths)-1}")
+    # 按索引排序，确保按顺序处理
+    segment_indices.sort()
+    print(f"准备聚合转录片段索引: {segment_indices}")
+    transcriber = AudioTranscriber()
+    segments_to_aggregate = []
+    # 加载指定的音频片段
+    for idx in segment_indices:
+        path = segment_paths[idx]
+        print(f"加载片段 {idx}/{len(segment_paths)-1}: {path}")
+        # 读取音频数据
+        audio_data, _ = sf.read(path)
+        # 使用原始片段的时间戳
+        original_segment = original_segments[idx]
+        # 创建AudioSegment对象，保持原始时间戳
+        segment = AudioSegment(
+            start_time=original_segment.start_time,
+            end_time=original_segment.end_time,
+            audio_data=audio_data,
+            is_speech=True
+        )
+        segments_to_aggregate.append(segment)
+    # 聚合转录
+    print(f"开始聚合转录 {len(segments_to_aggregate)} 个片段...")
+    results = transcriber.transcribe_aggregated_segments(segments_to_aggregate)
+    # 设置聚合片段的索引信息
+    for result in results:
+        # 使用聚合的片段索引列表作为segment_index
+        result.segment_index = segment_indices
+    # 保存转录结果
+    # 使用第一个片段的路径作为基础，但添加"aggregated"标记
+    base_path = segment_paths[segment_indices[0]]
+    base_name = os.path.splitext(os.path.basename(base_path))[0]
+    aggregated_name = f"{base_name}_aggregated_{segment_indices[0]}_to_{segment_indices[-1]}"
+    # 创建一个临时路径用于保存
+    temp_path = os.path.join(os.path.dirname(base_path), f"{aggregated_name}.wav")
+    output_path = transcriber.save_transcription(results, temp_path)
+    print(f"✅ 聚合转录结果已保存到: {output_path}")
+    return output_path
 def verify_transcription(json_path: str):
     """
     交互式验证转录结果
     transcriber.save_transcription(results, json_path)
     print("\n✅ 验证结果已保存")
+def get_existing_segments(base_dir="dataset/audio/segments"):
+    """
+    获取已存在的音频片段列表
+    """
+    if not os.path.exists(base_dir):
+        return [], []
+    # 获取所有wav文件
+    segment_files = [f for f in os.listdir(base_dir) if f.endswith('.wav')]
+    # 创建一个列表来存储(索引, 路径)对
+    indexed_paths = []
+    # 从文件名中提取索引
+    for filename in segment_files:
+        parts = filename.split('_')
+        if len(parts) >= 3 and parts[-2] == "segment":
+            try:
+                segment_idx = int(parts[-1].split('.')[0]) - 1  # 转换为0-based索引
+                full_path = os.path.join(base_dir, filename)
+                indexed_paths.append((segment_idx, full_path))
+            except (ValueError, IndexError):
+                print(f"警告: 无法从文件名 {filename} 中提取片段索引")
+    # 按索引排序
+    indexed_paths.sort(key=lambda x: x[0])
+    # 提取排序后的路径
+    segment_paths = [path for _, path in indexed_paths]
+    # 创建临时的AudioSegment对象
+    original_segments = []
+    for idx, path in indexed_paths:
+        audio_data, sample_rate = sf.read(path)
+        # 简单估计时间戳（实际应用中可能需要更精确的方法）
+        duration = len(audio_data) / sample_rate
+        segment = AudioSegment(
+            start_time=0.0,  # 这里使用相对时间
+            end_time=duration,
+            audio_data=audio_data,
+            is_speech=True
+        )
+        original_segments.append(segment)
+    return segment_paths, original_segments
 def main():
     parser = argparse.ArgumentParser(description="音频处理和转录工具")
+    parser.add_argument("action", choices=["process", "verify", "aggregate"],
+                      help="执行的操作: process(处理音频), verify(验证转录), aggregate(聚合转录)")
+    parser.add_argument("input_path", nargs='?', help="输入文件路径 (音频文件或JSON文件)")
+    parser.add_argument("--segments", type=str, help="要聚合的片段索引，用逗号分隔，例如 '0,1,2'")
     args = parser.parse_args()
     try:
         if args.action == "process":
+            if not args.input_path:
+                print("❌ 使用 process 操作时必须指定输入文件路径")
+                return
             print(f"处理音频文件: {args.input_path}")
             # 1. 切割音频
             segment_paths, original_segments = process_audio(args.input_path)
             print(f"✅ 转录完成，结果保存在: {json_path}")
         elif args.action == "verify":
+            if not args.input_path:
+                print("❌ 使用 verify 操作时必须指定输入文件路径")
+                return
             verify_transcription(args.input_path)
+        elif args.action == "aggregate":
+            if not args.segments:
+                print("❌ 使用 aggregate 操作时必须指定 --segments 参数")
+                return
+            # 解析片段索引
+            try:
+                segment_indices = [int(idx.strip()) for idx in args.segments.split(",")]
+            except ValueError:
+                print("❌ 片段索引必须是整数，用逗号分隔")
+                return
+            # 获取已存在的音频片段
+            segment_paths, original_segments = get_existing_segments()
+            if not segment_paths:
+                print("❌ 未找到已处理的音频片段，请先使用 process 命令处理音频文件")
+                return
+            print(f"找到 {len(segment_paths)} 个已处理的音频片段")
+            # 验证索引有效性
+            max_idx = len(segment_paths) - 1
+            invalid_indices = [idx for idx in segment_indices if idx < 0 or idx > max_idx]
+            if invalid_indices:
+                print(f"❌ 无效的片段索引: {invalid_indices}，有效范围: 0-{max_idx}")
+                return
+            # 聚合转录指定的片段
+            json_path = transcribe_aggregated_segments(segment_paths, original_segments, segment_indices)
+            print(f"✅ 聚合转录完成，结果保存在: {json_path}")
     except Exception as e:
         print(f"错误: {e}")