tianyaogavin committed on
Commit
7fc9887
·
1 Parent(s): 64f8498

rm vad dataset

Browse files
Files changed (29) hide show
  1. .gitignore +1 -0
  2. vad/dataset/transcripts/test1_segment_1_20250423_130738.json → dataset/transcripts/test1_segment_1_20250423_161502.json +35 -35
  3. vad/__pycache__/audio_transcriber.cpython-312.pyc +0 -0
  4. vad/audio_transcriber.py +1 -1
  5. vad/dataset/audio/segments/test1_segment_1.wav +0 -3
  6. vad/dataset/audio/segments/test1_segment_10.wav +0 -3
  7. vad/dataset/audio/segments/test1_segment_11.wav +0 -3
  8. vad/dataset/audio/segments/test1_segment_12.wav +0 -3
  9. vad/dataset/audio/segments/test1_segment_13.wav +0 -3
  10. vad/dataset/audio/segments/test1_segment_14.wav +0 -3
  11. vad/dataset/audio/segments/test1_segment_15.wav +0 -3
  12. vad/dataset/audio/segments/test1_segment_16.wav +0 -3
  13. vad/dataset/audio/segments/test1_segment_17.wav +0 -3
  14. vad/dataset/audio/segments/test1_segment_18.wav +0 -3
  15. vad/dataset/audio/segments/test1_segment_2.wav +0 -3
  16. vad/dataset/audio/segments/test1_segment_3.wav +0 -3
  17. vad/dataset/audio/segments/test1_segment_4.wav +0 -3
  18. vad/dataset/audio/segments/test1_segment_5.wav +0 -3
  19. vad/dataset/audio/segments/test1_segment_6.wav +0 -3
  20. vad/dataset/audio/segments/test1_segment_7.wav +0 -3
  21. vad/dataset/audio/segments/test1_segment_8.wav +0 -3
  22. vad/dataset/audio/segments/test1_segment_9.wav +0 -3
  23. vad/dataset/audio/temp/test1_segments_20250423_114118.json +0 -80
  24. vad/dataset/audio/temp/test1_segments_20250423_114230.json +0 -80
  25. vad/dataset/audio/temp/test1_segments_20250423_114252.json +0 -80
  26. vad/dataset/audio/temp/test1_segments_20250423_125721.json +0 -80
  27. vad/dataset/audio/temp/test1_segments_20250423_130157.json +0 -80
  28. vad/dataset/audio/temp/test1_segments_20250423_130617.json +0 -80
  29. vad/dataset/audio/temp/test1_segments_20250423_130734.json +0 -80
.gitignore CHANGED
@@ -0,0 +1 @@
 
 
1
+ **/__pycache__
vad/dataset/transcripts/test1_segment_1_20250423_130738.json → dataset/transcripts/test1_segment_1_20250423_161502.json RENAMED
@@ -1,11 +1,11 @@
1
  {
2
  "audio_file": "dataset/audio/segments\\test1_segment_1.wav",
3
- "timestamp": "20250423_130738",
4
  "segments": [
5
  {
6
  "text": "音频数据处理",
7
- "start_time": 0.0,
8
- "end_time": 1.16,
9
  "confidence": 0.906494140625,
10
  "verified": false,
11
  "verified_text": null,
@@ -13,8 +13,8 @@
13
  },
14
  {
15
  "text": "所有音频或语言相关的任务都需要使用音频",
16
- "start_time": 0.0,
17
- "end_time": 3.72,
18
  "confidence": 0.7564697265625,
19
  "verified": false,
20
  "verified_text": null,
@@ -22,8 +22,8 @@
22
  },
23
  {
24
  "text": "在我们先入了解这些任务之前",
25
- "start_time": 0.0,
26
- "end_time": 1.6400000000000001,
27
  "confidence": 0.939605712890625,
28
  "verified": false,
29
  "verified_text": null,
@@ -31,8 +31,8 @@
31
  },
32
  {
33
  "text": "我们需要了解音频文件的实际内容",
34
- "start_time": 1.6400000000000001,
35
- "end_time": 4.0,
36
  "confidence": 0.939605712890625,
37
  "verified": false,
38
  "verified_text": null,
@@ -40,8 +40,8 @@
40
  },
41
  {
42
  "text": "以及如何",
43
- "start_time": 4.0,
44
- "end_time": 4.8,
45
  "confidence": 0.939605712890625,
46
  "verified": false,
47
  "verified_text": null,
@@ -49,8 +49,8 @@
49
  },
50
  {
51
  "text": "本台語言將為你介紹的",
52
- "start_time": 0.0,
53
- "end_time": 1.28,
54
  "confidence": 0.907470703125,
55
  "verified": false,
56
  "verified_text": null,
@@ -58,8 +58,8 @@
58
  },
59
  {
60
  "text": "本单元将为你介绍于音频数据相关的基本概念",
61
- "start_time": 0.0,
62
- "end_time": 3.92,
63
  "confidence": 0.66796875,
64
  "verified": false,
65
  "verified_text": null,
@@ -67,8 +67,8 @@
67
  },
68
  {
69
  "text": "包括剝形、採用、綠和平補土",
70
- "start_time": 0.0,
71
- "end_time": 2.0,
72
  "confidence": 0.708251953125,
73
  "verified": false,
74
  "verified_text": null,
@@ -76,8 +76,8 @@
76
  },
77
  {
78
  "text": "你會學習到如何使用音頻",
79
- "start_time": 2.0,
80
- "end_time": 3.6,
81
  "confidence": 0.708251953125,
82
  "verified": false,
83
  "verified_text": null,
@@ -85,8 +85,8 @@
85
  },
86
  {
87
  "text": "包括音频数位加载",
88
- "start_time": 0.0,
89
- "end_time": 1.4000000000000001,
90
  "confidence": 0.86474609375,
91
  "verified": false,
92
  "verified_text": null,
@@ -94,8 +94,8 @@
94
  },
95
  {
96
  "text": "音频数据处理",
97
- "start_time": 1.4000000000000001,
98
- "end_time": 2.4,
99
  "confidence": 0.86474609375,
100
  "verified": false,
101
  "verified_text": null,
@@ -103,8 +103,8 @@
103
  },
104
  {
105
  "text": "高效加载大规模音频数级的流适加载方",
106
- "start_time": 0.0,
107
- "end_time": 2.88,
108
  "confidence": 0.956787109375,
109
  "verified": false,
110
  "verified_text": null,
@@ -112,8 +112,8 @@
112
  },
113
  {
114
  "text": "完成本單元的學期後",
115
- "start_time": 0.0,
116
- "end_time": 1.44,
117
  "confidence": 0.9926719665527344,
118
  "verified": false,
119
  "verified_text": null,
@@ -121,8 +121,8 @@
121
  },
122
  {
123
  "text": "你會找",
124
- "start_time": 1.44,
125
- "end_time": 2.12,
126
  "confidence": 0.9926719665527344,
127
  "verified": false,
128
  "verified_text": null,
@@ -130,8 +130,8 @@
130
  },
131
  {
132
  "text": "基础的音频相关数",
133
- "start_time": 0.0,
134
- "end_time": 1.6,
135
  "confidence": 0.7969970703125,
136
  "verified": false,
137
  "verified_text": null,
@@ -139,8 +139,8 @@
139
  },
140
  {
141
  "text": "并且掌握针对不同应用的音频数据处理工具",
142
- "start_time": 0.0,
143
- "end_time": 3.52,
144
  "confidence": 0.9851303100585938,
145
  "verified": false,
146
  "verified_text": null,
@@ -148,8 +148,8 @@
148
  },
149
  {
150
  "text": "本单元的支持会成为后面章节的",
151
- "start_time": 0.0,
152
- "end_time": 2.0,
153
  "confidence": 0.930908203125,
154
  "verified": false,
155
  "verified_text": null,
 
1
  {
2
  "audio_file": "dataset/audio/segments\\test1_segment_1.wav",
3
+ "timestamp": "20250423_161502",
4
  "segments": [
5
  {
6
  "text": "音频数据处理",
7
+ "start_time": 4.56,
8
+ "end_time": 5.72,
9
  "confidence": 0.906494140625,
10
  "verified": false,
11
  "verified_text": null,
 
13
  },
14
  {
15
  "text": "所有音频或语言相关的任务都需要使用音频",
16
+ "start_time": 8.4,
17
+ "end_time": 12.120000000000001,
18
  "confidence": 0.7564697265625,
19
  "verified": false,
20
  "verified_text": null,
 
22
  },
23
  {
24
  "text": "在我们先入了解这些任务之前",
25
+ "start_time": 13.11,
26
+ "end_time": 14.75,
27
  "confidence": 0.939605712890625,
28
  "verified": false,
29
  "verified_text": null,
 
31
  },
32
  {
33
  "text": "我们需要了解音频文件的实际内容",
34
+ "start_time": 14.75,
35
+ "end_time": 17.11,
36
  "confidence": 0.939605712890625,
37
  "verified": false,
38
  "verified_text": null,
 
40
  },
41
  {
42
  "text": "以及如何",
43
+ "start_time": 17.11,
44
+ "end_time": 17.91,
45
  "confidence": 0.939605712890625,
46
  "verified": false,
47
  "verified_text": null,
 
49
  },
50
  {
51
  "text": "本台語言將為你介紹的",
52
+ "start_time": 19.77,
53
+ "end_time": 21.05,
54
  "confidence": 0.907470703125,
55
  "verified": false,
56
  "verified_text": null,
 
58
  },
59
  {
60
  "text": "本单元将为你介绍于音频数据相关的基本概念",
61
+ "start_time": 21.63,
62
+ "end_time": 25.549999999999997,
63
  "confidence": 0.66796875,
64
  "verified": false,
65
  "verified_text": null,
 
67
  },
68
  {
69
  "text": "包括剝形、採用、綠和平補土",
70
+ "start_time": 26.28,
71
+ "end_time": 28.28,
72
  "confidence": 0.708251953125,
73
  "verified": false,
74
  "verified_text": null,
 
76
  },
77
  {
78
  "text": "你會學習到如何使用音頻",
79
+ "start_time": 28.28,
80
+ "end_time": 29.880000000000003,
81
  "confidence": 0.708251953125,
82
  "verified": false,
83
  "verified_text": null,
 
85
  },
86
  {
87
  "text": "包括音频数位加载",
88
+ "start_time": 30.42,
89
+ "end_time": 31.82,
90
  "confidence": 0.86474609375,
91
  "verified": false,
92
  "verified_text": null,
 
94
  },
95
  {
96
  "text": "音频数据处理",
97
+ "start_time": 31.82,
98
+ "end_time": 32.82,
99
  "confidence": 0.86474609375,
100
  "verified": false,
101
  "verified_text": null,
 
103
  },
104
  {
105
  "text": "高效加载大规模音频数级的流适加载方",
106
+ "start_time": 33.54,
107
+ "end_time": 36.42,
108
  "confidence": 0.956787109375,
109
  "verified": false,
110
  "verified_text": null,
 
112
  },
113
  {
114
  "text": "完成本單元的學期後",
115
+ "start_time": 37.8,
116
+ "end_time": 39.239999999999995,
117
  "confidence": 0.9926719665527344,
118
  "verified": false,
119
  "verified_text": null,
 
121
  },
122
  {
123
  "text": "你會找",
124
+ "start_time": 39.239999999999995,
125
+ "end_time": 39.919999999999995,
126
  "confidence": 0.9926719665527344,
127
  "verified": false,
128
  "verified_text": null,
 
130
  },
131
  {
132
  "text": "基础的音频相关数",
133
+ "start_time": 40.86,
134
+ "end_time": 42.46,
135
  "confidence": 0.7969970703125,
136
  "verified": false,
137
  "verified_text": null,
 
139
  },
140
  {
141
  "text": "并且掌握针对不同应用的音频数据处理工具",
142
+ "start_time": 43.05,
143
+ "end_time": 46.57,
144
  "confidence": 0.9851303100585938,
145
  "verified": false,
146
  "verified_text": null,
 
148
  },
149
  {
150
  "text": "本单元的支持会成为后面章节的",
151
+ "start_time": 47.49,
152
+ "end_time": 49.49,
153
  "confidence": 0.930908203125,
154
  "verified": false,
155
  "verified_text": null,
vad/__pycache__/audio_transcriber.cpython-312.pyc CHANGED
Binary files a/vad/__pycache__/audio_transcriber.cpython-312.pyc and b/vad/__pycache__/audio_transcriber.cpython-312.pyc differ
 
vad/audio_transcriber.py CHANGED
@@ -63,7 +63,7 @@ class AudioTranscriber:
63
  def save_transcription(self,
64
  results: List[TranscriptionResult],
65
  audio_path: str,
66
- output_dir: str = "../dataset/transcripts"):
67
  """
68
  保存转录结果到JSON文件
69
  """
 
63
  def save_transcription(self,
64
  results: List[TranscriptionResult],
65
  audio_path: str,
66
+ output_dir: str = "dataset/transcripts"):
67
  """
68
  保存转录结果到JSON文件
69
  """
vad/dataset/audio/segments/test1_segment_1.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:877aee33d778b34af2f0b819ac822d80316e97b73cb3823c1f436dbef8efcb0e
3
- size 35564
 
 
 
 
vad/dataset/audio/segments/test1_segment_10.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3135d983a5260d846e6cf165583efa3a0ef379bd86c885e678a63b41f66f548b
3
- size 48044
 
 
 
 
vad/dataset/audio/segments/test1_segment_11.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a966cbb2e82ebd278692adad509a18061306b73b715fc4a93468c27ed61627b
3
- size 111404
 
 
 
 
vad/dataset/audio/segments/test1_segment_12.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:52cfbcdc17cc5f190df467310f1a91c89e27f79662b2ce13f4ff5ec07015afec
3
- size 71084
 
 
 
 
vad/dataset/audio/segments/test1_segment_13.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8eedb04a2d817c0875003a7594f8bac255a28898dfae56aa97bd3021870140b2
3
- size 86444
 
 
 
 
vad/dataset/audio/segments/test1_segment_14.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:078850683b71e5b04781884b44bce0edb74999459b68b6fd53175ecacbd4980e
3
- size 34604
 
 
 
 
vad/dataset/audio/segments/test1_segment_15.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9c467e2ec3bdec346774cde0480a78689c0f6f13fd093b32baaa00187c392fb
3
- size 29804
 
 
 
 
vad/dataset/audio/segments/test1_segment_16.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:59bc62b5c124c9cac5ef78c69caff4e5caf3d0333e496e382ee365142eafc354
3
- size 47084
 
 
 
 
vad/dataset/audio/segments/test1_segment_17.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dbcb3578c8537243143da7ac2c7531ea7b9fc750cb26e9809643289eeddce7b
3
- size 107564
 
 
 
 
vad/dataset/audio/segments/test1_segment_18.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:538b2dda6990368d68234fded22b5ed3d67c56a620e79cba7ac545a102465160
3
- size 68204
 
 
 
 
vad/dataset/audio/segments/test1_segment_2.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:81375721eb3a532941083c9781f53f5e0f1ccbe1ef4108f98a019de400f5c564
3
- size 117164
 
 
 
 
vad/dataset/audio/segments/test1_segment_3.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd6120ff04e7365640b9e3a1fb062bc1c31ce0dc54904bd27e25ac5a0b068cde
3
- size 149804
 
 
 
 
vad/dataset/audio/segments/test1_segment_4.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:99bc0d18ffd0d10742b8d6b5450e537eccd1497c2247e714fa8efe6beb602abd
3
- size 41324
 
 
 
 
vad/dataset/audio/segments/test1_segment_5.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9e2196db3537028898b87442f074523251b33219302e6eb8518fb33396c30bd
3
- size 122924
 
 
 
 
vad/dataset/audio/segments/test1_segment_6.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e929f7966a425a559b7442a2914cb99b0df74f1d02938264642dc71f160fc383
3
- size 113324
 
 
 
 
vad/dataset/audio/segments/test1_segment_7.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:65d800356647c415d80e59fac63db01df31ce51a497aacf43f98aa0e6ec468cb
3
- size 77804
 
 
 
 
vad/dataset/audio/segments/test1_segment_8.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1c574a7c20332f85c6260febf6eae232473a798404ca29f1b54ac39e5b2d35c
3
- size 91244
 
 
 
 
vad/dataset/audio/segments/test1_segment_9.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f943b20eb3aafa0befb884f5d125e0596d3f419d8a3c5546ff3cf878603c36b8
3
- size 67244
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_114118.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_114118",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_114230.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_114230",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_114252.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_114252",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_125721.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_125721",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_130157.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_130157",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_130617.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_130617",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vad/dataset/audio/temp/test1_segments_20250423_130734.json DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "audio_file": "../dataset/audio/test1.wav",
3
- "timestamp": "20250423_130734",
4
- "total_frames": 1821,
5
- "speech_frames": 1167,
6
- "segments": [
7
- {
8
- "start_time": 4.56,
9
- "end_time": 5.67,
10
- "duration": 1.1100000000000003,
11
- "is_speech": true
12
- },
13
- {
14
- "start_time": 8.4,
15
- "end_time": 12.06,
16
- "duration": 3.66,
17
- "is_speech": true
18
- },
19
- {
20
- "start_time": 13.11,
21
- "end_time": 17.79,
22
- "duration": 4.68,
23
- "is_speech": true
24
- },
25
- {
26
- "start_time": 19.77,
27
- "end_time": 21.06,
28
- "duration": 1.2899999999999991,
29
- "is_speech": true
30
- },
31
- {
32
- "start_time": 21.63,
33
- "end_time": 25.47,
34
- "duration": 3.84,
35
- "is_speech": true
36
- },
37
- {
38
- "start_time": 26.28,
39
- "end_time": 29.82,
40
- "duration": 3.539999999999999,
41
- "is_speech": true
42
- },
43
- {
44
- "start_time": 30.42,
45
- "end_time": 32.85,
46
- "duration": 2.4299999999999997,
47
- "is_speech": true
48
- },
49
- {
50
- "start_time": 33.54,
51
- "end_time": 36.39,
52
- "duration": 2.8500000000000014,
53
- "is_speech": true
54
- },
55
- {
56
- "start_time": 37.8,
57
- "end_time": 39.9,
58
- "duration": 2.1000000000000014,
59
- "is_speech": true
60
- },
61
- {
62
- "start_time": 40.86,
63
- "end_time": 42.36,
64
- "duration": 1.5,
65
- "is_speech": true
66
- },
67
- {
68
- "start_time": 43.05,
69
- "end_time": 46.53,
70
- "duration": 3.480000000000004,
71
- "is_speech": true
72
- },
73
- {
74
- "start_time": 47.49,
75
- "end_time": 49.71,
76
- "duration": 2.219999999999999,
77
- "is_speech": true
78
- }
79
- ]
80
- }