Update handler.py
handler.py  CHANGED  +26 −100
@@ -1,109 +1,35 @@
-import os
-import requests
-import json
-import base64
-import soundfile as sf
-import numpy as np
-from scipy.signal import resample
-
-# --- Configuration ---
-# Replace with your actual API key/token
-HF_TOKEN = os.environ.get("HF_API_TOKEN") # Get the token from environment variable
-# Replace with your actual endpoint URL
-STG_API_URL = "https://YOUR_ENDPOINT_URL"
-
⋯
-        data, sr = sf.read(filename)
-    except sf.LibsndfileError as e:
-        print(f"Error reading audio file: {e}")
-        return None
-
⋯
-        data = data.mean(axis=1) # Average channels to create mono
-
⋯
-        # No parameters needed
-    }
-    json_data = json.dumps(data_payload)
-
-    # Use requests to send the POST request
-    try:
-        response = requests.post(
-            url=STG_API_URL,
-            data=json_data,
-            headers={
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {HF_TOKEN}"
-            },
-        )
-        response.raise_for_status()
-        return response.json()
-    except requests.exceptions.RequestException as e:
-        print(f"Error during API request: {e}")
-        print(f"Response content: {response.content}")
-        return None
-
-def format_timecode(seconds):
-    """Formats seconds into HH:MM:SS:mmm format."""
-    m, s = divmod(seconds, 60)
-    h, m = divmod(m, 60)
-    return f"{int(h):02}:{int(m):02}:{int(s):02}:{int((s%1)*1000):03}"
-
-def process_and_format_output(output, input_file):
-    """Formats the API response (now a dict) and saves it to a file."""
-    if output is None:
-        print("No output received from API.")
-        return None
-
-    # Check if the output is a dictionary and has the expected key
-    if not isinstance(output, dict) or "diarization" not in output:
-        print(f"Unexpected output format: {output}")
-        return None
-
-    try:
-        formatted_output = []
-        for speaker in output["diarization"]:
-            start_time = format_timecode(float(speaker["start"]))
-            end_time = format_timecode(float(speaker["stop"]))
-            formatted_output.append(f"{speaker['label']} START: {start_time} END: {end_time}")
-
-        base_filename = os.path.splitext(os.path.basename(input_file))[0]
-        output_dir = "TMP_STG"
-        os.makedirs(output_dir, exist_ok=True)
-        output_filename = os.path.join(output_dir, base_filename + "_voicerec-output.txt")
-
-        with open(output_filename, "w", encoding="utf-8") as f:
-            for line in formatted_output:
-                f.write(line + "\n")
-
-        return output_filename
-    except (KeyError, ValueError) as e:
-        print(f"Error processing API output: {e}")
-        return None
-
-# --- Main Script ---
-
-if __name__ == "__main__":
-    # --- Configuration for Standalone Testing ---
-    SAMPLE_AUDIO_FILE = "sample.wav" # Put your sample audio file in the same directory
-
⋯
-    output_file = process_and_format_output(api_output, SAMPLE_AUDIO_FILE)
-    if output_file:
-        print(f"Output saved to: {output_file}")
-    else:
-        print("API request failed.")
+from pyannote.audio import Pipeline, Audio
+import torch
+
+
+class EndpointHandler:
+    def __init__(self, path=""):
+        # initialize pretrained pipeline
+        self._pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
+
+        # send pipeline to GPU if available
+        if torch.cuda.is_available():
+            self._pipeline.to(torch.device("cuda"))
+
+        # initialize audio reader
+        self._io = Audio()
+
+    def __call__(self, data):
+        inputs = data.pop("inputs", data)
+        waveform, sample_rate = self._io(inputs)
+
+        parameters = data.pop("parameters", dict())
+        diarization = self._pipeline(
+            {"waveform": waveform, "sample_rate": sample_rate}, **parameters
+        )
+
+        processed_diarization = [
+            {
+                "speaker": speaker,
+                "start": f"{turn.start:.3f}",
+                "end": f"{turn.end:.3f}",
+            }
+            for turn, _, speaker in diarization.itertracks(yield_label=True)
+        ]
+
+        return {"diarization": processed_diarization}
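
The rewritten handler can be smoke-tested locally before deploying. A minimal sketch, assuming pyannote.audio and torch are installed, access to the gated pyannote/speaker-diarization-3.1 model has been granted and authenticated (e.g. via huggingface-cli login), and "sample.wav" is a hypothetical local file:

    from handler import EndpointHandler

    # Loads the pretrained pipeline (and moves it to GPU if one is available).
    handler = EndpointHandler()

    # Invoke the handler the way the serving stack would, with a dict payload;
    # "sample.wav" is a placeholder path.
    result = handler({"inputs": "sample.wav"})
    for turn in result["diarization"]:
        print(f'{turn["speaker"]} {turn["start"]} -> {turn["end"]}')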
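
Client code that previously lived in this file now reduces to a plain HTTP call against the deployed endpoint. A hedged sketch, assuming the handler runs on Hugging Face Inference Endpoints and the serving toolkit decodes a binary audio request body into data["inputs"]; ENDPOINT_URL stays a placeholder like the removed STG_API_URL, and format_timecode reuses the removed helper's logic to reproduce the old HH:MM:SS:mmm output. Note the response schema also changed: turns now carry "speaker"/"start"/"end" (fractional seconds) instead of the old "label"/"start"/"stop":

    import os
    import requests

    ENDPOINT_URL = "https://YOUR_ENDPOINT_URL"  # placeholder
    HF_TOKEN = os.environ.get("HF_API_TOKEN")

    def format_timecode(seconds):
        """Formats seconds into HH:MM:SS:mmm (same logic as the removed helper)."""
        m, s = divmod(seconds, 60)
        h, m = divmod(m, 60)
        return f"{int(h):02}:{int(m):02}:{int(s):02}:{int((s%1)*1000):03}"

    with open("sample.wav", "rb") as f:
        response = requests.post(
            ENDPOINT_URL,
            data=f.read(),  # raw audio bytes instead of the old JSON payload
            headers={
                "Content-Type": "audio/x-wav",
                "Authorization": f"Bearer {HF_TOKEN}",
            },
        )
    response.raise_for_status()

    for turn in response.json()["diarization"]:
        start = format_timecode(float(turn["start"]))
        end = format_timecode(float(turn["end"]))
        print(f'{turn["speaker"]} START: {start} END: {end}')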