Spaces:
Sleeping
Sleeping
from typing import Iterator | |
from langchain_core.documents import Document | |
from langchain_community.document_loaders.base import BaseBlobParser | |
from langchain_community.document_loaders.blob_loaders import Blob | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
import io | |
class AudioParser(BaseBlobParser):
    """Parse audio files from a blob and convert them to text.

    The audio is transcribed with ``recognizer.recognize_google`` from the
    ``speech_recognition`` package. Formats that ``sr.AudioFile`` is given
    directly (WAV, FLAC, AIFF) are passed through unchanged; every other
    supported format is first converted to WAV with pydub.
    """

    # MIME type -> format name understood by pydub and used to pick the
    # decoding path. The MIME subtype cannot be used directly: "mpeg" and
    # "x-aiff" are not the names the decoders expect ("mp3" and "aiff").
    _MIME_TO_FORMAT = {
        "audio/wav": "wav",      # .wav
        "audio/x-wav": "wav",    # .wav (common alias for WAV)
        "audio/mpeg": "mp3",     # .mp3
        "audio/ogg": "ogg",      # .ogg
        "audio/flac": "flac",    # .flac
        "audio/x-aiff": "aiff",  # .aiff
    }

    # Formats handed to sr.AudioFile as-is, without a pydub conversion.
    _NATIVE_FORMATS = frozenset({"wav", "flac", "aiff"})

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Parse an audio file into the Document iterator.

        Args:
            blob: The blob to parse. Its ``mimetype`` selects the decoder
                and its ``source`` is copied into the document metadata.

        Returns:
            An iterator yielding a single Document whose ``page_content``
            is the recognized text.

        Raises:
            ValueError: If the blob's MIME type is not supported.
            sr.UnknownValueError: If the speech could not be understood.
            sr.RequestError: If the recognition service request failed.
        """
        supported_mime_types = list(self._MIME_TO_FORMAT)

        # Debugging: Print MIME type
        print(f"Blob MIME type: {blob.mimetype}")

        audio_format = self._MIME_TO_FORMAT.get(blob.mimetype)
        if audio_format is None:
            raise ValueError(
                f"This blob type is not supported for this parser. Supported types are: {supported_mime_types}"
            )

        recognizer = sr.Recognizer()

        try:
            # Read everything up front so the stream handed to the
            # recognizer does not depend on the blob's handle staying open.
            with blob.as_bytes_io() as audio_file:
                audio_bytes = audio_file.read()

            print(f"Attempting to process audio format: {audio_format}")

            if audio_format in self._NATIVE_FORMATS:
                # Directly use AudioFile for these formats
                audio_stream = io.BytesIO(audio_bytes)
            else:
                # Convert to PCM WAV using pydub
                audio_segment = AudioSegment.from_file(
                    io.BytesIO(audio_bytes), format=audio_format)
                audio_stream = io.BytesIO()
                audio_segment.export(audio_stream, format="wav")
                audio_stream.seek(0)

            with sr.AudioFile(audio_stream) as source:
                audio_data = recognizer.record(source)

            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            print(
                "Google Speech Recognition could not understand the audio.")
            raise
        except sr.RequestError as e:
            print(
                f"Could not request results from Google Speech Recognition service; {e}")
            raise
        except Exception as e:
            print(f"Error processing audio file: {e}")
            raise

        # Yield outside the try block so exceptions raised by the consumer
        # of this generator are not reported as audio-processing errors.
        metadata = {"source": blob.source}
        yield Document(page_content=text, metadata=metadata)