import io
import logging
from typing import Iterator

import speech_recognition as sr
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_core.documents import Document
from pydub import AudioSegment

logger = logging.getLogger(__name__)

# Maps each accepted MIME type to the audio format name used for decoding.
# The MIME subtype itself cannot be used as the format name: "mpeg" is not
# the MP3 format name and "x-aiff" is not a format name at all.
_MIME_TO_FORMAT = {
    "audio/wav": "wav",  # .wav
    "audio/x-wav": "wav",  # .wav (common alias)
    "audio/mpeg": "mp3",  # .mp3
    "audio/ogg": "ogg",  # .ogg
    "audio/flac": "flac",  # .flac
    "audio/x-aiff": "aiff",  # .aiff
}

# Formats handed to sr.AudioFile as-is; everything else is first converted
# to WAV with pydub.
_DIRECT_FORMATS = frozenset({"wav", "flac", "aiff"})


class AudioParser(BaseBlobParser):
    """Parse audio files from a blob and convert them to text."""

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Parse an audio file into the Document iterator.

        The audio is transcribed with ``Recognizer.recognize_google`` and
        yielded as a single Document whose metadata holds the blob source.

        Args:
            blob: The blob to parse. Its ``mimetype`` selects the decoder.

        Returns:
            An iterator yielding one Document with the transcribed text.

        Raises:
            ValueError: If the blob's MIME type is not supported.
            sr.UnknownValueError: If the speech could not be understood.
            sr.RequestError: If the recognition service request failed.
        """
        supported_mime_types = list(_MIME_TO_FORMAT)
        logger.debug("Blob MIME type: %s", blob.mimetype)

        if blob.mimetype not in _MIME_TO_FORMAT:
            raise ValueError(
                f"This blob type is not supported for this parser. Supported types are: {supported_mime_types}"
            )

        audio_format = _MIME_TO_FORMAT[blob.mimetype]
        logger.debug("Attempting to process audio format: %s", audio_format)

        recognizer = sr.Recognizer()

        # Decoding problems are logged once here and re-raised unchanged.
        try:
            audio_stream = self._open_recognizer_stream(blob, audio_format)
            with sr.AudioFile(audio_stream) as source:
                audio_data = recognizer.record(source)
        except Exception:
            logger.exception("Error processing audio file")
            raise

        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            logger.error(
                "Google Speech Recognition could not understand the audio.")
            raise
        except sr.RequestError as e:
            logger.error(
                "Could not request results from Google Speech Recognition "
                "service; %s", e)
            raise

        # Yield outside every try block so that exceptions thrown into the
        # generator by the consumer are not reported as parsing failures.
        yield Document(page_content=text, metadata={"source": blob.source})

    @staticmethod
    def _open_recognizer_stream(blob: Blob, audio_format: str) -> io.BytesIO:
        """Return an in-memory stream of the blob's audio for sr.AudioFile.

        Args:
            blob: The blob whose bytes are read.
            audio_format: Format name taken from ``_MIME_TO_FORMAT``.

        Returns:
            A BytesIO positioned at offset 0. It holds the original bytes
            for formats in ``_DIRECT_FORMATS`` and a WAV export otherwise.
        """
        with blob.as_bytes_io() as audio_file:
            audio_bytes = audio_file.read()

        # An independent buffer keeps the stream usable after the blob's
        # own handle has been closed.
        if audio_format in _DIRECT_FORMATS:
            return io.BytesIO(audio_bytes)

        # Convert to WAV using pydub.
        audio_segment = AudioSegment.from_file(
            io.BytesIO(audio_bytes), format=audio_format)
        wav_stream = io.BytesIO()
        audio_segment.export(wav_stream, format="wav")
        wav_stream.seek(0)
        return wav_stream