# File size: 3,038 Bytes
# 1897f56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import io
import logging
from typing import Iterator

import speech_recognition as sr
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_core.documents import Document
from pydub import AudioSegment


logger = logging.getLogger(__name__)


class AudioParser(BaseBlobParser):
    """Parse audio files from a blob and convert them to text.

    The audio is transcribed with ``speech_recognition``'s
    ``Recognizer.recognize_google``; formats that ``sr.AudioFile`` is not
    handed directly are first converted to WAV with ``pydub``.
    """

    # Supported MIME type -> format name. The format name is what gets passed
    # to pydub and compared against ``_DIRECT_FORMATS``. It cannot be derived
    # from the MIME subtype: "audio/mpeg" is MP3 data and "audio/x-aiff" is
    # AIFF, so the subtypes "mpeg" / "x-aiff" are not usable format names.
    _MIME_TO_FORMAT = {
        "audio/wav": "wav",       # .wav
        "audio/mpeg": "mp3",      # .mp3
        "audio/ogg": "ogg",       # .ogg
        "audio/flac": "flac",     # .flac
        "audio/x-aiff": "aiff",   # .aiff
    }

    # Formats handed to ``sr.AudioFile`` as-is, without a pydub conversion.
    _DIRECT_FORMATS = frozenset({"wav", "flac", "aiff"})

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Transcribe an audio blob and yield it as a single Document.

        Args:
            blob: The blob to parse. Its ``mimetype`` must be one of the keys
                of ``_MIME_TO_FORMAT``.

        Yields:
            One Document whose ``page_content`` is the recognized text and
            whose metadata holds the blob's ``source``.

        Raises:
            ValueError: If the blob's MIME type is not supported.
            sr.UnknownValueError: If the speech could not be understood.
            sr.RequestError: If the recognition service request failed.
            Exception: Any other error raised while reading or converting the
                audio is logged and re-raised unchanged.
        """
        logger.debug("Blob MIME type: %s", blob.mimetype)

        audio_format = self._MIME_TO_FORMAT.get(blob.mimetype)
        if audio_format is None:
            raise ValueError(
                f"This blob type is not supported for this parser. Supported types are: {list(self._MIME_TO_FORMAT)}"
            )

        logger.debug("Attempting to process audio format: %s", audio_format)
        recognizer = sr.Recognizer()

        try:
            with blob.as_bytes_io() as audio_file:
                if audio_format in self._DIRECT_FORMATS:
                    # Directly use AudioFile for these formats.
                    audio_stream = audio_file
                else:
                    # Convert to PCM WAV using pydub.
                    audio_segment = AudioSegment.from_file(
                        io.BytesIO(audio_file.read()), format=audio_format)
                    audio_stream = io.BytesIO()
                    audio_segment.export(audio_stream, format="wav")
                    audio_stream.seek(0)

                with sr.AudioFile(audio_stream) as source:
                    audio_data = recognizer.record(source)

            # The audio is fully recorded into memory at this point, so the
            # blob stream is already closed while the recognition request runs
            # and while the generator is suspended at the yield below.
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            logger.error(
                "Google Speech Recognition could not understand the audio.")
            raise
        except sr.RequestError as e:
            logger.error(
                "Could not request results from Google Speech Recognition service; %s", e)
            raise
        except Exception as e:
            logger.error("Error processing audio file: %s", e)
            raise

        yield Document(page_content=text, metadata={"source": blob.source})