Spaces:
Runtime error
Runtime error
File size: 5,034 Bytes
ed4d993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
from typing import Any, Dict, List, Optional
from langchain.chains.base import Chain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import Extra
from langchain_experimental.video_captioning.services.audio_service import (
AudioProcessor,
)
from langchain_experimental.video_captioning.services.caption_service import (
CaptionProcessor,
)
from langchain_experimental.video_captioning.services.combine_service import (
CombineProcessor,
)
from langchain_experimental.video_captioning.services.image_service import (
ImageProcessor,
)
from langchain_experimental.video_captioning.services.srt_service import SRTProcessor
class VideoCaptioningChain(Chain):
    """Chain that turns a video file into SRT caption content.

    The chain runs five processors in sequence: audio transcription, image
    frame description, closed-caption refinement, merging of subtitles with
    closed captions, and finally SRT serialization. The input is read from
    the ``"video_file_path"`` key and the result is returned under ``"srt"``.
    """

    # Language model handed to the caption and combine processors.
    llm: BaseLanguageModel
    # Passed to ``AudioProcessor`` as its ``api_key``.
    assemblyai_key: str
    # Not read anywhere in this class.
    prompt: Optional[PromptTemplate] = None
    # Forwarded to the caption and combine processors.
    verbose: bool = True
    # When truthy (and a run manager is supplied), progress text is sent to
    # ``run_manager.on_text`` between pipeline stages.
    use_logging: Optional[bool] = True
    # Forwarded to ``ImageProcessor`` as ``frame_skip``; the meaning of the
    # default ``-1`` is defined by that processor.
    frame_skip: int = -1
    # Forwarded to ``ImageProcessor`` as ``threshold``.
    image_delta_threshold: int = 3000000
    # Forwarded to ``CombineProcessor`` as ``char_limit``.
    closed_caption_char_limit: int = 20
    # Forwarded to ``CaptionProcessor`` as ``similarity_threshold``.
    closed_caption_similarity_threshold: int = 80
    # Forwarded to ``CaptionProcessor`` as ``use_unclustered_models``.
    use_unclustered_video_models: bool = False

    class Config:
        extra = Extra.allow
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys this chain reads (``video_file_path``)."""
        return ["video_file_path"]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys this chain produces (``srt``)."""
        return ["srt"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Run the captioning pipeline on one video file.

        Args:
            inputs: Mapping that must contain ``"video_file_path"``.
            run_manager: Optional callback manager. It is passed to the
                audio, image, caption and combine processors, and receives
                progress text when ``use_logging`` is truthy.

        Returns:
            A dict with a single ``"srt"`` key holding the value returned by
            ``SRTProcessor.process``.

        Raises:
            ValueError: If ``"video_file_path"`` is missing from ``inputs``.
        """
        if "video_file_path" not in inputs:
            raise ValueError(
                "Missing 'video_file_path' in inputs for video captioning."
            )
        video_file_path = inputs["video_file_path"]

        # Resolve the logging sink once instead of repeating the
        # ``use_logging and run_manager`` guard before every message.
        emit = run_manager.on_text if self.use_logging and run_manager else None

        def log(text: str) -> None:
            """Send ``text`` to the run manager when logging is active."""
            if emit is not None:
                emit(text)

        def log_stage(finished: str, models: Any, upcoming: str) -> None:
            """Report a finished stage, list its models, announce the next."""
            # Models are only stringified when logging is active, so a
            # disabled logger costs nothing.
            if emit is not None:
                listing = "\n".join(str(model) for model in models)
                emit(f"{finished}\n{listing}\n{upcoming}\n")

        log("Loading processors...\n")

        audio_processor = AudioProcessor(api_key=self.assemblyai_key)
        image_processor = ImageProcessor(
            frame_skip=self.frame_skip, threshold=self.image_delta_threshold
        )
        caption_processor = CaptionProcessor(
            llm=self.llm,
            verbose=self.verbose,
            similarity_threshold=self.closed_caption_similarity_threshold,
            use_unclustered_models=self.use_unclustered_video_models,
        )
        combine_processor = CombineProcessor(
            llm=self.llm,
            verbose=self.verbose,
            char_limit=self.closed_caption_char_limit,
        )
        srt_processor = SRTProcessor()

        log("Finished loading processors.\nGenerating subtitles from audio...\n")

        # Get models for speech to text subtitles
        audio_models = audio_processor.process(video_file_path, run_manager)
        log_stage(
            "Finished generating subtitles:",
            audio_models,
            "Generating closed captions from video...",
        )

        # Get models for image frame description
        image_models = image_processor.process(video_file_path, run_manager)
        log_stage(
            "Finished generating closed captions:",
            image_models,
            "Refining closed captions...",
        )

        # Get models for video event closed-captions
        video_models = caption_processor.process(image_models, run_manager)
        log_stage(
            "Finished refining closed captions:",
            video_models,
            "Combining subtitles with closed captions...",
        )

        # Combine the subtitle models with the closed-caption models
        caption_models = combine_processor.process(
            video_models, audio_models, run_manager
        )
        log_stage(
            "Finished combining subtitles with closed captions:",
            caption_models,
            "Generating SRT file...",
        )

        # Convert the combined model to SRT format
        srt_content = srt_processor.process(caption_models)
        log("Finished generating srt file.\n")

        return {"srt": srt_content}

    @property
    def _chain_type(self) -> str:
        """Return the identifier string for this chain type."""
        return "video_captioning_chain"
|