# langchain/libs/experimental/langchain_experimental/video_captioning/base.py
from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain_core.callbacks import CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import Extra

from langchain_experimental.video_captioning.services.audio_service import (
    AudioProcessor,
)
from langchain_experimental.video_captioning.services.caption_service import (
    CaptionProcessor,
)
from langchain_experimental.video_captioning.services.combine_service import (
    CombineProcessor,
)
from langchain_experimental.video_captioning.services.image_service import (
    ImageProcessor,
)
from langchain_experimental.video_captioning.services.srt_service import SRTProcessor


class VideoCaptioningChain(Chain):
    """Chain for video captioning.

    Transcribes a video's audio into subtitles, generates closed captions
    from its image frames, combines the two, and returns the result in
    SRT format.
    """
    llm: BaseLanguageModel
    assemblyai_key: str  # API key for the AssemblyAI speech-to-text service
    prompt: Optional[PromptTemplate] = None
    verbose: bool = True
    use_logging: Optional[bool] = True

    # Frame sampling rate and frame-difference threshold, forwarded to
    # ImageProcessor.
    frame_skip: int = -1
    image_delta_threshold: int = 3000000

    # Caption length and similarity limits, forwarded to CaptionProcessor
    # and CombineProcessor when refining and merging captions.
    closed_caption_char_limit: int = 20
    closed_caption_similarity_threshold: int = 80
    use_unclustered_video_models: bool = False

    class Config:
        extra = Extra.allow
        arbitrary_types_allowed = True  # BaseLanguageModel is not a pydantic type

    @property
    def input_keys(self) -> List[str]:
        return ["video_file_path"]

    @property
    def output_keys(self) -> List[str]:
        return ["srt"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        if "video_file_path" not in inputs:
            raise ValueError(
                "Missing 'video_file_path' in inputs for video captioning."
            )
        video_file_path = inputs["video_file_path"]
        nl = "\n"

        if self.use_logging and run_manager:
            run_manager.on_text("Loading processors..." + nl)
        # Instantiate the processing services for each stage of the pipeline.
        audio_processor = AudioProcessor(api_key=self.assemblyai_key)
        image_processor = ImageProcessor(
            frame_skip=self.frame_skip, threshold=self.image_delta_threshold
        )
        caption_processor = CaptionProcessor(
            llm=self.llm,
            verbose=self.verbose,
            similarity_threshold=self.closed_caption_similarity_threshold,
            use_unclustered_models=self.use_unclustered_video_models,
        )
        combine_processor = CombineProcessor(
            llm=self.llm,
            verbose=self.verbose,
            char_limit=self.closed_caption_char_limit,
        )
        srt_processor = SRTProcessor()

        if self.use_logging and run_manager:
            run_manager.on_text(
                "Finished loading processors."
                + nl
                + "Generating subtitles from audio..."
                + nl
            )

        # Get models for speech-to-text subtitles
        audio_models = audio_processor.process(video_file_path, run_manager)
        if self.use_logging and run_manager:
            run_manager.on_text(
                "Finished generating subtitles:"
                + nl
                + f"{nl.join(str(obj) for obj in audio_models)}"
                + nl
                + "Generating closed captions from video..."
                + nl
            )

        # Get models for image frame description
        image_models = image_processor.process(video_file_path, run_manager)
        if self.use_logging and run_manager:
            run_manager.on_text(
                "Finished generating closed captions:"
                + nl
                + f"{nl.join(str(obj) for obj in image_models)}"
                + nl
                + "Refining closed captions..."
                + nl
            )

        # Get models for video event closed captions
        video_models = caption_processor.process(image_models, run_manager)
        if self.use_logging and run_manager:
            run_manager.on_text(
                "Finished refining closed captions:"
                + nl
                + f"{nl.join(str(obj) for obj in video_models)}"
                + nl
                + "Combining subtitles with closed captions..."
                + nl
            )

        # Combine the subtitle models with the closed-caption models
        caption_models = combine_processor.process(
            video_models, audio_models, run_manager
        )
        if self.use_logging and run_manager:
            run_manager.on_text(
                "Finished combining subtitles with closed captions:"
                + nl
                + f"{nl.join(str(obj) for obj in caption_models)}"
                + nl
                + "Generating SRT file..."
                + nl
            )

        # Convert the combined models to SRT format
        srt_content = srt_processor.process(caption_models)
        if self.use_logging and run_manager:
            run_manager.on_text("Finished generating SRT file." + nl)

        return {"srt": srt_content}

    @property
    def _chain_type(self) -> str:
        return "video_captioning_chain"