import streamlit as st
from transcript_extractor import get_transcript
import logging
import sys

# Configure logging for stdout only
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)
logger = logging.getLogger(__name__)


def _to_int(value, default=0):
    """Coerce *value* to ``int``, returning *default* if it cannot be converted.

    ``dict.get(key, 0)`` only supplies the default when the key is absent; a
    key that is present but holds ``None`` or a non-numeric string would make
    a bare ``int(...)`` raise. This helper absorbs both cases.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def process_single_video(db_handler, data_processor, video_id, embedding_model):
    """Process a single video for indexing.

    Steps, in order:

    1. Ask ``db_handler`` whether an index already exists for ``video_id``;
       if so, return that value unchanged.
    2. Fetch the transcript via ``get_transcript`` and run it through
       ``data_processor.process_transcript``.
    3. Store the video row with ``db_handler.add_video``.
    4. Build an index named ``video_<video_id>_<embedding_model>`` (lowercased)
       via ``data_processor.build_index`` and register it with ``db_handler``.

    Args:
        db_handler: Object providing ``get_elasticsearch_index_by_youtube_id``,
            ``add_video``, ``add_embedding_model``, ``get_video_by_youtube_id``
            and ``add_elasticsearch_index``.
        data_processor: Object providing ``process_transcript`` and
            ``build_index``.
        video_id: Identifier of the video to process.
        embedding_model: Embedding model name; becomes part of the index name.

    Returns:
        The existing index value from ``db_handler`` when one is found, the
        value returned by ``data_processor.build_index`` on success, or
        ``None`` when any step fails (including any raised exception, which is
        logged with its traceback and not propagated).
    """
    try:
        # Check for existing index
        existing_index = db_handler.get_elasticsearch_index_by_youtube_id(video_id)
        if existing_index:
            logger.info(f"Video {video_id} already processed. Using existing index.")
            return existing_index

        # Get transcript data
        transcript_data = get_transcript(video_id)
        if not transcript_data:
            logger.error(f"Failed to retrieve transcript for video {video_id}")
            return None

        # Process transcript
        processed_data = data_processor.process_transcript(video_id, transcript_data)
        if not processed_data:
            logger.error(f"Failed to process transcript for video {video_id}")
            return None

        # Prepare video data
        metadata = transcript_data['metadata']
        video_data = {
            'video_id': video_id,
            'title': metadata.get('title', 'Unknown Title'),
            'author': metadata.get('author', 'Unknown Author'),
            'upload_date': metadata.get('upload_date', 'Unknown Date'),
            # Counters may be present-but-None or malformed; a bad counter
            # should not abort indexing of the whole video, so fall back to 0.
            'view_count': _to_int(metadata.get('view_count', 0)),
            'like_count': _to_int(metadata.get('like_count', 0)),
            'comment_count': _to_int(metadata.get('comment_count', 0)),
            'video_duration': metadata.get('duration', 'Unknown Duration'),
            'transcript_content': processed_data['content'],
        }

        # Save to database
        db_handler.add_video(video_data)

        # Build index; build_index's return value replaces the requested name
        # and is falsy on failure.
        index_name = f"video_{video_id}_{embedding_model}".lower()
        index_name = data_processor.build_index(index_name)

        if index_name:
            # Save index information
            embedding_model_id = db_handler.add_embedding_model(
                embedding_model, "Description of the model"
            )
            video_record = db_handler.get_video_by_youtube_id(video_id)
            if video_record:
                # NOTE(review): video_record[0] is presumably the row's
                # primary key - confirm against db_handler's schema.
                db_handler.add_elasticsearch_index(
                    video_record[0], index_name, embedding_model_id
                )
                logger.info(f"Successfully processed video: {video_data['title']}")
                return index_name

        # Reached when the index build failed OR the video row could not be
        # read back after the index was built.
        logger.error(f"Failed to process video {video_id}")
        return None

    except Exception as e:
        # Top-level boundary for this operation: log with traceback so the
        # root cause is diagnosable, then report failure to the caller.
        logger.exception(f"Error processing video {video_id}: {str(e)}")
        return None