File size: 3,811 Bytes
185fa42
 
 
 
 
 
 
 
 
 
 
 
 
f3c94eb
185fa42
4d0f080
 
 
 
 
 
185fa42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st

# Page-level Streamlit settings: browser-tab title, favicon and wide layout.
# NOTE(review): presumably kept ahead of the project imports so that it is the
# first Streamlit command executed on this page - confirm before moving it.
st.set_page_config(
    page_title="03_Ground_Truth",  # Use this format for ordering
    # The previous literal was mojibake (a UTF-8 emoji decoded with a
    # single-byte codepage, with its final byte lost), which is not a valid
    # emoji. It is written as a named escape so the source stays ASCII and
    # cannot be garbled again.
    # NOTE(review): MEMO (U+1F4DD) is inferred from the surviving bytes - confirm.
    page_icon="\N{MEMO}",
    layout="wide",
)

import pandas as pd
from database import DatabaseHandler
from data_processor import DataProcessor
from generate_ground_truth import generate_ground_truth, get_ground_truth_display_data
import logging
import sys

# Root logging setup: records at INFO and above are written to stdout only,
# each prefixed with timestamp, logger name and level.
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)

@st.cache_resource
def init_components():
    """Create the backend objects this page works with.

    Returns:
        A ``(DatabaseHandler, DataProcessor)`` tuple, both constructed with
        no arguments. The function is wrapped in ``st.cache_resource``.
    """
    database = DatabaseHandler()
    processor = DataProcessor()
    return database, processor

def _render_ground_truth(gt_data, heading, button_label, file_name):
    """Show a ground-truth table with a CSV download button, if there is data.

    Args:
        gt_data: DataFrame of ground-truth rows, or ``None``.
        heading: Sub-header text displayed above the table.
        button_label: Label of the download button.
        file_name: File name offered for the downloaded CSV.

    Renders nothing when ``gt_data`` is ``None`` or empty.
    """
    if gt_data is None or gt_data.empty:
        return

    st.subheader(heading)
    st.dataframe(gt_data)
    st.download_button(
        label=button_label,
        data=gt_data.to_csv(index=False),
        file_name=file_name,
        mime="text/csv",
    )


def main():
    """Render the ground-truth generation page.

    Steps, in order:
      1. Load all videos from the database; warn and stop if there are none.
      2. Offer a channel filter; for a specific channel, show its existing
         ground-truth questions with a CSV download.
      3. List the (filtered) videos and let the user pick one by title.
      4. On button press, generate ground-truth questions for the picked
         video and show the result or an error.
      5. Show the picked video's existing ground-truth questions with a CSV
         download.
    """
    # "\N{MEMO}" is the memo emoji (U+1F4DD); the earlier literal was mojibake.
    # NOTE(review): emoji inferred from the surviving bytes - confirm.
    st.title("Ground Truth Generation \N{MEMO}")

    db_handler, data_processor = init_components()

    # Get all videos
    videos = db_handler.get_all_videos()
    if not videos:
        st.warning("No videos available. Please process some videos in the Data Ingestion page first.")
        return

    video_df = pd.DataFrame(videos, columns=['youtube_id', 'title', 'channel_name', 'upload_date'])

    # Channel filter. Missing channel names (None/NaN) are dropped first:
    # they cannot be ordered against strings and would make sorted() raise.
    # Videos without a channel remain visible under "All".
    channels = sorted(video_df['channel_name'].dropna().unique())
    selected_channel = st.selectbox("Filter by Channel", ["All"] + channels)

    if selected_channel != "All":
        video_df = video_df[video_df['channel_name'] == selected_channel]
        # Display existing ground truth for channel
        _render_ground_truth(
            get_ground_truth_display_data(db_handler, channel_name=selected_channel),
            heading="Existing Ground Truth Questions for Channel",
            button_label="Download Channel Ground Truth CSV",
            file_name=f"ground_truth_{selected_channel}.csv",
        )

    st.subheader("Available Videos")
    st.dataframe(video_df)

    # Build the id -> title mapping once instead of re-filtering the frame
    # for every option. keep='first' matches the former `.iloc[0]` lookup
    # when an id appears more than once.
    titles_by_id = (
        video_df.drop_duplicates(subset='youtube_id', keep='first')
        .set_index('youtube_id')['title']
        .to_dict()
    )

    # Video selection
    selected_video_id = st.selectbox(
        "Select a Video",
        video_df['youtube_id'].tolist(),
        format_func=lambda video_id: titles_by_id[video_id],
    )

    if not selected_video_id:
        return

    # Generate ground truth
    if st.button("Generate Ground Truth Questions"):
        with st.spinner("Generating questions..."):
            # Broad catch is deliberate: this is the UI boundary, and any
            # failure should surface as an on-page error rather than a crash.
            try:
                questions_df = generate_ground_truth(
                    db_handler,
                    data_processor,
                    selected_video_id
                )
                if questions_df is not None and not questions_df.empty:
                    st.success("Successfully generated ground truth questions")
                    st.dataframe(questions_df)
                else:
                    st.error("Failed to generate ground truth questions")
            except Exception as e:
                st.error(f"Error generating ground truth: {str(e)}")
                # logger.exception records the traceback along with the message.
                logger.exception("Error in ground truth generation: %s", e)

    # Display existing ground truth
    _render_ground_truth(
        get_ground_truth_display_data(db_handler, video_id=selected_video_id),
        heading="Existing Ground Truth Questions",
        button_label="Download Ground Truth CSV",
        file_name=f"ground_truth_{selected_video_id}.csv",
    )

# Render the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()