# Page header captured with this file - Spaces status: Running; file size: 3,811 bytes; revisions: 185fa42, f3c94eb, 4d0f080.
# Module setup for the ground-truth Streamlit page: page configuration,
# project imports, and stdout logging.
import streamlit as st

# NOTE(review): the page config is issued before the remaining imports,
# presumably because Streamlit expects set_page_config to be the first
# Streamlit command of a script - confirm before reordering these statements.
st.set_page_config(
    page_title="03_Ground_Truth", # Use this format for ordering
    # NOTE(review): this icon looks like a mis-encoded emoji - confirm the intended glyph.
    page_icon="π",
    layout="wide"
)
import pandas as pd
from database import DatabaseHandler
from data_processor import DataProcessor
from generate_ground_truth import generate_ground_truth, get_ground_truth_display_data
import logging
import sys

# Configure logging for stdout only: INFO level and above, each record
# prefixed with timestamp, logger name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)
# Module-level logger used by the page code below.
logger = logging.getLogger(__name__)
@st.cache_resource
def init_components():
    """Build the database handler and data processor used by this page.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned objects and reuses them instead of constructing new ones on
    every script rerun.

    Returns:
        A ``(DatabaseHandler, DataProcessor)`` tuple.
    """
    database = DatabaseHandler()
    processor = DataProcessor()
    return database, processor
def _render_ground_truth(gt_data, heading, button_label, file_name):
    """Show a ground-truth table with a CSV download button, if any data exists.

    Args:
        gt_data: DataFrame of ground-truth questions, or ``None``.
        heading: Sub-header text displayed above the table.
        button_label: Label of the download button.
        file_name: File name offered to the browser for the CSV download.
    """
    # A missing result is treated like an empty one so the page keeps
    # rendering instead of failing on ``None.empty``.
    if gt_data is None or gt_data.empty:
        return
    st.subheader(heading)
    st.dataframe(gt_data)
    st.download_button(
        label=button_label,
        data=gt_data.to_csv(index=False),
        file_name=file_name,
        mime="text/csv",
    )


def main():
    """Render the ground-truth generation page.

    Flow:
      1. Load every stored video; warn and stop when there are none.
      2. Offer a channel filter; when a specific channel is chosen, narrow
         the video table and show that channel's existing ground truth.
      3. Let the user pick a video, generate ground-truth questions for it
         on demand, and show/download the questions already stored for it.
    """
    st.title("Ground Truth Generation π")
    db_handler, data_processor = init_components()

    # Get all videos
    videos = db_handler.get_all_videos()
    if not videos:
        st.warning("No videos available. Please process some videos in the Data Ingestion page first.")
        return

    video_df = pd.DataFrame(videos, columns=['youtube_id', 'title', 'channel_name', 'upload_date'])

    # Channel filter. Missing channel names are dropped first: sorting a mix
    # of strings and None/NaN would raise TypeError.
    channels = sorted(video_df['channel_name'].dropna().unique())
    selected_channel = st.selectbox("Filter by Channel", ["All"] + channels)

    if selected_channel != "All":
        video_df = video_df[video_df['channel_name'] == selected_channel]
        # Display existing ground truth for channel
        _render_ground_truth(
            get_ground_truth_display_data(db_handler, channel_name=selected_channel),
            heading="Existing Ground Truth Questions for Channel",
            button_label="Download Channel Ground Truth CSV",
            file_name=f"ground_truth_{selected_channel}.csv",
        )

    st.subheader("Available Videos")
    st.dataframe(video_df)

    # Build the id -> title lookup once instead of filtering the DataFrame
    # for every option. setdefault keeps the first title seen for an id,
    # matching the previous ``.iloc[0]`` behaviour.
    title_by_id = {}
    for youtube_id, title in zip(video_df['youtube_id'], video_df['title']):
        title_by_id.setdefault(youtube_id, title)

    # Video selection
    selected_video_id = st.selectbox(
        "Select a Video",
        video_df['youtube_id'].tolist(),
        format_func=lambda video_id: title_by_id[video_id],
    )

    if not selected_video_id:
        return

    # Generate ground truth
    if st.button("Generate Ground Truth Questions"):
        with st.spinner("Generating questions..."):
            try:
                questions_df = generate_ground_truth(
                    db_handler,
                    data_processor,
                    selected_video_id
                )
                if questions_df is not None and not questions_df.empty:
                    st.success("Successfully generated ground truth questions")
                    st.dataframe(questions_df)
                else:
                    st.error("Failed to generate ground truth questions")
            except Exception as e:
                # UI boundary: report the failure to the user and keep the
                # page alive; logger.exception records the traceback too.
                st.error(f"Error generating ground truth: {e}")
                logger.exception("Error in ground truth generation: %s", e)

    # Display existing ground truth
    _render_ground_truth(
        get_ground_truth_display_data(db_handler, video_id=selected_video_id),
        heading="Existing Ground Truth Questions",
        button_label="Download Ground Truth CSV",
        file_name=f"ground_truth_{selected_video_id}.csv",
    )
# Run the page only when this file is executed as the script entry point.
if __name__ == "__main__":
    main()