Spaces:
Running
Running
File size: 6,281 Bytes
28acfc9 507c938 dbd33b2 25b2b2b dbd33b2 25b2b2b ee38416 25b2b2b dbd33b2 15c3ea7 507c938 25b2b2b 15c3ea7 dbd33b2 25b2b2b 507c938 25b2b2b dbd33b2 15c3ea7 dbd33b2 507c938 dbd33b2 25b2b2b dbd33b2 25b2b2b dbd33b2 507c938 25b2b2b 15c3ea7 dbd33b2 25b2b2b dbd33b2 507c938 25b2b2b 507c938 25b2b2b 507c938 dbd33b2 25b2b2b 507c938 dbd33b2 25b2b2b dbd33b2 507c938 25b2b2b 507c938 25b2b2b 507c938 dbd33b2 15c3ea7 dbd33b2 25b2b2b dbd33b2 507c938 25b2b2b 507c938 dbd33b2 25b2b2b 15c3ea7 25b2b2b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
import sys
import os
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
import re
import logging
import ssl
import certifi
import requests
# Route every log record (DEBUG and above) to stdout only, using a
# timestamped "time - logger - level - message" layout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Module-scoped logger shared by everything defined in this file.
logger = logging.getLogger(__name__)
# Optional convenience: pick up variables from a `.env` file located in the
# directory *above* the one containing this module. A missing file is not an
# error, and any failure here is downgraded to a warning.
try:
    current_dir = os.path.dirname(os.path.abspath(__file__))
    dotenv_path = os.path.join(
        os.path.dirname(current_dir),
        '.env',
    )
    if os.path.exists(dotenv_path):
        load_dotenv(dotenv_path)
        logger.info(f"Loaded environment variables from {dotenv_path}")
except Exception as e:
    logger.warning(f"Could not load .env file: {e}")
# The YouTube Data API key is read from the environment (possibly populated
# from the .env file). Importing this module fails fast when it is absent.
API_KEY = os.getenv('YOUTUBE_API_KEY')

if not API_KEY:
    logger.error("YouTube API key not found in environment variables")
    raise ValueError("YouTube API key not found. Make sure it's set in your environment variables or .env file.")

# Never log the key itself: show only the first and last three characters,
# or a fixed placeholder when the key is too short to mask meaningfully.
if len(API_KEY) > 6:
    masked_key = f"{API_KEY[:3]}...{API_KEY[-3:]}"
else:
    masked_key = "***"
logger.info(f"API_KEY found (masked): {masked_key}")
def get_youtube_client():
    """Build and return a YouTube Data API v3 client authenticated with API_KEY.

    The client's HTTP transport is configured to validate TLS certificates
    against the certifi CA bundle.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``.

    Raises:
        Exception: any error raised while creating the transport or building
            the client is logged and re-raised unchanged.
    """
    # Imported here so the submodule is guaranteed to be loaded; the file's
    # top-level imports never import ``googleapiclient.http`` explicitly.
    from googleapiclient.http import build_http

    try:
        # build_http() returns an httplib2.Http object. httplib2 reads the CA
        # bundle from the ``ca_certs`` attribute when it opens a connection;
        # a ``verify`` attribute (the requests-library convention) is never
        # consulted, so it must not be used here.
        http = build_http()
        http.ca_certs = certifi.where()

        # Build the YouTube client on top of the configured transport.
        youtube = build('youtube', 'v3', developerKey=API_KEY, http=http)
        logger.info("YouTube API client initialized successfully")
        return youtube
    except Exception as e:
        logger.error(f"Error initializing YouTube API client: {str(e)}")
        raise
# Helpers for parsing YouTube URLs and fetching video/channel data
# Matches an 11-character ID that follows "v=" or "/" and is NOT followed by
# another ID character. The lookahead stops longer path segments (for example
# a 24-character channel ID) from being truncated into a bogus video ID.
_VIDEO_ID_PATTERN = re.compile(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])")


def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Handles any URL where the ID follows ``v=`` or a ``/``, such as
    ``watch?v=<id>``, ``youtu.be/<id>`` and ``/embed/<id>``.

    Args:
        url: The URL string to inspect; may be empty or None.

    Returns:
        The video ID string, or None when ``url`` is empty/None or contains
        no segment of exactly 11 ID characters.
    """
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
def get_video_metadata(video_id):
    """Look up descriptive metadata for a single video.

    Queries the ``videos.list`` endpoint for the snippet, contentDetails and
    statistics parts of ``video_id``.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys ``title``, ``author``, ``upload_date``,
        ``view_count``, ``like_count``, ``comment_count``, ``duration`` and
        ``description``; or None when the API returns no item or any error
        occurs while requesting or reading the response. Missing statistics
        default to ``'0'`` and an empty description to ``'Not Available'``.

    Note:
        The client is created outside the try block, so a failure in
        ``get_youtube_client`` propagates to the caller.
    """
    client = get_youtube_client()
    try:
        response = client.videos().list(
            part="snippet,contentDetails,statistics",
            id=video_id
        ).execute()

        # Guard clause: nothing came back for this ID.
        if 'items' not in response or len(response['items']) == 0:
            logger.error(f"No video found with id: {video_id}")
            return None

        video = response['items'][0]
        snippet = video['snippet']
        description = snippet.get('description', '').strip() or 'Not Available'

        details = {
            'title': snippet['title'],
            'author': snippet['channelTitle'],
            'upload_date': snippet['publishedAt'],
        }
        # Counters may be absent from the response; fall back to '0' for each.
        for field, source_key in (
            ('view_count', 'viewCount'),
            ('like_count', 'likeCount'),
            ('comment_count', 'commentCount'),
        ):
            details[field] = video['statistics'].get(source_key, '0')
        details['duration'] = video['contentDetails']['duration']
        details['description'] = description
        return details
    except Exception as e:
        logger.error(f"An error occurred while fetching metadata for video {video_id}: {str(e)}")
        return None
def get_transcript(video_id):
    """Fetch a video's transcript together with its metadata.

    Args:
        video_id: ID of the video; a falsy value short-circuits to None.

    Returns:
        ``{'transcript': ..., 'metadata': ...}`` on success, or None when
        ``video_id`` is falsy, the metadata lookup yields nothing, or any
        exception is raised while fetching (the error is logged).
    """
    if not video_id:
        return None

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        metadata = get_video_metadata(video_id)

        # Both values are logged before the metadata check, so a failed
        # metadata lookup is still visible in the log output.
        logger.info(f"Metadata for video {video_id}: {metadata}")
        logger.info(f"Transcript length for video {video_id}: {len(transcript)}")

        return {'transcript': transcript, 'metadata': metadata} if metadata else None
    except Exception as e:
        logger.error(f"Error extracting transcript for video {video_id}: {str(e)}")
        return None
def get_channel_videos(channel_url):
    """List up to 50 videos for the channel referenced by ``channel_url``.

    Issues a single ``search.list`` request (no pagination) restricted to
    results of type "video".

    Args:
        channel_url: URL from which the channel identifier is extracted.

    Returns:
        A list of dicts with the keys ``video_id``, ``title``,
        ``description`` and ``published_at``. An empty list is returned when
        the URL yields no identifier or when the request fails.

    NOTE(review): for ``@handle`` and ``/c/<name>`` URLs the extracted value
    is the handle/custom name rather than a channel ID; confirm that the
    ``channelId`` parameter accepts it.
    """
    youtube = get_youtube_client()

    channel_id = extract_channel_id(channel_url)
    if not channel_id:
        logger.error(f"Invalid channel URL: {channel_url}")
        return []

    try:
        response = youtube.search().list(
            part="id,snippet",
            channelId=channel_id,
            type="video",
            maxResults=50
        ).execute()

        # Flatten each search result into the fields callers use.
        return [
            {
                'video_id': item['id']['videoId'],
                'title': item['snippet']['title'],
                'description': item['snippet']['description'],
                'published_at': item['snippet']['publishedAt'],
            }
            for item in response['items']
        ]
    except HttpError as e:
        logger.error(f"An HTTP error {e.resp.status} occurred: {e.content}")
        return []
    except Exception as e:
        logger.error(f"An error occurred while fetching channel videos: {str(e)}")
        return []
def extract_channel_id(url):
    """Extract the channel identifier from a YouTube channel URL.

    Recognises the run of letters, digits, hyphens and underscores that
    follows ``channel/``, ``c/`` or ``@``.

    Args:
        url: The URL string to inspect; may be empty or None.

    Returns:
        The matched identifier, or None when ``url`` is empty/None or no
        supported pattern is present.
    """
    # Mirror extract_video_id: treat missing input as "no match" instead of
    # letting re.search raise TypeError on None.
    if not url:
        return None
    channel_id_match = re.search(r"(?:channel\/|c\/|@)([a-zA-Z0-9-_]+)", url)
    if channel_id_match:
        return channel_id_match.group(1)
    return None
def test_api_key():
    """Check that the configured API key can complete a real request.

    Performs a ``videos.list`` call for a fixed, known video ID and treats
    the presence of an ``items`` field in the response as success.

    Returns:
        True when the response contains ``items``; False when it does not
        or when any exception is raised (the error is logged).
    """
    try:
        response = (
            get_youtube_client()
            .videos()
            .list(part="snippet", id="dQw4w9WgXcQ")
            .execute()
        )
        key_works = 'items' in response
        if key_works:
            logger.info("API key is valid and working")
        else:
            logger.error("API request successful but returned unexpected response")
        return key_works
    except Exception as e:
        logger.error(f"API key test failed: {str(e)}")
        return False
def initialize_youtube_api():
    """Verify the YouTube API is usable and log the outcome.

    Returns:
        True when ``test_api_key`` succeeds, otherwise False.
    """
    if not test_api_key():
        logger.error("Failed to initialize YouTube API")
        return False

    logger.info("YouTube API initialized successfully")
    return True