Spaces:
Sleeping
Sleeping
File size: 5,504 Bytes
5fdb69e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel\n",
"%pip install -q youtube_transcript_api"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"from youtube_transcript_api import YouTubeTranscriptApi"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables from a file called .env\n",
"\n",
"load_dotenv(override=True)\n",
"api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"# OpenAI() picks the key up from the environment, so flag a missing key here\n",
"# instead of failing later when the client is created\n",
"if not api_key:\n",
"    print('OPENAI_API_KEY is not set - add it to your .env file before creating a YouTubeWebLink')"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"class YouTubeWebLink:\n",
"    \"\"\"Summarize a YouTube video by sending its transcript to an OpenAI chat model.\n",
"\n",
"    Attributes set by __init__:\n",
"        url: the URL passed in by the caller.\n",
"        video_id: the 11-character video id extracted from the URL.\n",
"        openai: an OpenAI client created with default settings.\n",
"        system_prompt: the system message sent with every summary request.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, url):\n",
"        self.url = url\n",
"        self.video_id = self.get_video_id(url)\n",
"        self.set_openai_client()\n",
"        self.set_system_prompt()\n",
"\n",
"    def get_video_id(self, url):\n",
"        \"\"\"Extract the 11-character video id from a YouTube URL.\n",
"\n",
"        Recognizes watch links (v=), youtu.be share links (be/), and\n",
"        shorts/ and embed/ links.\n",
"\n",
"        Raises:\n",
"            ValueError: if no video id can be found in the URL.\n",
"        \"\"\"\n",
"        regex = r\"(?:v=|be/|shorts/|embed/)([a-zA-Z0-9_-]{11})\"\n",
"        match = re.search(regex, url)\n",
"        if match:\n",
"            return match.group(1)\n",
"        raise ValueError(\"Probably not a YouTube URL\")\n",
"\n",
"    def set_openai_client(self):\n",
"        \"\"\"Create the OpenAI client used for summarization.\"\"\"\n",
"        self.openai = OpenAI()\n",
"\n",
"    def set_system_prompt(self, system_prompt=None):\n",
"        \"\"\"Set the system prompt sent with every summary request.\n",
"\n",
"        Falls back to the built-in summarizer prompt when system_prompt is None.\n",
"        \"\"\"\n",
"        self.system_prompt = \"\"\"\n",
"        You are a skilled explainer and storyteller who specializes in summarizing YouTube video transcripts in a way that's both engaging and informative. \n",
"        Your task is to:\n",
"        - Capture key points and main ideas of the video\n",
"        - Structure your summary in clear sections\n",
"        - Include important details, facts, and figures mentioned\n",
"        - Never end your summary with a \"Conclusion\" section\n",
"        - Keep the summary short and easy to understand\n",
"        - Always format your response in markdown for better readability\n",
"        \"\"\" if system_prompt is None else system_prompt\n",
"\n",
"    def get_transcript(self):\n",
"        \"\"\"Fetch the video's transcript and return it as one space-joined string.\n",
"\n",
"        Returns None (after printing the error) when the transcript cannot be fetched.\n",
"        \"\"\"\n",
"        try:\n",
"            print('Fetching video transcript...')\n",
"            if hasattr(YouTubeTranscriptApi, 'get_transcript'):\n",
"                # Older releases: static helper returning a list of dicts\n",
"                transcript = YouTubeTranscriptApi.get_transcript(self.video_id)\n",
"                texts = [item['text'] for item in transcript]\n",
"            else:\n",
"                # Releases without the static helper: instance-level fetch()\n",
"                # yields snippet objects that expose the text as an attribute\n",
"                transcript = YouTubeTranscriptApi().fetch(self.video_id)\n",
"                texts = [snippet.text for snippet in transcript]\n",
"            return \" \".join(texts)\n",
"        except Exception as e:\n",
"            print(f\"Error fetching transcript: {e}\")\n",
"            return None\n",
"\n",
"    def get_summary_from_transcript(self, transcript, model=\"gpt-4o-mini\"):\n",
"        \"\"\"Summarize transcript text with an OpenAI chat model.\n",
"\n",
"        Args:\n",
"            transcript: the transcript text to summarize.\n",
"            model: name of the chat model to call.\n",
"\n",
"        Returns:\n",
"            The summary text, or None when there is no transcript or the request fails.\n",
"        \"\"\"\n",
"        if not transcript:\n",
"            # Without this guard the literal text \"None\" would be sent to the model\n",
"            print('No transcript available to summarize.')\n",
"            return None\n",
"        try:\n",
"            print('Summarizing video...')\n",
"            response = self.openai.chat.completions.create(\n",
"                model=model,\n",
"                messages=[\n",
"                    {\"role\": \"system\", \"content\": self.system_prompt},\n",
"                    {\"role\": \"user\", \"content\": f\"Summarize the following YouTube video transcript:\\n\\n{transcript}\"}\n",
"                ]\n",
"            )\n",
"            return response.choices[0].message.content\n",
"        except Exception as e:\n",
"            print(f\"Error summarizing text: {e}\")\n",
"            return None\n",
"\n",
"    def display_summary(self):\n",
"        \"\"\"Fetch the transcript, summarize it, and render the summary as Markdown.\n",
"\n",
"        Renders nothing when either step fails; the failing step prints its own error.\n",
"        \"\"\"\n",
"        transcript = self.get_transcript()\n",
"        if not transcript:\n",
"            return\n",
"        summary = self.get_summary_from_transcript(transcript)\n",
"        if summary is None:\n",
"            return\n",
"        display(Markdown(summary))\n"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"# Two URL forms for the same YouTube video: the full watch link (with a playlist\n",
"# parameter) and the short youtu.be share link\n",
"test_url_1 = \"https://www.youtube.com/watch?v=nYy-umCNKPQ&list=PLWHe-9GP9SMMdl6SLaovUQF2abiLGbMjs\"\n",
"test_url_2 = \"https://youtu.be/nYy-umCNKPQ?si=ILVrQlKT0W71G5pU\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Both URL forms must resolve to the same video id\n",
"video1, video2 = YouTubeWebLink(test_url_1), YouTubeWebLink(test_url_2)\n",
"assert video1.video_id == video2.video_id, \"watch and share URLs should yield the same id\"\n",
"video1.video_id, video2.video_id"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fetch the transcript for video1, summarize it, and render the result as Markdown\n",
"video1.display_summary()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llms",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|