oma / transcript_processor.py
Dave Cavell
Correct typo
d6ed924
import requests
import os
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
# Hugging Face inference API URL for the SamLowe/roberta-base-go_emotions model.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "SamLowe/roberta-base-go_emotions"
)
# The Authorization value is read verbatim from the BEARER environment
# variable; it is None when that variable is not set.
headers = {"Authorization": os.environ.get("BEARER")}
def query(payload, timeout=30):
    """
    Sends a request to the Hugging Face API with the given payload.

    Args:
        payload (dict): The payload to send in the request.
        timeout (float or tuple): Seconds to wait for the server before
            giving up; passed straight to ``requests.post``. Defaults to 30.

    Returns:
        The decoded JSON body of the response. If the request fails, times
        out, comes back with an HTTP error status, or the body is not valid
        JSON, ``{"error": <message>}`` is returned instead.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        # Decode once and reuse the result for both the print and the return.
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures that are not raised as a
        # RequestException subclass.
        return {"error": str(e)}
    print(result)
    return result
def get_youtube_transcript(video_id, max_chars=800):
    """
    Retrieves and formats the transcript of a YouTube video.

    Args:
        video_id (str): The ID of the YouTube video.
        max_chars (int): Maximum number of characters to return.
            Defaults to 800.

    Returns:
        str: The transcript as a single line of text, truncated to
        ``max_chars`` characters. If any step raises, the exception message
        is returned instead, so callers always receive a string.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        raw_text = TextFormatter().format_transcript(transcript)
        # Turn line breaks into spaces rather than deleting them, so the words
        # on either side of a break are not fused together.
        formatted_transcript = raw_text.replace('\r', '').replace('\n', ' ')
        print(formatted_transcript)
        # Truncate the text to deal with limitations of the model
        return formatted_transcript[:max_chars]
    except Exception as e:
        # Deliberately broad: failures are reported to the caller as text.
        return str(e)