SearchPod1.0

Running

App Files Files Community

SearchPod1.0 / utils.py

siddhartharyaai

Update utils.py

388741c verified 4 months ago

raw

history blame

20.7 kB

	import os
	import re
	import json
	import requests
	import tempfile
	from bs4 import BeautifulSoup
	from typing import List, Literal, Optional
	from pydantic import BaseModel
	from pydub import AudioSegment, effects
	from transformers import pipeline
	import yt_dlp
	import tiktoken
	from groq import Groq # Retained for LLM interaction
	import numpy as np
	import torch
	import random

	# New Imports - Corrected for local directory
	from opendeepresearch.agent import OpenDeepResearcher
	from report_structure import generate_report


	class DialogueItem(BaseModel):
	    # One line of the script. Documented with comments rather than a
	    # docstring so the model's generated schema description is untouched.
	    # Voice slot for the line; only the literals "Jane" and "John" validate.
	    speaker: Literal["Jane", "John"]
	    # Name to show for the line; defaults to "Jane" when not supplied.
	    display_speaker: str = "Jane"
	    # The words spoken on this line.
	    text: str

	class Dialogue(BaseModel):
	    # Ordered DialogueItem entries that make up the whole script.
	    dialogue: List[DialogueItem]

	# Module-level speech-recognition pipeline, constructed once when this file
	# is imported. `device` is 0 when torch reports CUDA as available, else -1.
	asr_pipeline = pipeline(
	    task="automatic-speech-recognition",
	    model="openai/whisper-tiny.en",
	    device=-1 if not torch.cuda.is_available() else 0,
	)

	def truncate_text(text, max_tokens=2048):
	    """Clip ``text`` to at most ``max_tokens`` tokens of the cl100k_base encoding.

	    Args:
	        text: The string to measure and, if necessary, shorten.
	        max_tokens: Largest number of tokens allowed in the result.

	    Returns:
	        ``text`` unchanged when it already fits; otherwise the decoding of
	        its first ``max_tokens`` tokens.
	    """
	    print("[LOG] Truncating text if needed.")
	    encoding = tiktoken.get_encoding("cl100k_base")
	    token_ids = encoding.encode(text)
	    # Guard clause: nothing to do when the text is already short enough.
	    if len(token_ids) <= max_tokens:
	        return text
	    print("[LOG] Text too long, truncating.")
	    kept_ids = token_ids[:max_tokens]
	    return encoding.decode(kept_ids)

	def extract_text_from_url(url):
	    """Download a web page and return its visible text (fallback extractor).

	    Best-effort helper: any failure is logged and reported as an empty
	    string rather than raised.

	    Args:
	        url: Address of the page to fetch.

	    Returns:
	        The page text with ``<script>`` and ``<style>`` elements removed and
	        text nodes joined by single spaces, or ``""`` when the request does
	        not return HTTP 200 or any exception occurs.
	    """
	    # This function is retained for potential edge cases.
	    print("[LOG] Extracting text from URL (fallback method):", url)
	    try:
	        headers = {
	            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	                           "AppleWebKit/537.36 (KHTML, like Gecko) "
	                           "Chrome/115.0.0.0 Safari/537.36")
	        }
	        # requests applies no timeout by default, so an unresponsive server
	        # would block the caller forever; 30 s matches the other GET in
	        # this module.
	        response = requests.get(url, headers=headers, timeout=30)
	        if response.status_code != 200:
	            print(f"[ERROR] Failed to fetch URL: {url} with status code {response.status_code}")
	            return ""
	        soup = BeautifulSoup(response.text, 'html.parser')
	        # Drop non-visible content before flattening the document to text.
	        for script in soup(["script", "style"]):
	            script.decompose()
	        text = soup.get_text(separator=' ')
	        print("[LOG] Text extraction from URL (fallback) successful.")
	        return text
	    except Exception as e:
	        print(f"[ERROR] Exception during text extraction from URL (fallback): {e}")
	        return ""

	def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
	    """Return ``audio`` shifted in pitch by ``semitones``.

	    The raw samples are re-labelled with a frame rate scaled by
	    ``2 ** (semitones / 12)`` and the result is then converted back to the
	    original frame rate.

	    Args:
	        audio: Segment to shift.
	        semitones: Number of semitones to shift by (negative lowers pitch).

	    Returns:
	        A new segment at the original frame rate.
	    """
	    print(f"[LOG] Shifting pitch by {semitones} semitones.")
	    original_rate = audio.frame_rate
	    rate_factor = 2.0 ** (semitones / 12.0)
	    scaled_rate = int(original_rate * rate_factor)
	    relabelled = audio._spawn(audio.raw_data, overrides={'frame_rate': scaled_rate})
	    return relabelled.set_frame_rate(original_rate)

	def is_sufficient(text: str, min_word_count: int = 500) -> bool:
	    """Report whether ``text`` has at least ``min_word_count`` words.

	    Words are the whitespace-separated pieces of ``text``. The count is
	    also printed as a debug line.
	    """
	    # This function's role is reduced; the agent decides.
	    words = text.split()
	    total = len(words)
	    print(f"[DEBUG] Aggregated word count: {total}")
	    if total < min_word_count:
	        return False
	    return True

	def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
	    """Retired placeholder (no longer needed): does nothing and returns None."""
	    return None
	def research_topic(topic: str) -> str:
	    """Retired placeholder (no longer needed): does nothing and returns None."""
	    return None

	def fetch_wikipedia_summary(topic: str) -> str:
	    """Retired placeholder (no longer needed): does nothing and returns None."""
	    return None

	def fetch_rss_feed(feed_url: str) -> list:
	    """Retired placeholder (no longer needed): does nothing and returns None."""
	    return None

	def find_relevant_article(items, topic: str, min_match=2) -> tuple:
	    """Retired placeholder (no longer needed): does nothing and returns None."""
	    return None

	def fetch_article_text(link: str) -> str:
	    """Retired placeholder (no longer needed): does nothing and returns None."""
	    return None

	def generate_script(
	    system_prompt: str,
	    input_text: str,
	    tone: str,
	    target_length: str,
	    host_name: str = "Jane",
	    guest_name: str = "John",
	    sponsor_style: str = "Separate Break",
	    sponsor_provided=None
	):
	    """Ask Deepseek R1 (via OpenRouter) for a two-speaker script and parse it.

	    Args:
	        system_prompt: Instructions placed at the top of the prompt.
	        input_text: Source material embedded in the prompt.
	        tone: One of "Humorous", "Formal", "Casual", "Youthful"; any other
	            value falls back to the description "casual".
	        target_length: Free text; the first integer found in it is used as
	            the number of minutes (3 when it contains no integer).
	        host_name: Display name of the host. Replaced by "Isha" when it is
	            empty or "Jane" and the Streamlit ``language_selection`` is
	            "English (Indian)", "Hinglish" or "Hindi".
	        guest_name: Display name of the guest. Replaced by "Aarav" under the
	            same conditions (empty or "John").
	        sponsor_style: "Separate Break" asks for a distinct ad break; any
	            other value asks for the sponsor to be blended in.
	        sponsor_provided: When truthy, sponsor instructions are included.

	    Returns:
	        A ``Dialogue`` whose items use ``speaker`` "Jane" for host lines and
	        "John" for guest lines, with ``display_speaker`` holding the name to
	        show.

	    Raises:
	        ValueError: If the API call fails, no JSON object is found in the
	            reply, or the JSON cannot be turned into dialogue items.
	    """
	    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
	    import streamlit as st

	    language = st.session_state.get("language_selection")
	    if language in ["English (Indian)", "Hinglish", "Hindi"]:
	        if host_name == "Jane" or not host_name:
	            host_name = "Isha"
	        if guest_name == "John" or not guest_name:
	            guest_name = "Aarav"
	    # Empty/None names would otherwise never match a speaker label (or
	    # crash on .lower()); fall back to the canonical names.
	    host_name = host_name or "Jane"
	    guest_name = guest_name or "John"

	    words_per_minute = 150
	    numeric_minutes = 3
	    match = re.search(r"(\d+)", target_length)
	    if match:
	        numeric_minutes = int(match.group(1))

	    min_words = max(50, numeric_minutes * 100)
	    max_words = numeric_minutes * words_per_minute

	    tone_map = {
	        "Humorous": "funny and exciting, makes people chuckle",
	        "Formal": "business-like, well-structured, professional",
	        "Casual": "like a conversation between close friends, relaxed and informal",
	        "Youthful": "like how teenagers might chat, energetic and lively"
	    }
	    chosen_tone = tone_map.get(tone, "casual")

	    if not sponsor_provided:
	        sponsor_instructions = ""
	    elif sponsor_style == "Separate Break":
	        sponsor_instructions = (
	            "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
	            "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
	        )
	    else:
	        sponsor_instructions = (
	            "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
	            "Avoid abrupt transitions."
	        )

	    prompt = (
	        f"{system_prompt}\n"
	        f"TONE: {chosen_tone}\n"
	        f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
	        f"INPUT TEXT: {input_text}\n\n"
	        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
	        "Please provide the output in the following JSON format without any additional text:\n\n"
	        "{\n"
	        ' "dialogue": [\n'
	        ' {\n'
	        ' "speaker": "Jane",\n'
	        ' "text": "..." \n'
	        ' },\n'
	        ' {\n'
	        ' "speaker": "John",\n'
	        ' "text": "..." \n'
	        ' }\n'
	        " ]\n"
	        "}"
	    )

	    # Add language-specific instructions
	    if language == "Hinglish":
	        prompt += "\n\nPlease generate the script in Romanized Hindi.\n"
	    elif language == "Hindi":
	        prompt += "\n\nPlease generate the script exclusively in Hindi, using only Hindi vocabulary and grammar without any English words or phrases.\n"

	    # Log only after the language instructions are appended so the log shows
	    # exactly what is sent.
	    print("[LOG] Sending prompt to Deepseek R1 via OpenRouter:")
	    print(prompt)

	    try:
	        headers = {
	            "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
	            "Content-Type": "application/json"
	        }
	        payload = {
	            "model": "deepseek/deepseek-r1",
	            "messages": [{"role": "user", "content": prompt}],
	            "max_tokens": 2048,
	            "temperature": 0.7
	        }
	        # (connect, read) timeout: requests waits forever by default. The
	        # read limit is generous because generation can be slow.
	        response = requests.post(
	            "https://openrouter.ai/api/v1/chat/completions",
	            headers=headers,
	            data=json.dumps(payload),
	            timeout=(10, 300),
	        )
	        response.raise_for_status()
	        raw_content = response.json()["choices"][0]["message"]["content"].strip()
	    except Exception as e:
	        print("[ERROR] Deepseek API error:", e)
	        raise ValueError(f"Error communicating with Deepseek API: {str(e)}") from e

	    # The reply may wrap the JSON in extra text; keep the outermost {...}.
	    start_index = raw_content.find('{')
	    end_index = raw_content.rfind('}')
	    if start_index == -1 or end_index == -1 or end_index < start_index:
	        raise ValueError("Failed to parse dialogue: No JSON found.")

	    json_str = raw_content[start_index:end_index+1].strip()

	    try:
	        data = json.loads(json_str)
	        dialogue_list = data.get("dialogue", [])

	        host_key = host_name.lower()
	        guest_key = guest_name.lower()
	        new_dialogue_items = []
	        for d in dialogue_list:
	            raw_speaker = d.get("speaker", "Jane")
	            speaker_key = raw_speaker.lower()
	            if speaker_key == host_key:
	                d["speaker"] = "Jane"
	                d["display_speaker"] = host_name
	            elif speaker_key == guest_key:
	                d["speaker"] = "John"
	                d["display_speaker"] = guest_name
	            elif speaker_key == "jane":
	                # The prompt's JSON example labels the host "Jane"; accept
	                # that label even when a custom host name is in use.
	                d["speaker"] = "Jane"
	                d["display_speaker"] = host_name
	            elif speaker_key == "john":
	                # Likewise "John" is the example's guest label; without this
	                # branch guest lines were assigned the host voice.
	                d["speaker"] = "John"
	                d["display_speaker"] = guest_name
	            else:
	                # Unknown speaker: use the host voice, keep the label.
	                d["speaker"] = "Jane"
	                d["display_speaker"] = raw_speaker
	            new_dialogue_items.append(DialogueItem(**d))

	        return Dialogue(dialogue=new_dialogue_items)
	    except json.JSONDecodeError as e:
	        print("[ERROR] JSON decoding (format) failed:", e)
	        raise ValueError(f"Failed to parse dialogue: {str(e)}") from e
	    except Exception as e:
	        print("[ERROR] JSON decoding failed:", e)
	        raise ValueError(f"Failed to parse dialogue: {str(e)}") from e

	def transcribe_youtube_video(video_url: str) -> str:
	    """Fetch the English transcript of a YouTube video through RapidAPI.

	    Args:
	        video_url: A YouTube URL containing an 11-character video ID after
	            ``v=`` or after a ``/``.

	    Returns:
	        The non-empty ``transcriptionAsText`` field of the first element of
	        the API response, stripped of surrounding whitespace.

	    Raises:
	        ValueError: If no video ID can be extracted from ``video_url``, or
	            if the request fails, returns a non-200 status, or yields an
	            unexpected or empty payload.
	    """
	    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
	    # The alternation must be a bare "|": written as "\|" it matches a
	    # literal pipe character and rejects every real YouTube URL.
	    video_id_match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})", video_url)
	    if not video_id_match:
	        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")

	    video_id = video_id_match.group(1)
	    print("[LOG] Extracted video ID:", video_id)

	    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
	    params = {"video_id": video_id, "lang": "en"}
	    headers = {
	        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
	        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
	    }

	    try:
	        response = requests.get(base_url, headers=headers, params=params, timeout=30)
	        print("[LOG] RapidAPI Response Status Code:", response.status_code)
	        print("[LOG] RapidAPI Response Body:", response.text)

	        if response.status_code != 200:
	            raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")

	        data = response.json()
	        # A non-empty list is expected; only its first element is read.
	        if not isinstance(data, list) or not data:
	            raise ValueError(f"Unexpected transcript format or empty transcript: {data}")

	        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
	        if not transcript_as_text:
	            raise ValueError("transcriptionAsText field is missing or empty.")

	        print("[LOG] Transcript retrieval successful.")
	        print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
	        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
	        print(f"[DEBUG] Transcript Snippet: {snippet}")

	        return transcript_as_text
	    except Exception as e:
	        # Every failure above (including the ValueErrors raised inside the
	        # try) is reported to the caller as one wrapped ValueError.
	        print("[ERROR] RapidAPI transcription error:", e)
	        raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}") from e

	def generate_audio_mp3(text: str, speaker: str) -> str:
	    """Synthesize ``text`` for ``speaker`` and return the path of an MP3 file.

	    The Streamlit ``language_selection`` session value (default
	    "English (American)") picks the backend: Deepgram for
	    "English (American)", the Murf API for every other value.

	    Args:
	        text: The line to speak.
	        speaker: "John" selects the second voice of the chosen backend; any
	            other value selects the first.

	    Returns:
	        Path of a temporary, volume-normalized MP3 file. The file is created
	        with ``delete=False``; nothing in this function removes it.

	    Raises:
	        ValueError: Wrapping any error raised during synthesis.
	    """
	    try:
	        import streamlit as st
	        print(f"[LOG] Generating audio for speaker: {speaker}")
	        language_selection = st.session_state.get("language_selection", "English (American)")
	        if language_selection == "English (American)":
	            return _synthesize_with_deepgram(text, speaker)
	        return _synthesize_with_murf(text, speaker, language_selection)
	    except Exception as e:
	        print("[ERROR] Error generating audio:", e)
	        raise ValueError(f"Error generating audio: {str(e)}") from e


	def _export_normalized_mp3(source_path: str, source_format: str) -> str:
	    """Normalize the audio at ``source_path`` into a new temp MP3; always delete the source."""
	    try:
	        audio_seg = AudioSegment.from_file(source_path, format=source_format)
	        audio_seg = effects.normalize(audio_seg)
	        # Close the placeholder right away so no open handle is left behind
	        # while the exporter writes to the same path.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_file:
	            final_mp3_path = out_file.name
	        try:
	            audio_seg.export(final_mp3_path, format="mp3")
	        except Exception:
	            # Do not leave a half-written output file behind.
	            if os.path.exists(final_mp3_path):
	                os.remove(final_mp3_path)
	            raise
	        return final_mp3_path
	    finally:
	        # The intermediate download is removed on success and on failure.
	        if os.path.exists(source_path):
	            os.remove(source_path)


	def _synthesize_with_deepgram(text: str, speaker: str) -> str:
	    """Stream speech from the Deepgram speak endpoint into a normalized temp MP3."""
	    print(f"[LOG] Using Deepgram for English (American)")
	    if speaker in ["John", "Jane"]:
	        processed_text = text
	    else:
	        processed_text = _preprocess_text_for_tts(text, speaker)
	    deepgram_api_url = "https://api.deepgram.com/v1/speak"
	    params = {"model": "aura-zeus-en" if speaker == "John" else "aura-asteria-en"}
	    headers = {
	        "Accept": "audio/mpeg",
	        "Content-Type": "application/json",
	        "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
	    }
	    body = {"text": processed_text}
	    # `with` releases the streamed connection; the timeout stops a stalled
	    # server from blocking forever (requests has no default timeout).
	    with requests.post(deepgram_api_url, params=params, headers=headers,
	                       json=body, stream=True, timeout=60) as response:
	        if response.status_code != 200:
	            raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
	        content_type = response.headers.get('Content-Type', '')
	        if 'audio/mpeg' not in content_type:
	            raise ValueError("Unexpected Content-Type from Deepgram.")
	        mp3_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
	        mp3_path = mp3_file.name
	        try:
	            with mp3_file:
	                for chunk in response.iter_content(chunk_size=8192):
	                    if chunk:
	                        mp3_file.write(chunk)
	        except Exception:
	            # A download that died midway must not leave its partial file.
	            if os.path.exists(mp3_path):
	                os.remove(mp3_path)
	            raise
	    return _export_normalized_mp3(mp3_path, "mp3")


	def _synthesize_with_murf(text: str, speaker: str, language_selection: str) -> str:
	    """Generate speech with the Murf API, download the WAV and return a normalized temp MP3."""
	    print(f"[LOG] Using Murf API for language: {language_selection}")
	    if language_selection == "Hinglish":
	        from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
	        text = transliterate(text, DEVANAGARI, IAST)
	    api_key = os.environ.get("MURF_API_KEY")
	    headers = {
	        "Content-Type": "application/json",
	        "Accept": "application/json",
	        "api-key": api_key
	    }
	    # Hindi and Hinglish share the hi-IN locale and voices; every other
	    # language handled here uses the en-IN ones.
	    uses_hindi_voice = language_selection in ["Hinglish", "Hindi"]
	    multi_native_locale = "hi-IN" if uses_hindi_voice else "en-IN"
	    if uses_hindi_voice:
	        voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
	    else:
	        voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
	    payload = {
	        "audioDuration": 0,
	        "channelType": "MONO",
	        "encodeAsBase64": False,
	        "format": "WAV",
	        "modelVersion": "GEN2",
	        "multiNativeLocale": multi_native_locale,
	        "pitch": 0,
	        "pronunciationDictionary": {},
	        "rate": 0,
	        "sampleRate": 48000,
	        "style": "Conversational",
	        "text": text,
	        "variation": 1,
	        "voiceId": voice_id
	    }
	    response = requests.post("https://api.murf.ai/v1/speech/generate",
	                             headers=headers, json=payload, timeout=60)
	    if response.status_code != 200:
	        raise ValueError(f"Murf API error: {response.status_code}, {response.text}")
	    json_resp = response.json()
	    audio_url = json_resp.get("audioFile")
	    if not audio_url:
	        raise ValueError("No audio file URL returned by Murf API")
	    audio_response = requests.get(audio_url, timeout=60)
	    if audio_response.status_code != 200:
	        raise ValueError(f"Error fetching audio from {audio_url}")
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
	        wav_file.write(audio_response.content)
	        wav_path = wav_file.name
	    return _export_normalized_mp3(wav_path, "wav")

	def transcribe_youtube_video_OLD_YTDLP(video_url: str) -> str:
	    """Empty placeholder: performs no work and returns None."""
	    return None

	def _preprocess_text_for_tts(text: str, speaker: str) -> str:
	text = re.sub(r"\bNo\.\b", "Number", text)
	text = re.sub(r"\b(?i)SaaS\b", "sass", text)
	abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
	def insert_periods_for_abbrev(m):
	abbr = m.group(0)
	if abbr in abbreviations_as_words:
	return abbr
	return ".".join(list(abbr)) + "."
	text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
	text = re.sub(r"\.\.", ".", text)
	def remove_periods_for_tts(m):
	return m.group().replace(".", " ").strip()
	text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
	text = re.sub(r"-", " ", text)
	text = re.sub(r"\b(ha(ha)?\|heh\|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
	text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
	text = re.sub(r"\b(groan\|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
	if speaker != "Jane":
	def insert_thinking_pause(m):
	word = m.group(1)
	if random.random() < 0.3:
	filler = random.choice(['hmm,', 'well,', 'let me see,'])
	return f"{word}..., {filler}"
	else:
	return f"{word}...,"
	keywords_pattern = r"\b(important\|significant\|crucial\|point\|topic)\b"
	text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
	conj_pattern = r"\b(and\|but\|so\|because\|however)\b"
	text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
	text = re.sub(r"\b(uh\|um\|ah)\b", "", text, flags=re.IGNORECASE)
	def capitalize_match(m):
	return m.group().upper()
	text = re.sub(r'(^\s\w)\|([.!?]\s\w)', capitalize_match, text)
	return text.strip()

	def _spell_digits(d: str) -> str:
	    """Spell each ASCII digit of ``d`` as an English word, space-separated.

	    Characters other than "0"-"9" are skipped, so "a1b2" yields "one two"
	    and a string without digits yields "".
	    """
	    names = "zero one two three four five six seven eight nine".split()
	    lookup = dict(zip("0123456789", names))
	    spoken = []
	    for ch in d:
	        if ch in lookup:
	            spoken.append(lookup[ch])
	    return " ".join(spoken)

	def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
	    """Lay ``spoken`` over looped, quietened background music.

	    The music is lowered by 18 dB, repeated until it covers the speech plus
	    2000 ms, trimmed to exactly that length, and the speech is overlaid
	    starting at the 2000 ms mark.

	    Args:
	        spoken: The speech segment.
	        custom_music_path: Path of the music file to use; when falsy,
	            "bg_music.mp3" is loaded instead. The file is decoded as MP3.

	    Returns:
	        The mixed segment, or ``spoken`` unchanged when the music cannot be
	        loaded or contains no audio.
	    """
	    music_path = custom_music_path if custom_music_path else "bg_music.mp3"

	    try:
	        bg_music = AudioSegment.from_file(music_path, format="mp3")
	    except Exception as e:
	        print("[ERROR] Failed to load background music:", e)
	        return spoken

	    # A zero-length track would never grow the loop below, so the while
	    # condition would stay true forever.
	    if len(bg_music) == 0:
	        print("[ERROR] Failed to load background music:", "music track is empty")
	        return spoken

	    bg_music = bg_music - 18.0
	    total_length_ms = len(spoken) + 2000
	    looped_music = AudioSegment.empty()
	    while len(looped_music) < total_length_ms:
	        looped_music += bg_music
	    looped_music = looped_music[:total_length_ms]
	    final_mix = looped_music.overlay(spoken, position=2000)
	    return final_mix

	def call_groq_api_for_qa(system_prompt: str) -> str:
	    """Send ``system_prompt`` to Groq's chat-completions endpoint and return the reply.

	    The prompt is sent as a single "user" message to the
	    "deepseek-r1-distill-llama-70b" model.

	    Args:
	        system_prompt: Full prompt text to send.

	    Returns:
	        The stripped content of the first choice. On any error, a JSON
	        string encoding an apology line spoken by "John" is returned
	        instead of raising.
	    """
	    #Kept for use, Changed model
	    try:
	        headers = {
	            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}", # Use GROQ API KEY
	            "Content-Type": "application/json",
	            "Accept": "application/json"
	        }
	        data = {
	            "model": "deepseek-r1-distill-llama-70b", #Using Deepseek
	            "messages": [{"role": "user", "content": system_prompt}],
	            "max_tokens": 512,
	            "temperature": 0.7
	        }
	        # requests never times out by default; bound the wait so a stalled
	        # connection ends in the fallback answer instead of hanging.
	        response = requests.post(
	            "https://api.groq.com/openai/v1/chat/completions", #Using groq endpoint
	            headers=headers,
	            data=json.dumps(data),
	            timeout=60,
	        )
	        response.raise_for_status()
	        return response.json()["choices"][0]["message"]["content"].strip()
	    except Exception as e:
	        # Deliberate best-effort: callers always get a usable answer.
	        print("[ERROR] Groq API error:", e)
	        fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
	        return json.dumps(fallback)

	# --- Agent and Tavily Integration ---

	def run_research_agent(topic: str, report_type: str = "research_report", max_results: int = 20) -> str:
	    """
	    Produce a structured research report on a topic with the Open Deep Research agent.

	    Args:
	    topic: Subject to research.
	    report_type: Report kind passed through to the agent (currently only supports "research_report").
	    max_results: Upper bound on search results passed through to the agent.

	    Returns:
	    The output of ``generate_report`` applied to the agent's result. If
	    anything raises, an error message string is returned instead.
	    """
	    print(f"[LOG] Starting research agent for topic: {topic}")
	    try:
	        researcher = OpenDeepResearcher(
	            topic,
	            report_type=report_type,
	            max_results=max_results,
	            tavily_api_key=os.environ.get("TAVILY_API_KEY"),
	        )
	        raw_report = researcher.run()
	        print("[LOG] Research agent completed successfully.")
	        # Hand the agent output to the report_structure module for formatting.
	        return generate_report(raw_report)
	    except Exception as e:
	        print(f"[ERROR] Error in research agent: {e}")
	        return f"Sorry, I encountered an error during research: {e}"