Agentic_RAG / web_scrapping.py
mariemerenc's picture
Upload 7 files
8da6fbf verified
from crewai import Agent, Task, Crew
from llm_setup import crew_llm
from crewai_tools import (
SerperDevTool,
ScrapeWebsiteTool
)
# Function made available on DataCamp
def setup_web_scraping_agent():
    """Assemble the crew that searches the web and then scrapes a page.

    Two agents are created (a web search agent and a web scraper agent),
    both using ``crew_llm`` with delegation disabled and verbose output on.
    Each agent gets one task whose prompt is templated on the ``{topic}``
    input; the search tool and the scraping tool are attached to the
    respective task rather than to the agent.

    Returns:
        The ``Crew`` holding both agents, with the search task listed
        before the scraping task.
    """
    # Tools: one for performing web searches, one for extracting page data.
    serper_search = SerperDevTool()
    page_scraper = ScrapeWebsiteTool()

    # Settings shared by both agents.
    common_agent_options = {
        "allow_delegation": False,
        "verbose": True,
        "llm": crew_llm,
    }

    searcher = Agent(
        role="Expert Web Search Agent",
        goal="Identify and retrieve relevant web data for user queries",
        backstory="An expert in identifying valuable web sources for the user's needs",
        **common_agent_options,
    )
    scraper = Agent(
        role="Expert Web Scraper Agent",
        goal="Extract and analyze content from specific web pages identified by the search agent",
        backstory="A highly skilled web scraper, capable of analyzing and summarizing website content accurately",
        **common_agent_options,
    )

    # Prompt text for the search step.
    search_description = (
        "Identify the most relevant web page or article for the topic: '{topic}'. "
        "Use all available tools to search for and provide a link to a web page "
        "that contains valuable information about the topic. Keep your response concise."
    )
    search_expected = (
        "A concise summary of the most relevant web page or article for '{topic}', "
        "including the link to the source and key points from the content."
    )
    find_page = Task(
        description=search_description,
        expected_output=search_expected,
        tools=[serper_search],
        agent=searcher,
    )

    # Prompt text for the scraping step.
    scrape_description = (
        "Extract and analyze data from the given web page or website. Focus on the key sections "
        "that provide insights into the topic: '{topic}'. Use all available tools to retrieve the content, "
        "and summarize the key findings in a concise manner."
    )
    scrape_expected = (
        "A detailed summary of the content from the given web page or website, highlighting the key insights "
        "and explaining their relevance to the topic: '{topic}'. Ensure clarity and conciseness."
    )
    read_page = Task(
        description=scrape_description,
        expected_output=scrape_expected,
        tools=[page_scraper],
        agent=scraper,
    )

    # Bundle agents and tasks; task order is search first, then scraping.
    return Crew(
        agents=[searcher, scraper],
        tasks=[find_page, read_page],
        verbose=1,
        memory=False,
    )
# Function made available on DataCamp
def get_web_content(query):
    """Run the web search/scrape crew for a query and return its raw output.

    Args:
        query: Value passed to the crew as the ``topic`` input, which the
            task prompts reference as ``{topic}``.

    Returns:
        The ``raw`` attribute of the object returned by the crew's
        ``kickoff`` call.
    """
    # A fresh crew is built on every call.
    crew_output = setup_web_scraping_agent().kickoff(inputs={"topic": query})
    return crew_output.raw