benfls committed on
Commit
4e13619
·
1 Parent(s): 715b2e1

ajout system prompt

Browse files
Files changed (5) hide show
  1. agent.py +39 -37
  2. config.py +2 -1
  3. requirements.txt +1 -0
  4. tools/__init__.py +2 -1
  5. tools/web_tools.py +40 -15
agent.py CHANGED
@@ -10,12 +10,12 @@ from langchain_huggingface import HuggingFaceEndpoint
10
  from langgraph.graph import START, StateGraph, MessagesState
11
  from langgraph.prebuilt import tools_condition, ToolNode
12
 
13
- from tools import WebSearchTool, WebContentTool
14
  #, AudioToTextTool, SpreadsheetParserTool, StringUtilitiesTool
15
 
16
  import config
17
 
18
- # Configuration du logging
19
  logging.basicConfig(
20
  level=logging.INFO,
21
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -25,16 +25,16 @@ logger = logging.getLogger("Agent")
25
 
26
  class LangGraphAgent:
27
  """
28
- Agent avancé utilisant LangGraph et le modèle Qwen3-30B-A3B
29
  """
30
 
31
  def __init__(self, verbose: bool = True):
32
  """
33
- Initialise l'agent avec ses outils et sa configuration.
34
  """
35
  self.verbose = verbose
36
 
37
- # Initialisation du modèle
38
  self.llm = HuggingFaceEndpoint(
39
  repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
40
  huggingfacehub_api_token=config.HUGGINGFACE_API_KEY,
@@ -42,21 +42,24 @@ class LangGraphAgent:
42
  temperature = config.TEMPERATURE
43
  )
44
 
45
- # Charger les outils
46
  self.tools = self._setup_tools()
47
 
48
- # Créer le graphe d'exécution
49
  self.workflow = self._create_workflow()
50
 
51
  def _setup_tools(self) -> List[BaseTool]:
52
- """Configure et retourne les outils disponibles pour l'agent"""
53
  tools = []
54
 
55
- # Ajouter les outils selon la configuration
56
  if config.ENABLE_WEB_SEARCH:
57
  tools.append(WebSearchTool())
58
  tools.append(WebContentTool())
59
 
 
 
 
60
  # if config.ENABLE_AUDIO_TO_TEXT:
61
  # tools.append(AudioToTextTool())
62
 
@@ -66,13 +69,13 @@ class LangGraphAgent:
66
  # if config.ENABLE_STRING_UTILITIES:
67
  # tools.append(StringUtilitiesTool())
68
 
69
- logger.info(f"Agent initialisé avec {len(tools)} outils")
70
  return tools
71
 
72
  def _create_workflow(self) -> StateGraph:
73
- """Crée le graphe d'exécution de l'agent"""
74
 
75
- # Définir le prompt
76
  prompt = ChatPromptTemplate.from_messages([
77
  ("system", config.DEFAULT_SYSTEM_MESSAGE),
78
  MessagesPlaceholder(variable_name="chat_history"),
@@ -80,9 +83,9 @@ class LangGraphAgent:
80
  MessagesPlaceholder(variable_name="agent_scratchpad"),
81
  ])
82
 
83
- # Définir les nodes
84
  def assistant(state: MessagesState):
85
- # Utiliser le prompt pour formater les messages
86
  messages = prompt.format_messages(
87
  chat_history=state["messages"][:-1],
88
  input=state["messages"][-1].content,
@@ -90,14 +93,14 @@ class LangGraphAgent:
90
  )
91
  return {"messages": [self.llm.invoke(messages)]}
92
 
93
- # Créer le graphe
94
  builder = StateGraph(MessagesState)
95
 
96
- # Ajouter les nodes
97
  builder.add_node("assistant", assistant)
98
  builder.add_node("tools", ToolNode(self.tools))
99
 
100
- # Ajouter les edges
101
  builder.add_edge(START, "assistant")
102
  builder.add_conditional_edges(
103
  "assistant",
@@ -105,60 +108,59 @@ class LangGraphAgent:
105
  )
106
  builder.add_edge("tools", "assistant")
107
 
108
- # Compiler le graphe
109
  return builder.compile()
110
 
111
  def __call__(self, question: str) -> str:
112
  """
113
- Répond à une question en utilisant l'agent.
114
 
115
  Args:
116
- question: La question à laquelle répondre
117
 
118
  Returns:
119
- La réponse de l'agent
120
  """
121
  if not question.strip():
122
- return "Veuillez poser une question."
123
 
124
  try:
125
- logger.info(f"Question reçue: {question[:50]}...")
126
 
127
- # Exécuter le workflow
128
  result = self.workflow.invoke({
129
  "messages": [HumanMessage(content=question)]
130
  })
131
 
132
- # Extraire la réponse finale
133
  final_message = result["messages"][-1].content
134
- if "FINAL ANSWER:" in final_message:
135
- return final_message.split("FINAL ANSWER:")[1].strip()
136
  return final_message
137
 
138
  except Exception as e:
139
- logger.error(f"Erreur lors du traitement de la question: {str(e)}")
140
- return f"Désolé, une erreur s'est produite: {str(e)}"
141
 
142
  if __name__ == "__main__":
143
- # Créer une instance de l'agent
144
  agent = LangGraphAgent(verbose=True)
145
 
146
- # Question à tester
147
  question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
148
 
149
- # Obtenir la réponse
150
  response = agent(question)
151
 
152
- # Afficher la réponse
153
  print("\nQuestion:", question)
154
- print("\nRéponse:", response)
155
 
156
- # Question à tester
157
- question ="In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
158
 
159
  # Obtenir la réponse
160
  response = agent(question)
161
-
162
  # Afficher la réponse
163
  print("\nQuestion:", question)
164
  print("\nRéponse:", response)
 
10
  from langgraph.graph import START, StateGraph, MessagesState
11
  from langgraph.prebuilt import tools_condition, ToolNode
12
 
13
+ from tools import WebSearchTool, WebContentTool, WikipediaSearchTool
14
  #, AudioToTextTool, SpreadsheetParserTool, StringUtilitiesTool
15
 
16
  import config
17
 
18
+ # Configure logging
19
  logging.basicConfig(
20
  level=logging.INFO,
21
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 
25
 
26
  class LangGraphAgent:
27
  """
28
+ Advanced agent using LangGraph and the Qwen2.5-Coder-32B-Instruct model
29
  """
30
 
31
  def __init__(self, verbose: bool = True):
32
  """
33
+ Initialize the agent with its tools and configuration.
34
  """
35
  self.verbose = verbose
36
 
37
+ # Initialize the model
38
  self.llm = HuggingFaceEndpoint(
39
  repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
40
  huggingfacehub_api_token=config.HUGGINGFACE_API_KEY,
 
42
  temperature = config.TEMPERATURE
43
  )
44
 
45
+ # Load tools
46
  self.tools = self._setup_tools()
47
 
48
+ # Create the execution graph
49
  self.workflow = self._create_workflow()
50
 
51
  def _setup_tools(self) -> List[BaseTool]:
52
+ """Configure and return the available tools for the agent"""
53
  tools = []
54
 
55
+ # Add tools according to configuration
56
  if config.ENABLE_WEB_SEARCH:
57
  tools.append(WebSearchTool())
58
  tools.append(WebContentTool())
59
 
60
+ if config.ENABLE_WIKIPEDIA_SEARCH:
61
+ tools.append(WikipediaSearchTool())
62
+
63
  # if config.ENABLE_AUDIO_TO_TEXT:
64
  # tools.append(AudioToTextTool())
65
 
 
69
  # if config.ENABLE_STRING_UTILITIES:
70
  # tools.append(StringUtilitiesTool())
71
 
72
+ logger.info(f"Agent initialized with {len(tools)} tools")
73
  return tools
74
 
75
  def _create_workflow(self) -> StateGraph:
76
+ """Create the agent's execution graph"""
77
 
78
+ # Define the prompt
79
  prompt = ChatPromptTemplate.from_messages([
80
  ("system", config.DEFAULT_SYSTEM_MESSAGE),
81
  MessagesPlaceholder(variable_name="chat_history"),
 
83
  MessagesPlaceholder(variable_name="agent_scratchpad"),
84
  ])
85
 
86
+ # Define the nodes
87
  def assistant(state: MessagesState):
88
+ # Use the prompt to format messages
89
  messages = prompt.format_messages(
90
  chat_history=state["messages"][:-1],
91
  input=state["messages"][-1].content,
 
93
  )
94
  return {"messages": [self.llm.invoke(messages)]}
95
 
96
+ # Create the graph
97
  builder = StateGraph(MessagesState)
98
 
99
+ # Add nodes
100
  builder.add_node("assistant", assistant)
101
  builder.add_node("tools", ToolNode(self.tools))
102
 
103
+ # Add edges
104
  builder.add_edge(START, "assistant")
105
  builder.add_conditional_edges(
106
  "assistant",
 
108
  )
109
  builder.add_edge("tools", "assistant")
110
 
111
+ # Compile the graph
112
  return builder.compile()
113
 
114
  def __call__(self, question: str) -> str:
115
  """
116
+ Answer a question using the agent.
117
 
118
  Args:
119
+ question: The question to answer
120
 
121
  Returns:
122
+ The agent's answer
123
  """
124
  if not question.strip():
125
+ return "Please ask a question."
126
 
127
  try:
128
+ logger.info(f"Question received: {question[:50]}...")
129
 
130
+ # Execute the workflow
131
  result = self.workflow.invoke({
132
  "messages": [HumanMessage(content=question)]
133
  })
134
 
135
+ # Extract the final answer
136
  final_message = result["messages"][-1].content
137
+ # if "FINAL ANSWER:" in final_message:
138
+ # return final_message.split("FINAL ANSWER:")[1].strip()
139
  return final_message
140
 
141
  except Exception as e:
142
+ logger.error(f"Error processing the question: {str(e)}")
143
+ return f"Sorry, an error occurred: {str(e)}"
144
 
145
  if __name__ == "__main__":
146
+ # Create an instance of the agent
147
  agent = LangGraphAgent(verbose=True)
148
 
149
+ # Question to test
150
  question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
151
 
152
+ # Get the answer
153
  response = agent(question)
154
 
155
+ # Display the answer
156
  print("\nQuestion:", question)
157
+ print("\nAnswer:", response)
158
 
159
+ # Question to test
160
+ question = ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI"
161
 
162
  # Obtenir la réponse
163
  response = agent(question)
 
164
  # Afficher la réponse
165
  print("\nQuestion:", question)
166
  print("\nRéponse:", response)
config.py CHANGED
@@ -16,7 +16,8 @@ TEMPERATURE = 0.7
16
  RETRY_ATTEMPTS = 3
17
 
18
  # Configuration des outils
19
- ENABLE_WEB_SEARCH = True
 
20
  ENABLE_AUDIO_TO_TEXT = False
21
  ENABLE_SPREADSHEET_PARSER = False
22
  ENABLE_STRING_UTILITIES = False
 
16
  RETRY_ATTEMPTS = 3
17
 
18
  # Configuration des outils
19
+ ENABLE_WEB_SEARCH = False
20
+ ENABLE_WIKIPEDIA_SEARCH = True
21
  ENABLE_AUDIO_TO_TEXT = False
22
  ENABLE_SPREADSHEET_PARSER = False
23
  ENABLE_STRING_UTILITIES = False
requirements.txt CHANGED
@@ -12,3 +12,4 @@ pandas
12
  openpyxl
13
  pydub
14
  huggingface-hub
 
 
12
  openpyxl
13
  pydub
14
  huggingface-hub
15
+ wikipedia
tools/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .web_tools import WebSearchTool, WebContentTool
2
  # from .audio_tools import AudioToTextTool
3
  # from .spreadsheet_tools import SpreadsheetParserTool
4
  # from .string_tools import StringUtilitiesTool
@@ -6,6 +6,7 @@ from .web_tools import WebSearchTool, WebContentTool
6
  __all__ = [
7
  'WebSearchTool',
8
  'WebContentTool',
 
9
  # 'AudioToTextTool',
10
  # 'SpreadsheetParserTool',
11
  # 'StringUtilitiesTool'
 
1
+ from .web_tools import WebSearchTool, WebContentTool, WikipediaSearchTool
2
  # from .audio_tools import AudioToTextTool
3
  # from .spreadsheet_tools import SpreadsheetParserTool
4
  # from .string_tools import StringUtilitiesTool
 
6
  __all__ = [
7
  'WebSearchTool',
8
  'WebContentTool',
9
+ 'WikipediaSearchTool',
10
  # 'AudioToTextTool',
11
  # 'SpreadsheetParserTool',
12
  # 'StringUtilitiesTool'
tools/web_tools.py CHANGED
@@ -3,31 +3,56 @@ from langchain.tools import BaseTool
3
  from typing import Optional, Type
4
  import requests
5
  from bs4 import BeautifulSoup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  class WebSearchTool(BaseTool):
8
  name: str = "web_search"
9
- description: str = "Recherche des informations sur le web à partir d'un terme de recherche"
10
  args_schema: Optional[Type] = None
11
 
12
  def _run(self, query: str) -> str:
13
- """Exécute une recherche web et retourne les résultats pertinents"""
14
  try:
15
  search_tool = DuckDuckGoSearchRun()
16
  return search_tool.run(query)
17
  except Exception as e:
18
- return f"Erreur lors de la recherche web: {str(e)}"
19
 
20
  async def _arun(self, query: str) -> str:
21
- """Version asynchrone de l'outil"""
22
  return self._run(query)
23
 
24
  class WebContentTool(BaseTool):
25
  name: str = "fetch_web_content"
26
- description: str = "Récupère le contenu d'une page web à partir d'une URL"
27
  args_schema: Optional[Type] = None
28
 
29
  def _run(self, url: str) -> str:
30
- """Récupère et nettoie le contenu d'une page web"""
31
  try:
32
  headers = {
33
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
@@ -35,32 +60,32 @@ class WebContentTool(BaseTool):
35
  response = requests.get(url, headers=headers, timeout=10)
36
 
37
  if response.status_code != 200:
38
- return f"Erreur lors de la récupération du contenu: {response.status_code}"
39
 
40
- # Extraction du contenu avec BeautifulSoup
41
  soup = BeautifulSoup(response.text, 'html.parser')
42
 
43
- # Supprimer les scripts, styles et autres éléments non pertinents
44
  for element in soup(['script', 'style', 'header', 'footer', 'nav']):
45
  element.decompose()
46
 
47
- # Extraire le texte principal
48
  text = soup.get_text(separator='\n')
49
 
50
- # Nettoyer le texte (espaces multiples, lignes vides)
51
  lines = [line.strip() for line in text.split('\n') if line.strip()]
52
  cleaned_text = '\n'.join(lines)
53
 
54
- # Limiter la longueur du texte retourné
55
  max_length = 5000
56
  if len(cleaned_text) > max_length:
57
- cleaned_text = cleaned_text[:max_length] + "... (contenu tronqué)"
58
 
59
  return cleaned_text
60
 
61
  except Exception as e:
62
- return f"Erreur lors de la récupération du contenu web: {str(e)}"
63
 
64
  async def _arun(self, url: str) -> str:
65
- """Version asynchrone de l'outil"""
66
  return self._run(url)
 
3
  from typing import Optional, Type
4
  import requests
5
  from bs4 import BeautifulSoup
6
+ import wikipedia
7
+
8
+
9
+ class WikipediaSearchTool(BaseTool):
10
+ name: str = "wikipedia_search"
11
+ description: str = "Search for information on Wikipedia using a given term or subject"
12
+ args_schema: Optional[Type] = None
13
+
14
+ def _run(self, query: str) -> str:
15
+ """Synchronous Wikipedia search"""
16
+ try:
17
+ wikipedia.set_lang("en")
18
+ summary = wikipedia.summary(query, sentences=3)
19
+ return summary
20
+ except wikipedia.exceptions.DisambiguationError as e:
21
+ return f"Ambiguity: multiple possible results for '{query}': {e.options[:5]}"
22
+ except wikipedia.exceptions.PageError:
23
+ return f"No page found for '{query}'."
24
+ except Exception as e:
25
+ return f"Error during Wikipedia search: {str(e)}"
26
+
27
+ async def _arun(self, query: str) -> str:
28
+ """Asynchronous Wikipedia search (fallback to sync)"""
29
+ return self._run(query)
30
+
31
 
32
  class WebSearchTool(BaseTool):
33
  name: str = "web_search"
34
+ description: str = "Search for information on the web using a search term"
35
  args_schema: Optional[Type] = None
36
 
37
  def _run(self, query: str) -> str:
38
+ """Execute a web search and return relevant results"""
39
  try:
40
  search_tool = DuckDuckGoSearchRun()
41
  return search_tool.run(query)
42
  except Exception as e:
43
+ return f"Error during web search: {str(e)}"
44
 
45
  async def _arun(self, query: str) -> str:
46
+ """Asynchronous version of the tool (fallback to sync)"""
47
  return self._run(query)
48
 
49
  class WebContentTool(BaseTool):
50
  name: str = "fetch_web_content"
51
+ description: str = "Retrieve the content of a web page from a URL"
52
  args_schema: Optional[Type] = None
53
 
54
  def _run(self, url: str) -> str:
55
+ """Retrieve and clean web page content"""
56
  try:
57
  headers = {
58
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 
60
  response = requests.get(url, headers=headers, timeout=10)
61
 
62
  if response.status_code != 200:
63
+ return f"Error retrieving content: {response.status_code}"
64
 
65
+ # Extract content with BeautifulSoup
66
  soup = BeautifulSoup(response.text, 'html.parser')
67
 
68
+ # Remove scripts, styles and other irrelevant elements
69
  for element in soup(['script', 'style', 'header', 'footer', 'nav']):
70
  element.decompose()
71
 
72
+ # Extract main text
73
  text = soup.get_text(separator='\n')
74
 
75
+ # Clean text (multiple spaces, empty lines)
76
  lines = [line.strip() for line in text.split('\n') if line.strip()]
77
  cleaned_text = '\n'.join(lines)
78
 
79
+ # Limit text length
80
  max_length = 5000
81
  if len(cleaned_text) > max_length:
82
+ cleaned_text = cleaned_text[:max_length] + "... (content truncated)"
83
 
84
  return cleaned_text
85
 
86
  except Exception as e:
87
+ return f"Error retrieving web content: {str(e)}"
88
 
89
  async def _arun(self, url: str) -> str:
90
+ """Asynchronous version of the tool (fallback to sync)"""
91
  return self._run(url)