muhammadmaazuddin committed on
Commit
2b557d7
·
1 Parent(s): ac0f9ba

Score : 45

Browse files
app.py CHANGED
@@ -5,7 +5,7 @@ import inspect
5
  import pandas as pd
6
  from typing import Any
7
 
8
- from src.final_assignment_template.agent import manager_agent
9
  # (Keep Constants as is)
10
  # --- Constants ---
11
 
@@ -35,9 +35,9 @@ class BasicAgent:
35
  if task_id and file_name:
36
  print('With task_id')
37
  print(task_id)
38
- fixed_answer = manager_agent.run(f"""<Task>{question_text}</Task>\n<TaskID>{task_id}</TaskID>""")
39
  else:
40
- fixed_answer = manager_agent.run(f'<Task>{question_text}</Task>')
41
  print(f'---------------------fixed_answer----------------\n{fixed_answer}')
42
 
43
  return fixed_answer
@@ -97,11 +97,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
97
  answers_payload = []
98
  print(f"Running agent on {len(questions_data)} questions...")
99
 
100
- for item in questions_data:
101
  task_id = item.get("task_id")
102
  question_text = item.get("question")
103
- file_name = item.get("file_name")
104
- file_data = None
105
  # or file_name != ''
106
  if not task_id or question_text is None:
107
  print(f"Skipping item with missing task_id or question: {item}")
 
5
  import pandas as pd
6
  from typing import Any
7
 
8
+ from src.final_assignment_template.agent import Task_agent
9
  # (Keep Constants as is)
10
  # --- Constants ---
11
 
 
35
  if task_id and file_name:
36
  print('With task_id')
37
  print(task_id)
38
+ fixed_answer = Task_agent.run(f"""<Task>{question_text}</Task>\n<TaskID>{task_id}</TaskID>""")
39
  else:
40
+ fixed_answer = Task_agent.run(f'<Task>{question_text}</Task>')
41
  print(f'---------------------fixed_answer----------------\n{fixed_answer}')
42
 
43
  return fixed_answer
 
97
  answers_payload = []
98
  print(f"Running agent on {len(questions_data)} questions...")
99
 
100
+ for item in questions_data[0:20]:
101
  task_id = item.get("task_id")
102
  question_text = item.get("question")
 
 
103
  # or file_name != ''
104
  if not task_id or question_text is None:
105
  print(f"Skipping item with missing task_id or question: {item}")
src/final_assignment_template/__pycache__/agent.cpython-311.pyc CHANGED
Binary files a/src/final_assignment_template/__pycache__/agent.cpython-311.pyc and b/src/final_assignment_template/__pycache__/agent.cpython-311.pyc differ
 
src/final_assignment_template/__pycache__/models.cpython-311.pyc CHANGED
Binary files a/src/final_assignment_template/__pycache__/models.cpython-311.pyc and b/src/final_assignment_template/__pycache__/models.cpython-311.pyc differ
 
src/final_assignment_template/__pycache__/tools.cpython-311.pyc CHANGED
Binary files a/src/final_assignment_template/__pycache__/tools.cpython-311.pyc and b/src/final_assignment_template/__pycache__/tools.cpython-311.pyc differ
 
src/final_assignment_template/agent.py CHANGED
@@ -1,55 +1,102 @@
1
- from smolagents import load_tool, Tool, tool, ToolCallingAgent, CodeAgent, GoogleSearchTool,FinalAnswerTool,PythonInterpreterTool , LiteLLMModel, VisitWebpageTool, DuckDuckGoSearchTool
2
- from litellm import completion
3
 
4
- from langchain.agents import load_tools
5
- from langchain_community.tools.tavily_search import TavilySearchResults
6
 
7
- import os
8
  from src.final_assignment_template.models import openrouter_qwenCoder_model, modelLiteLLm
9
- from src.final_assignment_template.tools import travily_tool, Video_understanding_tool, image_understanding_tool, get_task_file
10
  # (Keep Constants as is)
11
  # --- Constants ---
12
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- web_agent = CodeAgent(
19
- model=openrouter_qwenCoder_model,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  tools=[
21
- # GoogleSearchTool(provider="serper"),
22
- # DuckDuckGoSearchTool(max_results=10),
 
 
23
  travily_tool,
 
 
24
  VisitWebpageTool(),
 
 
25
  ],
26
- name="web_agent",
27
- description="""Browses the web to find information""",
28
- verbosity_level=1,
29
- max_steps=5,
30
- )
31
-
32
- manager_agent = CodeAgent(
33
- name="Task_Agent",
34
- description="""You will be provided a task and you need to verify before giving final answer
35
- You can perform tasks which are text and image based, skip all other
36
- """,
37
- model=modelLiteLLm,
38
- tools=[PythonInterpreterTool(),Video_understanding_tool,image_understanding_tool,get_task_file],
39
- managed_agents=[web_agent],
40
  additional_authorized_imports=[
41
- "json",
42
- "pandas",
43
- "numpy",
44
- "markdown"
45
- 'math', 'statistics', 're', 'unicodedata', 'random',
46
- 'datetime', 'queue', 'time', 'collections', 'stat', 'itertools',
47
- 'PIL','requests'
48
  ],
49
- planning_interval=3,
 
50
  verbosity_level=1,
 
51
  # final_answer_checks=[check_reasoning_and_plot],
52
- max_steps=5,
53
  )
54
 
55
 
 
 
 
1
+ from smolagents import CodeAgent,ToolCallingAgent, PythonInterpreterTool , VisitWebpageTool, DuckDuckGoSearchTool
 
2
 
 
 
3
 
 
4
  from src.final_assignment_template.models import openrouter_qwenCoder_model, modelLiteLLm
5
+ from src.final_assignment_template.tools import travily_tool, bm25_query, BM25Tool,extract_filter_textual_info_from_textual_context, summarize_before_final_answer, Video_link_understanding_tool, image_understanding_tool, get_task_file
6
  # (Keep Constants as is)
7
  # --- Constants ---
8
 
9
 
10
+ # retrived_context_qa_agent = ToolCallingAgent(
11
+ # name="retrived_context_qa_agent",
12
+ # description="""
13
+ # You are a simple QA agent for the retrieved web context.
14
+ # 1. Pass query and context and available tools.
15
+ # 2. If you can answer directly, respond in plain text.
16
+ # 3. Otherwise, return an explicit action JSON, e.g.
17
+ # {"action": "use_tool", "tool_name": "...", "input": "..."}.
18
+ # """,
19
+ # model=modelLiteLLm,
20
+ # tools=[], # no extra tools by default
21
+ # add_base_tools=False, # don’t add PythonInterpreterTool, etc.
22
+ # verbosity_level=1,
23
+ # planning_interval=1,
24
+ # )
25
 
26
 
27
 
28
+ # web_agent = CodeAgent(
29
+ # model=openrouter_qwenCoder_model,
30
+ # tools=[
31
+ # # GoogleSearchTool(provider="serper"),
32
+ # # DuckDuckGoSearchTool(max_results=10),
33
+ # travily_tool,
34
+ # VisitWebpageTool(),
35
+ # ],
36
+ # name="web_agent",
37
+ # description="""Browses the web to find information""",
38
+ # verbosity_level=1,
39
+ # planning_interval=1,
40
+ # max_steps=8,
41
+ # )
42
 
43
+ # code_agent = CodeAgent(
44
+ # model=openrouter_qwenCoder_model,
45
+ # tools=[
46
+ # # GoogleSearchTool(provider="serper"),
47
+ # # DuckDuckGoSearchTool(max_results=10),
48
+ # PythonInterpreterTool(additional_authorized_imports=[
49
+ # "json",
50
+ # "markdown",
51
+ # 'numpy',
52
+ # 'pandas'
53
+ # 'math', 'statistics', 're', 'unicodedata', 'random',
54
+ # 'datetime', 'queue', 'time', 'collections', 'stat', 'itertools',
55
+ # ])
56
+ # ],
57
+ # name="code_agent",
58
+ # description="""You can execute python code using this agent""",
59
+ # verbosity_level=1,
60
+ # max_steps=3,
61
+ # )
62
+
63
+ # - When using the Video_Link_Understanding_Tool and Image_Understanding_Tool, consider their responses and generate an answer based on the textual understanding they provide.
64
+ # - Video_Link_Understanding_Tool: This tool can only return textual understanding.
65
+ # - Image_Understanding_Tool: This tool can only return textual understanding.
66
+ Task_agent = CodeAgent(
67
+ name="task_Agent",
68
+ description="""
69
+ - You are the Task Agent.
70
+ - Provide the correct answer
71
+ - Must call 'summarize_before_final_answer' at the end
72
+ """,
73
+ model=modelLiteLLm,
74
+ add_base_tools=True,
75
  tools=[
76
+ PythonInterpreterTool(),
77
+ Video_link_understanding_tool,
78
+ image_understanding_tool,
79
+ get_task_file,
80
  travily_tool,
81
+ # DuckDuckGoSearchTool(),
82
+ # bm25_query,
83
  VisitWebpageTool(),
84
+ extract_filter_textual_info_from_textual_context,
85
+ # summarize_before_final_answer,
86
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  additional_authorized_imports=[
88
+ 'numpy',
89
+ 'pandas'
90
+ 'math',
91
+ 'datetime',
 
 
 
92
  ],
93
+ # managed_agents=[web_agent],
94
+ planning_interval=1,
95
  verbosity_level=1,
96
+ max_steps=7,
97
  # final_answer_checks=[check_reasoning_and_plot],
 
98
  )
99
 
100
 
101
+
102
+
src/final_assignment_template/models.py CHANGED
@@ -2,14 +2,24 @@ from smolagents import LiteLLMModel
2
  import os
3
 
4
 
 
 
 
 
 
 
 
 
5
  openrouter_qwenCoder_model = LiteLLMModel(
6
  model_id="openrouter/qwen/qwen-2.5-coder-32b-instruct:free",
7
  api_base="https://openrouter.ai/api/v1",
8
  api_key=os.getenv("OPENROUTER_API_KEY")
9
  )
10
 
 
 
11
  modelLiteLLm = LiteLLMModel(
12
- model_id="openrouter/deepseek/deepseek-r1:free",
13
  api_base="https://openrouter.ai/api/v1",
14
  api_key=os.getenv("OPENROUTER_API_KEY")
15
  )
@@ -27,3 +37,10 @@ imageLiteLLm = LiteLLMModel(
27
  api_base="https://openrouter.ai/api/v1",
28
  api_key=os.getenv("OPENROUTER_API_KEY")
29
  )
 
 
 
 
 
 
 
 
2
  import os
3
 
4
 
5
+ planner_model = LiteLLMModel(
6
+ # model_id="openrouter/openai/o4-mini-high",
7
+ model_id="openrouter/deepseek/deepseek-r1:free",
8
+ api_base="https://openrouter.ai/api/v1",
9
+ api_key=os.getenv("OPENROUTER_API_KEY")
10
+ )
11
+
12
+
13
  openrouter_qwenCoder_model = LiteLLMModel(
14
  model_id="openrouter/qwen/qwen-2.5-coder-32b-instruct:free",
15
  api_base="https://openrouter.ai/api/v1",
16
  api_key=os.getenv("OPENROUTER_API_KEY")
17
  )
18
 
19
+ # nvidia/llama-3.3-nemotron-super-49b-v1:free
20
+ # microsoft/mai-ds-r1:free
21
  modelLiteLLm = LiteLLMModel(
22
+ model_id="openrouter/microsoft/mai-ds-r1:free",
23
  api_base="https://openrouter.ai/api/v1",
24
  api_key=os.getenv("OPENROUTER_API_KEY")
25
  )
 
37
  api_base="https://openrouter.ai/api/v1",
38
  api_key=os.getenv("OPENROUTER_API_KEY")
39
  )
40
+
41
+
42
+ summarizeModle = LiteLLMModel(
43
+ model_id="openrouter/meta-llama/llama-4-maverick:free",
44
+ api_base="https://openrouter.ai/api/v1",
45
+ api_key=os.getenv("OPENROUTER_API_KEY")
46
+ )
src/final_assignment_template/tools.py CHANGED
@@ -10,11 +10,13 @@ from io import BytesIO
10
  import base64
11
 
12
 
13
- from src.final_assignment_template.models import videoLiteLLm, imageLiteLLm
 
 
14
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
- travily_tool = Tool.from_langchain(TavilySearchResults(max_results=25,))
18
 
19
  from smolagents import Tool
20
 
@@ -40,18 +42,123 @@ from smolagents import Tool
40
  # model_downloads_tool = HFModelDownloadsTool()
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  @tool
44
- def Video_understanding_tool(query:str)->str:
45
- """
46
- This tool for understanding or finding something in the video link.
 
 
 
47
 
48
  Args:
49
- query: link with your query.
50
- """
51
- print("processcing vidoe ",query)
52
- messages =[ {"role": "user", "content": [{"type": "text", "text": query}]} ]
53
- resp = videoLiteLLm(messages)
54
- return resp.content or 'No data'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
 
@@ -74,28 +181,31 @@ def get_task_file(task_id:str)->requests.models.Response:
74
  return response
75
 
76
  @tool
77
- def image_understanding_tool(query:str,response:requests.models.Response)->str:
78
- """
79
- This tool for understanding or perform any query on the image.
80
- Provide the image base64 image data
81
-
 
82
 
83
  Args:
84
- query: Query for the image.
85
- response : The return value from the get_task_file which returns the response.
86
- """
87
- print("processcing image ")
88
-
89
-
90
- image = Image.open(BytesIO(response.content)).convert("RGB")
91
-
92
- buffered = BytesIO()
93
- image.save(buffered, format="PNG") # change format if necessary
94
- img_bytes = buffered.getvalue()
95
- img_b64 = base64.b64encode(img_bytes).decode('utf-8')
96
-
97
- print(img_b64)
98
- messages =[ {
 
 
99
  "role": "user",
100
  "content": [
101
  {"type": "text", "text": query},
@@ -103,13 +213,49 @@ def image_understanding_tool(query:str,response:requests.models.Response)->str:
103
  "type": "image_url",
104
  "image_url": {
105
  "url": img_b64,
106
- "format": "image/png" # Adjust MIME type if necessary
107
  }
108
  }
109
  ]
110
- } ]
111
- resp = imageLiteLLm(messages)
112
- print(resp.content)
113
- return resp.content or 'No data'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
 
 
 
 
 
 
 
 
10
  import base64
11
 
12
 
13
+ from langchain_core.documents import Document
14
+ from langchain_community.retrievers import BM25Retriever
15
+ from src.final_assignment_template.models import videoLiteLLm,modelLiteLLm, summarizeModle, imageLiteLLm
16
 
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+ travily_tool = Tool.from_langchain(TavilySearchResults(max_results=20))
20
 
21
  from smolagents import Tool
22
 
 
42
  # model_downloads_tool = HFModelDownloadsTool()
43
 
44
 
45
+ from langchain_core.documents import Document
46
+ from langchain_community.retrievers import BM25Retriever
47
+
48
+ @tool
49
+ def bm25_query(texts: list[str], query: str, top_k: int = 3) -> list[str]:
50
+ """
51
+ Creates a BM25 retriever from a list of texts (e.g., web pages, articles),
52
+ queries it, and returns the top relevant results.
53
+
54
+ Args:
55
+ texts (list[str]): List of text contents (e.g., web page texts, articles, notes).
56
+ query (str): The search query string.
57
+ top_k (int): Number of top results to return (default is 3).
58
+
59
+ Returns:
60
+ list[str]: List of top-k relevant page contents.
61
+ """
62
+ documents = [Document(page_content=text) for text in texts]
63
+ retriever = BM25Retriever.from_documents(documents)
64
+ results = retriever.get_relevant_documents(query)
65
+ print(results)
66
+ return [doc.page_content for doc in results[:top_k]]
67
+
68
+
69
+
70
+ class BM25Tool(Tool):
71
+ name = "bm25"
72
+ description = (
73
+ "Retrieves relevant information from a provided list of text strings "
74
+ "based on a query using BM25."
75
+ )
76
+ inputs = {
77
+ "query": {
78
+ "type": "string",
79
+ "description": "The text query to search for relevant strings."
80
+ }
81
+ }
82
+ output_type = "string"
83
+
84
+ def __init__(self, texts: list[str]):
85
+ """
86
+ Args:
87
+ texts (list[str]): A list of text strings to index (e.g., guest bios, docs, notes).
88
+ """
89
+ documents = [Document(page_content=text) for text in texts]
90
+ self.retriever = BM25Retriever.from_documents(documents)
91
+
92
+ def forward(self, query: str) -> str:
93
+ """
94
+ Retrieves the top-3 most relevant strings matching the query.
95
+
96
+ Args:
97
+ query (str): Text query.
98
+
99
+ Returns:
100
+ str: Concatenated top-3 matching strings or a not-found message.
101
+ """
102
+ results = self.retriever.get_relevant_documents(query)
103
+ if not results:
104
+ return "No relevant information found."
105
+ top_texts = [doc.page_content for doc in results[:3]]
106
+ return "\n\n".join(top_texts)
107
+
108
+
109
+
110
  @tool
111
+ def summarize_before_final_answer(
112
+ context: str,
113
+ question: str,
114
+ ) -> str:
115
+ """
116
+ Given the whole context (all logs) and a question, sends them to the LLM and returns a paragraph overview for the answer.
117
 
118
  Args:
119
+ context (str): The full context or background information.
120
+ question (str): The user's specific question about that context.
121
+
122
+ Returns:
123
+ str: Summarization of whole process for generating final answer.
124
+ """
125
+ # build a single user prompt
126
+ prompt = (
127
+ context.strip()
128
+ + "\n\n"
129
+ + "Question: "
130
+ + question.strip()
131
+ + "\n\n"
132
+ + "Give the summarize of all steps for generating final answer in next step:"
133
+ )
134
+
135
+
136
+ # call the model
137
+ response = summarizeModle(
138
+ messages=[{"role": "user", "content": prompt}],
139
+ )
140
+
141
+ # the .content attribute holds the generated text
142
+ return response.content.strip()
143
+
144
+
145
+
146
+ @tool
147
+ def Video_link_understanding_tool(query: str) -> str:
148
+ """
149
+ A tool that processes a video link (e.g., YouTube) and returns a textual understanding of its content using an LLM.
150
+
151
+ Args:
152
+ query: A video URL along with an optional query for context or specific focus.
153
+
154
+ Returns:
155
+ A text-based summary or understanding of the video content.
156
+ """
157
+ print("Processing video:", query)
158
+ messages = [{"role": "user", "content": [{"type": "text", "text": query}]}]
159
+ resp = videoLiteLLm(messages)
160
+ return resp.content or 'No data'
161
+
162
 
163
 
164
 
 
181
  return response
182
 
183
  @tool
184
+ def image_understanding_tool(query: str, response: requests.models.Response) -> str:
185
+ """
186
+ A tool for analyzing and understanding the content of an image based on a given query.
187
+
188
+ This tool processes the image provided in the response (from get_task_file), encodes it into base64,
189
+ and queries a lightweight image LLM to generate insights or answers about the image.
190
 
191
  Args:
192
+ query: The query or instruction related to the image content.
193
+ response: The HTTP response object containing the image data.
194
+
195
+ Returns:
196
+ A text-based understanding or interpretation of the image.
197
+ """
198
+ print("Processing image...")
199
+
200
+ image = Image.open(BytesIO(response.content)).convert("RGB")
201
+
202
+ buffered = BytesIO()
203
+ image.save(buffered, format="PNG")
204
+ img_bytes = buffered.getvalue()
205
+ img_b64 = base64.b64encode(img_bytes).decode('utf-8')
206
+
207
+ # print(img_b64)
208
+ messages = [{
209
  "role": "user",
210
  "content": [
211
  {"type": "text", "text": query},
 
213
  "type": "image_url",
214
  "image_url": {
215
  "url": img_b64,
216
+ "format": "image/png"
217
  }
218
  }
219
  ]
220
+ }]
221
+
222
+ resp = imageLiteLLm(messages)
223
+ print(resp.content)
224
+ return resp.content or 'No data'
225
+
226
+
227
+
228
+
229
+
230
+ @tool
231
+ def extract_filter_textual_info_from_textual_context(
232
+ context: str,
233
+ question: str,
234
+ ) -> str:
235
+ """
236
+ Tool to pull out targeted details from a large body of text.
237
+
238
+ Combines the context and an questoin into a single prompt,
239
+ queries the llm, and returns the resulting extract.
240
+
241
+ Args:
242
+ context (str): The full background text (e.g., long document, webpage, notes).
243
+ question (str): What you want to extract (e.g., “list all dates mentioned”).
244
+
245
+ Returns:
246
+ str: The extracted information, trimmed of whitespace.
247
+ """
248
+ # Build the extraction prompt
249
+ prompt = (
250
+ "Context:\n" + context.strip() +
251
+ "\n\nQuestion: " + question.strip() +
252
+ "\n\nExtracted Information:"
253
+ )
254
 
255
 
256
+ # Call the model to perform extraction
257
+ response = modelLiteLLm(
258
+ messages=[{"role": "user", "content": prompt}],
259
+ )
260
+ print(response)
261
+ return response.content