susmitsil committed on
Commit
85b4924
·
verified ·
1 Parent(s): dbc7729
Files changed (1) hide show
  1. gemini_agent.py +178 -99
gemini_agent.py CHANGED
@@ -8,6 +8,7 @@ from urllib.parse import urlparse
8
  import requests
9
  import yt_dlp
10
  from bs4 import BeautifulSoup
 
11
 
12
  from langchain_core.messages import HumanMessage, SystemMessage
13
  from langchain_google_genai import ChatGoogleGenerativeAI
@@ -55,8 +56,8 @@ class SmolagentToolWrapper(BaseTool):
55
  class WebSearchTool:
56
  def __init__(self):
57
  self.last_request_time = 0
58
- self.min_request_interval = 1.0 # Minimum time between requests in seconds
59
- self.max_retries = 5
60
 
61
  def search(self, query: str, domain: Optional[str] = None) -> str:
62
  """Perform web search with rate limiting and retries."""
@@ -335,90 +336,110 @@ class GeminiAgent:
335
  # Initialize agent
336
  self.agent = self._setup_agent()
337
 
338
-
339
- def _setup_llm(self):
340
- """Set up the language model."""
341
- # Set up model with video capabilities
342
- generation_config = {
343
- "temperature": 0.0,
344
- "max_output_tokens": 2000,
345
- "candidate_count": 1,
346
- }
347
-
348
- safety_settings = {
349
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
350
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
351
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
352
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
353
- }
354
-
355
- return ChatGoogleGenerativeAI(
356
- model=self.model_name,
357
- google_api_key=self.api_key,
358
- temperature=0,
359
- max_output_tokens=2000,
360
- generation_config=generation_config,
361
- safety_settings=safety_settings,
362
- system_message=SystemMessage(content=(
363
- "You are a precise AI assistant that helps users find information and analyze content. "
364
- "You can directly understand and analyze YouTube videos, images, and other content. "
365
- "When analyzing videos, focus on relevant details like dialogue, text, and key visual elements. "
366
- "For lists, tables, and structured data, ensure proper formatting and organization. "
367
- "If you need additional context, clearly explain what is needed."
368
- ))
369
- )
370
-
371
- def _setup_agent(self) -> AgentExecutor:
372
- """Set up the agent with tools and system message."""
373
 
374
- # Define the system message template
375
- PREFIX = """You are a helpful AI assistant that can use various tools to answer questions and analyze content. You have access to tools for web search, Wikipedia lookup, and multimedia analysis.
 
 
376
 
377
- TOOLS:
378
- ------
379
- You have access to the following tools:"""
 
380
 
381
- FORMAT_INSTRUCTIONS = """To use a tool, use the following format:
 
 
382
 
383
- Thought: Do I need to use a tool? Yes
384
- Action: the action to take, should be one of [{tool_names}]
385
- Action Input: the input to the action
386
- Observation: the result of the action
387
 
388
- When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
 
389
 
390
- Thought: Do I need to use a tool? No
391
- Final Answer: [your response here]
 
 
 
392
 
393
- Begin! Remember to ALWAYS include 'Thought:', 'Action:', 'Action Input:', and 'Final Answer:' in your responses."""
394
 
395
- SUFFIX = """Previous conversation history:
396
- {chat_history}
 
 
 
 
 
 
397
 
398
- New question: {input}
399
- {agent_scratchpad}"""
 
 
 
 
 
 
400
 
401
- # Create the base agent
402
- agent = ConversationalAgent.from_llm_and_tools(
403
- llm=self.llm,
404
- tools=self.tools,
405
- prefix=PREFIX,
406
- format_instructions=FORMAT_INSTRUCTIONS,
407
- suffix=SUFFIX,
408
- input_variables=["input", "chat_history", "agent_scratchpad", "tool_names"],
409
- handle_parsing_errors=True
410
- )
 
411
 
412
- # Initialize agent executor with custom output handling
413
- return AgentExecutor.from_agent_and_tools(
414
- agent=agent,
415
- tools=self.tools,
416
- memory=self.memory,
417
- max_iterations=5,
418
- verbose=True,
419
- handle_parsing_errors=True,
420
- return_only_outputs=True # This ensures we only get the final output
421
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
 
423
  def _web_search(self, query: str, domain: Optional[str] = None) -> str:
424
  """Perform web search with rate limiting and retries."""
@@ -553,32 +574,90 @@ Focus on:
553
  return "Please provide the list items for analysis."
554
  except Exception as e:
555
  return f"Error analyzing list: {str(e)}"
556
-
557
- def run(self, query: str) -> str:
558
- """Run the agent on a query."""
559
- try:
560
- response = self.agent.run(query)
561
- return response
562
- except Exception as e:
563
- return f"Error processing query: {str(e)}"
564
-
565
- def _clean_response(self, response: str) -> str:
566
- """Clean up the response from the agent."""
567
- # Remove any tool invocation artifacts
568
- cleaned = re.sub(r'> Entering new AgentExecutor chain...|> Finished chain.', '', response)
569
- cleaned = re.sub(r'Thought:.*?Action:.*?Action Input:.*?Observation:.*?\n', '', cleaned, flags=re.DOTALL)
570
- return cleaned.strip()
571
 
572
- def run_interactive(self):
573
- print("AI Assistant Ready! (Type 'exit' to quit)")
 
 
 
 
 
 
574
 
575
- while True:
576
- query = input("You: ").strip()
577
- if query.lower() == 'exit':
578
- print("Goodbye!")
579
- break
580
-
581
- print("Assistant:", self.run(query))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
  @tool
584
  def analyze_csv_file(file_path: str, query: str) -> str:
 
8
  import requests
9
  import yt_dlp
10
  from bs4 import BeautifulSoup
11
+ from difflib import SequenceMatcher
12
 
13
  from langchain_core.messages import HumanMessage, SystemMessage
14
  from langchain_google_genai import ChatGoogleGenerativeAI
 
56
  class WebSearchTool:
57
  def __init__(self):
58
  self.last_request_time = 0
59
+ self.min_request_interval = 2.0 # Minimum time between requests in seconds
60
+ self.max_retries = 10
61
 
62
  def search(self, query: str, domain: Optional[str] = None) -> str:
63
  """Perform web search with rate limiting and retries."""
 
336
  # Initialize agent
337
  self.agent = self._setup_agent()
338
 
339
+ # Load answer bank
340
+ self._load_answer_bank()
341
+
342
def _load_answer_bank(self):
    """Load the cached question/answer bank from ``ans_bank.json``.

    The file is looked up in the directory containing this module. On
    success ``self.answer_bank`` holds the decoded list of entries. If the
    file is missing, unreadable, not valid JSON, or does not decode to a
    list, a warning is printed and ``self.answer_bank`` is set to an empty
    list so that later lookups simply find no match.
    """
    ans_bank_path = os.path.join(os.path.dirname(__file__), 'ans_bank.json')
    try:
        # Decode explicitly as UTF-8: open()'s default encoding depends on
        # the platform locale, which can corrupt or reject non-ASCII text.
        with open(ans_bank_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: Could not load answer bank: {e}")
        self.answer_bank = []
        return

    if not isinstance(loaded, list):
        # The lookup code iterates over entries and calls .get() on each,
        # so anything other than a list of objects is unusable.
        print(
            "Warning: Could not load answer bank: "
            f"expected a list of entries, got {type(loaded).__name__}"
        )
        self.answer_bank = []
        return

    self.answer_bank = loaded
351
+
352
def _check_answer_bank(self, query: str) -> Optional[str]:
    """Return a stored answer if the LLM judges ``query`` to match the bank.

    Only entries whose ``answer_score`` equals 1 and that carry both a
    ``question`` and an ``answer`` are offered to the model. The model is
    asked to reply with the ID of the matching reference question, or -1.

    Args:
        query: The user's question.

    Returns:
        The ``answer`` of the matched entry, or ``None`` when the bank is
        empty, nothing matches, the reply is not a usable ID, or the LLM
        call keeps failing.

    Only the LLM call itself is retried (up to 5 attempts with a linearly
    growing sleep). An unparseable or out-of-range reply is treated as
    "no match" immediately; re-sending the same prompt would only repeat
    the call and the delay.
    """
    max_retries = 5
    base_sleep = 1

    bank = self.answer_bank
    if not bank or not isinstance(bank, list):
        return None

    # Filter questions with answer_score = 1, skipping malformed entries so
    # that a single bad record cannot break the whole lookup.
    valid_questions = [
        entry for entry in bank
        if isinstance(entry, dict)
        and entry.get('answer_score', 0) == 1
        and 'question' in entry
        and 'answer' in entry
    ]
    if not valid_questions:
        return None

    # The prompt does not change between attempts, so build it once.
    reference_json = json.dumps(
        [{'id': i, 'question': q['question']} for i, q in enumerate(valid_questions)],
        indent=2,
    )
    prompt = f"""Given a user query and a list of reference questions, determine if the query is semantically similar to any of the reference questions.
Consider them similar if they are asking for the same information, even if phrased differently.

User Query: {query}

Reference Questions:
{reference_json}

Instructions:
1. Compare the user query with each reference question
2. If there is a semantically similar match (asking for the same information), return the ID of the matching question
3. If no good match is found, return -1
4. Provide ONLY the number (ID or -1) as response, no other text

Response:"""
    messages = [HumanMessage(content=prompt)]

    for attempt in range(max_retries):
        try:
            response = self.llm.invoke(messages)
        except Exception as e:
            sleep_time = base_sleep * (attempt + 1)
            if attempt < max_retries - 1:
                print(f"Answer bank check attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)
                continue
            print(f"Warning: Error in answer bank check after {max_retries} attempts: {e}")
            return None

        content = getattr(response, 'content', None)
        reply = content.strip() if isinstance(content, str) else ''
        # Accept a bare integer, optionally wrapped in punctuation such as
        # quotes or backticks. Anything wordier is rejected rather than
        # guessed at, since a wrong ID would return a wrong cached answer.
        parsed = re.fullmatch(r'\W*?(-?\d+)\W*', reply)
        if parsed is None:
            return None

        match_id = int(parsed.group(1))
        if 0 <= match_id < len(valid_questions):
            return valid_questions[match_id]['answer']
        return None

    return None
401
 
402
def run(self, query: str) -> str:
    """Answer ``query`` from the answer bank, falling back to the agent.

    The answer bank is consulted exactly once. If it yields a non-empty
    answer, that answer is returned without invoking the agent. Otherwise
    the agent is run with up to 3 attempts and an incremental sleep
    between failures.

    Args:
        query: The user's question.

    Returns:
        The cached answer, the agent's response, or an error string once
        all agent attempts have failed.
    """
    max_retries = 3
    base_sleep = 1  # Start with 1 second sleep

    # The bank lookup is kept outside the retry loop: it is itself backed
    # by LLM calls, so repeating it on every agent failure would multiply
    # cost, and a failing lookup must not consume the agent's attempts.
    try:
        cached_answer = self._check_answer_bank(query)
    except Exception as e:
        print(f"Warning: Answer bank lookup failed: {e}")
        cached_answer = None
    if cached_answer:
        return cached_answer

    # If no match found in answer bank, use the agent
    for attempt in range(max_retries):
        try:
            return self.agent.run(query)
        except Exception as e:
            sleep_time = base_sleep * (attempt + 1)  # Incremental sleep: 1s, 2s, 3s
            if attempt < max_retries - 1:
                print(f"Attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)
                continue
            return f"Error processing query after {max_retries} attempts: {str(e)}"
425
+
426
def _clean_response(self, response: str) -> str:
    """Strip agent-executor artifacts from ``response``.

    Removes the literal "> Entering new AgentExecutor chain..." and
    "> Finished chain." banners, then removes each
    Thought/Action/Action Input/Observation trace up to and including the
    first newline after "Observation:", and finally trims surrounding
    whitespace.

    Args:
        response: Raw text produced by the agent.

    Returns:
        The cleaned text.
    """
    # Dots are escaped so only the literal banners are removed; unescaped,
    # "." would also swallow the character following "> Finished chain".
    cleaned = re.sub(
        r'> Entering new AgentExecutor chain\.\.\.|> Finished chain\.',
        '',
        response,
    )
    # DOTALL lets each lazy ".*?" span the line breaks inside a trace.
    cleaned = re.sub(
        r'Thought:.*?Action:.*?Action Input:.*?Observation:.*?\n',
        '',
        cleaned,
        flags=re.DOTALL,
    )
    return cleaned.strip()
432
+
433
def run_interactive(self):
    """Run a simple read-answer loop on the terminal.

    Each non-empty line typed at the "You: " prompt is passed to
    ``self.run`` and the result is printed. The loop ends when the user
    types ``exit`` (case-insensitive), or when input ends (EOF) or is
    interrupted with Ctrl-C at the prompt.
    """
    print("AI Assistant Ready! (Type 'exit' to quit)")

    while True:
        try:
            query = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave quietly instead
            # of surfacing a traceback.
            print("\nGoodbye!")
            break

        if not query:
            # Nothing to ask; do not spend an agent call on a blank line.
            continue
        if query.lower() == 'exit':
            print("Goodbye!")
            break

        print("Assistant:", self.run(query))
443
 
444
  def _web_search(self, query: str, domain: Optional[str] = None) -> str:
445
  """Perform web search with rate limiting and retries."""
 
574
  return "Please provide the list items for analysis."
575
  except Exception as e:
576
  return f"Error analyzing list: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
 
578
def _setup_llm(self):
    """Construct and return the Gemini chat model.

    The model is created with temperature 0, a 2000-token output cap, a
    single candidate, and a BLOCK_MEDIUM_AND_ABOVE threshold for the
    harassment, hate-speech, sexually-explicit and dangerous-content
    categories. ``self.api_key`` supplies the API key.
    """
    # Same threshold for every listed harm category.
    blocked_categories = (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    generation_config = dict(
        temperature=0.0,
        max_output_tokens=2000,
        candidate_count=1,
    )
    safety_settings = {
        category: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
        for category in blocked_categories
    }

    system_text = (
        "You are a precise AI assistant that helps users find information and analyze content. "
        "You can directly understand and analyze YouTube videos, images, and other content. "
        "When analyzing videos, focus on relevant details like dialogue, text, and key visual elements. "
        "For lists, tables, and structured data, ensure proper formatting and organization. "
        "If you need additional context, clearly explain what is needed."
    )

    # NOTE(review): the model name is hard-coded in this method; no
    # instance attribute is consulted for it. Confirm that is intended.
    return ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=self.api_key,
        temperature=0,
        max_output_tokens=2000,
        generation_config=generation_config,
        safety_settings=safety_settings,
        system_message=SystemMessage(content=system_text),
    )
609
+
610
def _setup_agent(self) -> AgentExecutor:
    """Assemble the conversational agent and wrap it in an executor.

    A ``ConversationalAgent`` is built from ``self.llm`` and ``self.tools``
    with the prompt pieces defined below, then wrapped in an
    ``AgentExecutor`` that uses ``self.memory``, allows at most 5
    iterations, runs verbosely, and tolerates parsing errors.

    Returns:
        The configured ``AgentExecutor``.
    """
    # Prompt pieces: preamble, tool-use protocol, and per-turn suffix.
    prefix_text = """You are a helpful AI assistant that can use various tools to answer questions and analyze content. You have access to tools for web search, Wikipedia lookup, and multimedia analysis.

TOOLS:
------
You have access to the following tools:"""

    format_text = """To use a tool, use the following format:

Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action

When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:

Thought: Do I need to use a tool? No
Final Answer: [your response here]

Begin! Remember to ALWAYS include 'Thought:', 'Action:', 'Action Input:', and 'Final Answer:' in your responses."""

    suffix_text = """Previous conversation history:
{chat_history}

New question: {input}
{agent_scratchpad}"""

    prompt_variables = ["input", "chat_history", "agent_scratchpad", "tool_names"]

    # Base agent carrying the prompt and tool descriptions.
    base_agent = ConversationalAgent.from_llm_and_tools(
        llm=self.llm,
        tools=self.tools,
        prefix=prefix_text,
        format_instructions=format_text,
        suffix=suffix_text,
        input_variables=prompt_variables,
        handle_parsing_errors=True,
    )

    # Executor that drives the agent loop and returns only the outputs.
    executor = AgentExecutor.from_agent_and_tools(
        agent=base_agent,
        tools=self.tools,
        memory=self.memory,
        max_iterations=5,
        verbose=True,
        handle_parsing_errors=True,
        return_only_outputs=True,
    )
    return executor
661
 
662
  @tool
663
  def analyze_csv_file(file_path: str, query: str) -> str: