qResearch

Sleeping

App Files Community

Quazim0t0 committed on Feb 18

Commit

eb339cc

verified ·

1 Parent(s): 15e06d4

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -436

app.py CHANGED Viewed

@@ -1,3 +1,9 @@
 import argparse
 import json
 import os
@@ -34,480 +40,111 @@ from tqdm import tqdm
 from smolagents import (
     CodeAgent,
     HfApiModel,
-    LiteLLMModel,
-    Model,
     ToolCallingAgent,
 )
 from smolagents.agent_types import AgentText, AgentImage, AgentAudio
 from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
 from smolagents import Tool
-class GoogleSearchTool(Tool):
-    name = "web_search"
-    description = """Performs a google web search for your query then returns a string of the top search results."""
-    inputs = {
-        "query": {"type": "string", "description": "The search query to perform."},
-        "filter_year": {
-            "type": "integer",
-            "description": "Optionally restrict results to a certain year",
-            "nullable": True,
-        },
-    }
-    output_type = "string"
-    def __init__(self):
-        super().__init__(self)
-        import os
-        self.serpapi_key = os.getenv("SERPER_API_KEY")
-    def forward(self, query: str, filter_year: Optional[int] = None) -> str:
-        import requests
-        if self.serpapi_key is None:
-            raise ValueError("Missing SerpAPI key. Make sure you have 'SERPER_API_KEY' in your env variables.")
-        params = {
-            "engine": "google",
-            "q": query,
-            "api_key": self.serpapi_key,
-            "google_domain": "google.com",
-        }
-        headers = {
-        'X-API-KEY': self.serpapi_key,
-        'Content-Type': 'application/json'
-        }
-        if filter_year is not None:
-            params["tbs"] = f"cdr:1,cd_min:01/01/{filter_year},cd_max:12/31/{filter_year}"
-        response = requests.request("POST", "https://google.serper.dev/search", headers=headers, data=json.dumps(params))
-        if response.status_code == 200:
-            results = response.json()
-        else:
-            raise ValueError(response.json())
-        if "organic" not in results.keys():
-            print("REZZZ", results.keys())
-            if filter_year is not None:
-                raise Exception(
-                    f"No results found for query: '{query}' with filtering on year={filter_year}. Use a less restrictive query or do not filter on year."
-                )
-            else:
-                raise Exception(f"No results found for query: '{query}'. Use a less restrictive query.")
-        if len(results["organic"]) == 0:
-            year_filter_message = f" with filter year={filter_year}" if filter_year is not None else ""
-            return f"No results found for '{query}'{year_filter_message}. Try with a more general query, or remove the year filter."
-        web_snippets = []
-        if "organic" in results:
-            for idx, page in enumerate(results["organic"]):
-                date_published = ""
-                if "date" in page:
-                    date_published = "\nDate published: " + page["date"]
-                source = ""
-                if "source" in page:
-                    source = "\nSource: " + page["source"]
-                snippet = ""
-                if "snippet" in page:
-                    snippet = "\n" + page["snippet"]
-                redacted_version = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{snippet}"
-                redacted_version = redacted_version.replace("Your browser can't play this video.", "")
-                web_snippets.append(redacted_version)
-        return "## Search Results\n" + "\n\n".join(web_snippets)
-# web_search = GoogleSearchTool()
-# print(web_search(query="Donald Trump news"))
-# quit()
-AUTHORIZED_IMPORTS = [
-    "requests",
-    "zipfile",
-    "os",
-    "pandas",
-    "numpy",
-    "sympy",
-    "json",
-    "bs4",
-    "pubchempy",
-    "xml",
-    "yahoo_finance",
-    "Bio",
-    "sklearn",
-    "scipy",
-    "pydub",
-    "io",
-    "PIL",
-    "chess",
-    "PyPDF2",
-    "pptx",
-    "torch",
-    "datetime",
-    "fractions",
-    "csv",
-]
 load_dotenv(override=True)
 login(os.getenv("HF_TOKEN"))
-append_answer_lock = threading.Lock()
 custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
-user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
-BROWSER_CONFIG = {
-    "viewport_size": 1024 * 5,
-    "downloads_folder": "downloads_folder",
-    "request_kwargs": {
-        "headers": {"User-Agent": user_agent},
-        "timeout": 300,
-    },
-    "serpapi_key": os.getenv("SERPAPI_API_KEY"),
-}
-os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
-model = LiteLLMModel(
-    "gpt-4o",
-    custom_role_conversions=custom_role_conversions,
-    api_key=os.getenv("OPENAI_API_KEY")
 )
-text_limit = 20000
-ti_tool = TextInspectorTool(model, text_limit)
-browser = SimpleTextBrowser(**BROWSER_CONFIG)
-WEB_TOOLS = [
-    GoogleSearchTool(),
-    VisitTool(browser),
-    PageUpTool(browser),
-    PageDownTool(browser),
-    FinderTool(browser),
-    FindNextTool(browser),
-    ArchiveSearchTool(browser),
-    TextInspectorTool(model, text_limit),
-]
-# Agent creation in a factory function
-def create_agent():
-    """Creates a fresh agent instance for each session"""
-    return CodeAgent(
-        model=model,
-        tools=[visualizer] + WEB_TOOLS,
-        max_steps=10,
-        verbosity_level=1,
-        additional_authorized_imports=AUTHORIZED_IMPORTS,
-        planning_interval=4,
-    )
-document_inspection_tool = TextInspectorTool(model, 20000)
-def stream_to_gradio(
-    agent,
-    task: str,
-    reset_agent_memory: bool = False,
-    additional_args: Optional[dict] = None,
-):
-    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
-    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
-        for message in pull_messages_from_step(
-            step_log,
-        ):
-            yield message
-    final_answer = step_log  # Last log is the run's final_answer
-    final_answer = handle_agent_output_types(final_answer)
-    if isinstance(final_answer, AgentText):
-        yield gr.ChatMessage(
-            role="assistant",
-            content=f"**Final answer:**\n{final_answer.to_string()}\n",
-        )
-    elif isinstance(final_answer, AgentImage):
-        yield gr.ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "image/png"},
-        )
-    elif isinstance(final_answer, AgentAudio):
-        yield gr.ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
-        )
-    else:
-        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
-class GradioUI:
-    """A one-line interface to launch your agent in Gradio"""
-    def __init__(self, file_upload_folder: str | None = None):
-        self.file_upload_folder = file_upload_folder
-        if self.file_upload_folder is not None:
-            if not os.path.exists(file_upload_folder):
-                os.mkdir(file_upload_folder)
-    def interact_with_agent(self, prompt, messages, session_state):
-        # Get or create session-specific agent
-        if 'agent' not in session_state:
-            session_state['agent'] = create_agent()
-        # Adding monitoring
         try:
-            # log the existence of agent memory
-            has_memory = hasattr(session_state['agent'], 'memory')
-            print(f"Agent has memory: {has_memory}")
-            if has_memory:
-                print(f"Memory type: {type(session_state['agent'].memory)}")
             messages.append(gr.ChatMessage(role="user", content=prompt))
             yield messages
             for msg in stream_to_gradio(session_state['agent'], task=prompt, reset_agent_memory=False):
-                messages.append(msg)
                 yield messages
             yield messages
         except Exception as e:
             print(f"Error in interaction: {str(e)}")
             raise
-    def upload_file(
-        self,
-        file,
-        file_uploads_log,
-        allowed_file_types=[
-            "application/pdf",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-            "text/plain",
-        ],
-    ):
-        """
-        Handle file uploads, default allowed types are .pdf, .docx, and .txt
-        """
-        if file is None:
-            return gr.Textbox("No file uploaded", visible=True), file_uploads_log
-        try:
-            mime_type, _ = mimetypes.guess_type(file.name)
-        except Exception as e:
-            return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log
-        if mime_type not in allowed_file_types:
-            return gr.Textbox("File type disallowed", visible=True), file_uploads_log
-        # Sanitize file name
-        original_name = os.path.basename(file.name)
-        sanitized_name = re.sub(
-            r"[^\w\-.]", "_", original_name
-        )  # Replace any non-alphanumeric, non-dash, or non-dot characters with underscores
-        type_to_ext = {}
-        for ext, t in mimetypes.types_map.items():
-            if t not in type_to_ext:
-                type_to_ext[t] = ext
-        # Ensure the extension correlates to the mime type
-        sanitized_name = sanitized_name.split(".")[:-1]
-        sanitized_name.append("" + type_to_ext[mime_type])
-        sanitized_name = "".join(sanitized_name)
-        # Save the uploaded file to the specified folder
-        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
-        shutil.copy(file.name, file_path)
-        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
-    def log_user_message(self, text_input, file_uploads_log):
-        return (
-            text_input
-            + (
-                f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
-                if len(file_uploads_log) > 0
-                else ""
-            ),
-            gr.Textbox(value="", interactive=False, placeholder="Please wait while Steps are getting populated"),
-            gr.Button(interactive=False)
-        )
-    def detect_device(self, request: gr.Request):
-        # Check whether the user device is a mobile or a computer
-        if not request:
-            return "Unknown device"
-        # Method 1: Check sec-ch-ua-mobile header
-        is_mobile_header = request.headers.get('sec-ch-ua-mobile')
-        if is_mobile_header:
-            return "Mobile" if '?1' in is_mobile_header else "Desktop"
-        # Method 2: Check user-agent string
-        user_agent = request.headers.get('user-agent', '').lower()
-        mobile_keywords = ['android', 'iphone', 'ipad', 'mobile', 'phone']
-        if any(keyword in user_agent for keyword in mobile_keywords):
-            return "Mobile"
-        # Method 3: Check platform
-        platform = request.headers.get('sec-ch-ua-platform', '').lower()
-        if platform:
-            if platform in ['"android"', '"ios"']:
-                return "Mobile"
-            elif platform in ['"windows"', '"macos"', '"linux"']:
-                return "Desktop"
-        # Default case if no clear indicators
-        return "Desktop"
-    def launch(self, **kwargs):
         with gr.Blocks(theme="ocean", fill_height=True) as demo:
-            # Different layouts for mobile and computer devices
             @gr.render()
             def layout(request: gr.Request):
                 device = self.detect_device(request)
-                print(f"device - {device}")
-                # Render layout with sidebar
                 if device == "Desktop":
-                    with gr.Blocks(fill_height=True,) as sidebar_demo:
                         with gr.Sidebar():
-                            gr.Markdown("""# open Deep Research - free the AI agents!
-                OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), a very nice assistant that can perform deep searches on the web to answer user questions.
-                However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨
-                You can try a simplified version here.<br><br>""")
-                            with gr.Group():
-                                gr.Markdown("**Your request**", container=True)
-                                text_input = gr.Textbox(lines=3, label="Your request", container=False, placeholder="Enter your prompt here and press Shift+Enter or press the button")
-                                launch_research_btn = gr.Button("Run", variant="primary")
-                            # If an upload folder is provided, enable the upload feature
-                            if self.file_upload_folder is not None:
-                                upload_file = gr.File(label="Upload a file")
-                                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
-                                upload_file.change(
-                                    self.upload_file,
-                                    [upload_file, file_uploads_log],
-                                    [upload_status, file_uploads_log],
-                                )
-                            gr.HTML("<br><br><h4><center>Powered by:</center></h4>")
-                            with gr.Row():
-                                gr.HTML("""<div style="display: flex; align-items: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
-                        <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png" style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
-                        <a href="https://github.com/huggingface/smolagents"><b>huggingface/smolagents</b></a>
-                        </div>""")
-                        # Add session state to store session-specific data
-                        session_state = gr.State({})  # Initialize empty state for each session
-                        stored_messages = gr.State([])
-                        file_uploads_log = gr.State([])
-                        chatbot = gr.Chatbot(
-                            label="open-Deep-Research",
-                            type="messages",
-                            avatar_images=(
-                                None,
-                                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
-                            ),
-                            resizeable=False,
-                            scale=1,
-                            elem_id="my-chatbot"
-                        )
-                        text_input.submit(
-                            self.log_user_message,
-                            [text_input, file_uploads_log],
-                            [stored_messages, text_input, launch_research_btn],
-                        ).then(self.interact_with_agent,
-                            # Include session_state in function calls
-                            [stored_messages, chatbot, session_state],
-                            [chatbot]
-                        ).then(lambda : (gr.Textbox(interactive=True, placeholder="Enter your prompt here and press the button"), gr.Button(interactive=True)),
-                              None,
-                              [text_input, launch_research_btn])
-                        launch_research_btn.click(
-                            self.log_user_message,
-                            [text_input, file_uploads_log],
-                            [stored_messages, text_input, launch_research_btn],
-                        ).then(self.interact_with_agent,
-                            # Include session_state in function calls
-                            [stored_messages, chatbot, session_state],
-                            [chatbot]
-                        ).then(lambda : (gr.Textbox(interactive=True, placeholder="Enter your prompt here and press the button"), gr.Button(interactive=True)),
-                              None,
-                              [text_input, launch_research_btn])
-                # Render simple layout
                 else:
-                    with gr.Blocks(fill_height=True,) as simple_demo:
-                        gr.Markdown("""# open Deep Research - free the AI agents!
-            _Built with [smolagents](https://github.com/huggingface/smolagents)_
-            OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), a very nice assistant that can perform deep searches on the web to answer user questions.
-            However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨
-            You can try a simplified version below. 👇""")
-                        # Add session state to store session-specific data
-                        session_state = gr.State({})  # Initialize empty state for each session
-                        stored_messages = gr.State([])
-                        file_uploads_log = gr.State([])
-                        chatbot = gr.Chatbot(
-                            label="open-Deep-Research",
-                            type="messages",
-                            avatar_images=(
-                                None,
-                                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
-                            ),
-                            resizeable=True,
-                            scale=1,
-                        )
-                        # If an upload folder is provided, enable the upload feature
-                        if self.file_upload_folder is not None:
-                            upload_file = gr.File(label="Upload a file")
-                            upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
-                            upload_file.change(
-                                self.upload_file,
-                                [upload_file, file_uploads_log],
-                                [upload_status, file_uploads_log],
-                            )
-                        text_input = gr.Textbox(lines=1, label="Your request", placeholder="Enter your prompt here and press the button")
-                        launch_research_btn = gr.Button("Run", variant="primary",)
-                        text_input.submit(
-                            self.log_user_message,
-                            [text_input, file_uploads_log],
-                            [stored_messages, text_input, launch_research_btn],
-                        ).then(self.interact_with_agent,
-                            # Include session_state in function calls
-                            [stored_messages, chatbot, session_state],
-                            [chatbot]
-                        ).then(lambda : (gr.Textbox(interactive=True, placeholder="Enter your prompt here and press the button"), gr.Button(interactive=True)),
-                              None,
-                              [text_input, launch_research_btn])
-                        launch_research_btn.click(
-                            self.log_user_message,
-                            [text_input, file_uploads_log],
-                            [stored_messages, text_input, launch_research_btn],
-                        ).then(self.interact_with_agent,
-                            # Include session_state in function calls
-                            [stored_messages, chatbot, session_state],
-                            [chatbot]
-                        ).then(lambda : (gr.Textbox(interactive=True, placeholder="Enter your prompt here and press the button"), gr.Button(interactive=True)),
-                              None,
-                              [text_input, launch_research_btn])
-        demo.launch(debug=True, **kwargs)
-GradioUI().launch()

+"""qResearch: Advanced AI Research Assistant
+Modified implementation of deep research capabilities using Qwen2.5-Coder
+and DuckDuckGo search integration.
+"""
 import argparse
 import json
 import os
 from smolagents import (
     CodeAgent,
     HfApiModel,
     ToolCallingAgent,
 )
 from smolagents.agent_types import AgentText, AgentImage, AgentAudio
 from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
 from smolagents import Tool
 load_dotenv(override=True)
 login(os.getenv("HF_TOKEN"))
+# Custom role conversions for model interaction
 custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
+# Initialize Qwen2.5-Coder-32B-Instruct model
+model = HfApiModel(
+    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+    custom_role_conversions=custom_role_conversions
 )
+class DuckDuckGoSearchTool(Tool):
+    """Web search tool using DuckDuckGo's search API"""
+    name = "web_search"
+    description = "Performs web searches using DuckDuckGo's search engine"
+    inputs = {
+        "query": {"type": "string", "description": "Search query text"},
+        "max_results": {"type": "integer", "description": "Number of results to return", "default": 5}
+    }
+    output_type = "string"
+    def forward(self, query: str, max_results: int = 5) -> str:
+        from duckduckgo_search import DDGS
+        with DDGS() as ddgs:
+            results = [r for r in ddgs.text(query, max_results=max_results)]
+        search_results = []
+        for idx, result in enumerate(results):
+            search_results.append(
+                f"{idx+1}. [{result['title']}]({result['href']})\n{result['body']}"
+            )
+        return "## Search Results\n" + "\n\n".join(search_results)
+# Initialize research agent with DuckDuckGo tool
+agent = CodeAgent(
+    tools=[DuckDuckGoSearchTool()],
+    model=model
+)
+class ResearchInterface:
+    def __init__(self):
+        self.file_upload_folder = "uploaded_files"
+        os.makedirs(self.file_upload_folder, exist_ok=True)
+    def detect_device(self, request: gr.Request):
+        user_agent = request.headers.get("user-agent", "").lower()
+        return "Mobile" if ("mobile" in user_agent) else "Desktop"
+    def format_mla_response(self, content: str) -> str:
+        """Formats agent responses in MLA style"""
+        return f"{content}\n\n*Note: Research conducted using qResearch AI system (qResearch, 2024)*"
+    def interact_with_agent(self, messages, chatbot, session_state):
+        # Agent interaction logic remains similar
+        # Modified response formatting for MLA
         try:
             messages.append(gr.ChatMessage(role="user", content=prompt))
             yield messages
             for msg in stream_to_gradio(session_state['agent'], task=prompt, reset_agent_memory=False):
+                formatted_msg = self.format_mla_response(msg.content)
+                messages.append(formatted_msg)
                 yield messages
             yield messages
         except Exception as e:
             print(f"Error in interaction: {str(e)}")
             raise
+    def create_interface(self):
         with gr.Blocks(theme="ocean", fill_height=True) as demo:
             @gr.render()
             def layout(request: gr.Request):
                 device = self.detect_device(request)
                 if device == "Desktop":
+                    with gr.Blocks(fill_height=True) as sidebar_demo:
                         with gr.Sidebar():
+                            gr.Markdown("""# qResearch - Advanced AI Research System
+                            Developed as an open-source alternative to proprietary research assistants.
+                            """)
+                            # Interface elements remain similar
+                        with gr.Row():
+                            gr.Markdown("<div style='text-align: center; width: 100%; margin-top: 20px'>"
+                                      "Research conducted using qResearch AI system (qResearch, 2024)"
+                                      "</div>")
                 else:
+                    # Mobile interface
+                    with gr.Blocks(fill_height=True) as simple_demo:
+                        gr.Markdown("""# qResearch Mobile
+                        *Advanced research capabilities in your pocket*""")
+                        # Mobile interface elements
+        return demo
+if __name__ == "__main__":
+    research_interface = ResearchInterface()
+    demo = research_interface.create_interface()
+    demo.launch()