pdf-upload-extractor-tool

Running

App Files Files Community

matterattetatte committed on Jan 9

Commit

e13ea1d

verified ·

1 Parent(s): 71f6efc

Update tool.py

Browse files

Files changed (1) hide show

tool.py +39 -31

tool.py CHANGED Viewed

@@ -1,38 +1,46 @@
 from smolagents import Tool
-from typing import Optional
 class SimpleTool(Tool):
-    # Tool that looks up the travel duration between two places through the `googlemaps` client.
-    name = "get_travel_duration"
-    description = "Gets the travel time between two places."
-    inputs = {"start_location":{"type":"string","description":"the place from which you start your ride"},"destination_location":{"type":"string","description":"the place of arrival"},"transportation_mode":{"type":"string","nullable":True,"description":"The transportation mode, in 'driving', 'walking', 'bicycling', or 'transit'. Defaults to 'driving'."}}
-    output_type = "string"
-    def forward(self, start_location: str, destination_location: str, transportation_mode: Optional[str] = None) -> str:
-        """Gets the travel time between two places.
-        Args:
-            start_location: the place from which you start your ride
-            destination_location: the place of arrival
-            transportation_mode: The transportation mode, in 'driving', 'walking', 'bicycling', or 'transit'. Defaults to 'driving'.
-        Returns:
-            The duration text of the first leg of the first returned route, a fixed
-            message when no route is returned, or the error message when the lookup raises.
-        """
-        import os   # All imports are placed within the function, to allow for sharing to Hub.
-        import googlemaps
-        from datetime import datetime
-        # The API key is read from the GMAPS_API_KEY environment variable.
-        gmaps = googlemaps.Client(os.getenv("GMAPS_API_KEY"))
-        if transportation_mode is None:
-            transportation_mode = "driving"
-        try:
-            directions_result = gmaps.directions(
-                start_location,
-                destination_location,
-                mode=transportation_mode,
-                # NOTE(review): hard-coded date; presumably the service rejects departure times in the past - confirm and refresh.
-                departure_time=datetime(2025, 12, 6, 11, 0), # At 11, date far in the future
-            )
-            if not directions_result:
-                return "No way found between these places with the required transportation mode."
-            return directions_result[0]["legs"][0]["duration"]["text"]
-        except Exception as e:
-            print(e)
-            # Return the message text rather than the exception object, to honor the declared `str` return type.
-            return str(e)

 from smolagents import Tool
 class SimpleTool(Tool):
+    # Tool that extracts the text of every PDF file found in a folder, using pypdf's PdfReader.
+    name = "pdf_extraction"
+    description = """Reads and extracts the text from all PDF files in the given folder and returns the combined text."""
+    inputs = {
+        "path": { "type": "string", "description": "Folder location of PDF files", "default": "pdfs", "nullable": True }
+    }
+    # NOTE(review): forward() always returns a str, so "string" may be the tighter output type - confirm before changing.
+    output_type = "any"
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Import at construction time so a missing dependency is reported with an install hint.
+        try:
+            from pypdf import PdfReader
+        except ImportError as e:
+            raise ImportError(
+                "You must install package `pypdf` to run this tool: for instance, run `pip install pypdf`."
+            ) from e
+        self.reader_class = PdfReader
+    def forward(self, path: str = "pdfs") -> str:
+        """Extract the text of every PDF file in a folder and return it as one string.
+        Args:
+            path: Folder location of PDF files. Falls back to "pdfs" when None is passed.
+        Returns:
+            One "### File: <name>" section per PDF (its extracted text, or the read error),
+            separated by blank lines; or an explanatory message when the folder is missing
+            or contains no PDF files.
+        """
+        import os  # Function-scope import: `os` is not imported at module level.
+        # `path` is declared nullable in `inputs`, so None must not reach the os.path calls.
+        if path is None:
+            path = "pdfs"
+        # Ensure the folder exists; isdir also rejects a path that points at a regular file,
+        # which os.listdir would otherwise fail on.
+        if not os.path.isdir(path):
+            return f"Error: The folder '{path}' does not exist."
+        # Find all PDF files in the folder (any extension casing), sorted so the output order is deterministic.
+        pdf_files = sorted(file for file in os.listdir(path) if file.lower().endswith(".pdf"))
+        if not pdf_files:
+            return f"No PDF files found in the folder '{path}'."
+        combined_text = []
+        # Iterate over each PDF file and extract its text
+        for pdf_file in pdf_files:
+            pdf_path = os.path.join(path, pdf_file)
+            try:
+                reader = self.reader_class(pdf_path)
+                # `or ""` guards against a page whose extraction yields None instead of a string.
+                file_text = "".join(page.extract_text() or "" for page in reader.pages)
+                combined_text.append(f"### File: {pdf_file}\n{file_text.strip()}")
+            except Exception as e:
+                # Best effort: one unreadable file is reported in place and must not abort the batch.
+                combined_text.append(f"### File: {pdf_file}\nError reading file: {str(e)}")
+        # Return all combined results
+        return "\n\n".join(combined_text)