Update tool.py
Browse files
tool.py
CHANGED
@@ -1,38 +1,46 @@
|
|
1 |
from smolagents import Tool
|
2 |
-
from typing import Optional
|
3 |
|
4 |
class SimpleTool(Tool):
|
5 |
-
name = "
|
6 |
-
description = "
|
7 |
-
inputs = {
|
8 |
-
|
|
|
|
|
9 |
|
10 |
-
def
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
"""
|
18 |
-
import os # All imports are placed within the function, to allow for sharing to Hub.
|
19 |
-
import googlemaps
|
20 |
-
from datetime import datetime
|
21 |
|
22 |
-
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
1 |
from smolagents import Tool
|
|
|
2 |
|
3 |
class SimpleTool(Tool):
    """Tool that extracts the text of every PDF file found in a folder.

    The text of each file is prefixed with a ``### File: <name>`` header and
    the per-file sections are joined with blank lines into a single string.
    """

    name = "pdf_extraction"
    description = """Reads and extracts the text from all PDF files in the given folder and returns the combined text."""
    inputs = {
        "path": { "type": "string", "description": "Folder location of PDF files", "default": "pdfs", "nullable": True }
    }
    output_type = "any"

    def __init__(self, *args, **kwargs):
        """Initialise the tool and resolve the `pypdf` dependency.

        The reader class is stored on the instance so that `forward` does not
        need to import `pypdf` again.

        Raises:
            ImportError: If the `pypdf` package is not installed.
        """
        super().__init__(*args, **kwargs)
        try:
            from pypdf import PdfReader
        except ImportError as err:
            # Chain the original error so the underlying import failure stays visible.
            raise ImportError(
                "You must install package `pypdf` to run this tool: for instance, run `pip install pypdf`."
            ) from err
        self.reader_class = PdfReader

    def forward(self, path: str = "pdfs") -> str:
        """Extract and combine the text of all PDF files in `path`.

        Args:
            path: Folder containing the PDF files. ``None`` is treated as the
                default folder ``"pdfs"`` because the input schema declares
                the argument as nullable.

        Returns:
            The combined text of all PDFs, one ``### File: <name>`` section per
            file. If the folder is missing, is not a folder, or contains no
            PDF files, a human-readable message is returned instead. A file
            that cannot be read contributes an ``Error reading file`` section
            rather than aborting the whole run.
        """
        # Imported inside the method so the tool stays self-contained when shared to the Hub.
        import os

        # The schema allows a null path; fall back to the documented default.
        if path is None:
            path = "pdfs"

        # Ensure the folder exists
        if not os.path.exists(path):
            return f"Error: The folder '{path}' does not exist."
        # os.listdir would raise on a regular file, so report it explicitly.
        if not os.path.isdir(path):
            return f"Error: '{path}' is not a folder."

        # Find all PDF files in the folder. The extension match is
        # case-insensitive, and sorting makes the output order deterministic
        # (os.listdir returns entries in arbitrary order).
        pdf_files = sorted(
            entry for entry in os.listdir(path) if entry.lower().endswith(".pdf")
        )
        if not pdf_files:
            return f"No PDF files found in the folder '{path}'."

        combined_text = []

        # Iterate over each PDF file and extract its text
        for pdf_file in pdf_files:
            pdf_path = os.path.join(path, pdf_file)
            try:
                reader = self.reader_class(pdf_path)
                # Guard against a page yielding no text so the join cannot fail.
                file_text = "".join(page.extract_text() or "" for page in reader.pages)
                combined_text.append(f"### File: {pdf_file}\n{file_text.strip()}")
            except Exception as e:
                # Best effort: one unreadable file must not discard the others.
                combined_text.append(f"### File: {pdf_file}\nError reading file: {str(e)}")

        # Return all combined results
        return "\n\n".join(combined_text)
|