#!/usr/bin/env python3
"""
Test script for the PDF processor API.
"""
import requests | |
import argparse | |
import os | |
import json | |
from pathlib import Path | |
def test_api(api_url, pdf_path, timeout=600):
    """
    Test the PDF processor API by sending a PDF file and checking the response.

    The PDF is uploaded as multipart form data to ``{api_url}/convert``. On an
    HTTP 200 reply the JSON body is summarised on stdout, and both the
    returned markdown and the full JSON response are written to the
    ``test_output`` directory (created if missing). Every failure is reported
    on stdout instead of being raised, so the function always returns ``None``.

    Args:
        api_url: Base URL of the API; a trailing slash is tolerated.
        pdf_path: Path to the PDF file to upload.
        timeout: Seconds to wait for the server, passed to ``requests.post``.
            ``None`` waits indefinitely (the previous behaviour).
    """
    print(f"Testing API at {api_url} with PDF file: {pdf_path}")

    if not os.path.exists(pdf_path):
        print(f"Error: PDF file not found at {pdf_path}")
        return

    # Avoid "//convert" when the base URL already ends with a slash.
    endpoint = f"{api_url.rstrip('/')}/convert"

    # Send the PDF file to the API
    try:
        with open(pdf_path, 'rb') as pdf_file:
            files = {'file': (os.path.basename(pdf_path), pdf_file, 'application/pdf')}
            print("Sending request to API...")
            # Without a timeout an unresponsive server would hang the script.
            response = requests.post(endpoint, files=files, timeout=timeout)
    except (OSError, requests.RequestException) as e:
        # OSError: the PDF could not be read; RequestException: network/HTTP failure.
        print(f"Error during API test: {e}")
        return

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        print(f"Response content: {response.text}")
        return

    print("Request successful!")
    try:
        result = response.json()
    except ValueError as e:
        # The body of a 200 response was not valid JSON.
        print(f"Error during API test: {e}")
        return
    if not isinstance(result, dict):
        print(f"Error during API test: expected a JSON object, got {type(result).__name__}")
        return

    # Print response summary
    print("\nResponse summary:")
    print(f"Filename: {result.get('filename', 'N/A')}")
    print(f"Status: {result.get('status', 'N/A')}")

    # Check output files
    output_files = result.get('output_files') or {}
    print("\nOutput files:")
    if isinstance(output_files, dict):
        for file_type, file_path in output_files.items():
            print(f"- {file_type}: {file_path}")
    else:
        # Unexpected shape: show it as-is rather than failing the whole run.
        print(f"- {output_files}")

    # A missing or null markdown field is saved as an empty file.
    md_content = result.get('markdown_content') or ''
    if not isinstance(md_content, str):
        md_content = str(md_content)

    output_dir = Path('test_output')
    stem = Path(pdf_path).stem
    try:
        output_dir.mkdir(exist_ok=True)

        # Save the markdown content to a file. UTF-8 is explicit so that
        # non-ASCII text does not depend on the platform's default encoding.
        output_file = output_dir / f"{stem}_output.md"
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(md_content)
        print(f"\nMarkdown content saved to: {output_file}")

        # Save the full response as JSON
        response_file = output_dir / f"{stem}_response.json"
        with open(response_file, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2)
        print(f"Full response saved to: {response_file}")
    except (OSError, ValueError) as e:
        # OSError: directory/file could not be written;
        # ValueError: text could not be encoded (e.g. lone surrogates).
        print(f"Error during API test: {e}")
if __name__ == "__main__":
    # Command-line entry point: collect the API base URL and the PDF path,
    # then run a single upload/inspect cycle against the service.
    cli = argparse.ArgumentParser(description="Test the PDF processor API")
    cli.add_argument(
        "--api",
        default="http://localhost:7860",
        help="API URL (default: http://localhost:7860)",
    )
    cli.add_argument(
        "--pdf",
        required=True,
        help="Path to the PDF file to test",
    )
    options = cli.parse_args()
    test_api(options.api, options.pdf)