#!/usr/bin/env python3
"""
Test script for the PDF processor API

Uploads a PDF to the API's ``/convert`` endpoint, prints a summary of the
JSON response and stores the returned Markdown and the full response under
``test_output/``.
"""

import argparse
import json
import os
from pathlib import Path

import requests

# Directory (relative to the current working directory) that receives the
# saved Markdown and JSON files.
OUTPUT_DIR = Path('test_output')

# Without a timeout ``requests`` waits indefinitely for an unresponsive
# server. The default is generous because the request uploads a whole PDF
# and waits for the conversion result.
DEFAULT_TIMEOUT_SECONDS = 300.0


def _print_summary(result):
    """Print the filename, status and output-file listing of a response."""
    print("\nResponse summary:")
    print(f"Filename: {result.get('filename', 'N/A')}")
    print(f"Status: {result.get('status', 'N/A')}")

    print("\nOutput files:")
    for file_type, file_path in result.get('output_files', {}).items():
        print(f"- {file_type}: {file_path}")


def _save_outputs(result, pdf_path, output_dir=OUTPUT_DIR):
    """Write the Markdown content and the full JSON response to output_dir."""
    output_dir.mkdir(exist_ok=True)
    stem = Path(pdf_path).stem

    # A JSON ``null`` would otherwise reach ``write`` as ``None``.
    md_content = result.get('markdown_content') or ''
    output_file = output_dir / f"{stem}_output.md"
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent characters found in the converted document.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(md_content)
    print(f"\nMarkdown content saved to: {output_file}")

    response_file = output_dir / f"{stem}_response.json"
    with open(response_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)
    print(f"Full response saved to: {response_file}")


def test_api(api_url, pdf_path, timeout=DEFAULT_TIMEOUT_SECONDS):
    """
    Test the PDF processor API by sending a PDF file and checking the response

    Args:
        api_url: Base URL of the API; ``/convert`` is appended to it.
        pdf_path: Path of the PDF file to upload.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds);
            ``None`` disables the timeout.

    Returns:
        None. Progress, results and errors are reported on stdout.
    """
    print(f"Testing API at {api_url} with PDF file: {pdf_path}")

    # ``is_file`` also rejects directories, which would otherwise pass an
    # existence check and then fail when opened.
    if not Path(pdf_path).is_file():
        print(f"Error: PDF file not found at {pdf_path}")
        return

    # Strip a trailing slash so the endpoint never becomes ``//convert``.
    endpoint = f"{api_url.rstrip('/')}/convert"

    # Top-level boundary of a command-line script: any failure (I/O,
    # network, malformed response) is reported instead of raised.
    try:
        # Keep the upload file open only for the duration of the request.
        with open(pdf_path, 'rb') as pdf_file:
            files = {
                'file': (os.path.basename(pdf_path), pdf_file, 'application/pdf'),
            }
            print("Sending request to API...")
            response = requests.post(endpoint, files=files, timeout=timeout)

        if response.status_code != 200:
            print(f"Request failed with status code: {response.status_code}")
            print(f"Response content: {response.text}")
            return

        print("Request successful!")
        result = response.json()
        _print_summary(result)
        _save_outputs(result, pdf_path)
    except Exception as e:
        print(f"Error during API test: {e}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Test the PDF processor API")
    parser.add_argument(
        "--api",
        default="http://localhost:7860",
        help="API URL (default: http://localhost:7860)",
    )
    parser.add_argument("--pdf", required=True, help="Path to the PDF file to test")
    parser.add_argument(
        "--timeout",
        type=float,
        default=DEFAULT_TIMEOUT_SECONDS,
        help="Request timeout in seconds (default: 300)",
    )

    args = parser.parse_args()
    test_api(args.api, args.pdf, timeout=args.timeout)