import os
import io
import cv2
import numpy as np
import tempfile
import time
import math
import json
from PIL import Image, ImageEnhance, ImageFilter
from pdf2image import convert_from_bytes
import streamlit as st
import logging
import concurrent.futures
from pathlib import Path

# Configure logging
logger = logging.getLogger("preprocessing")
logger.setLevel(logging.INFO)

# Ensure logs directory exists
def ensure_log_directory(config):
    """Create logs directory if it doesn't exist"""
    if config.get("logging", {}).get("enabled", False):
        log_path = config.get("logging", {}).get("output_path", "logs/preprocessing_metrics.json")
        log_dir = os.path.dirname(log_path)
        if log_dir:
            Path(log_dir).mkdir(parents=True, exist_ok=True)
            
def log_preprocessing_metrics(metrics, config):
    """Log preprocessing metrics to JSON file"""
    if not config.get("enabled", False):
        return
        
    log_path = config.get("output_path", "logs/preprocessing_metrics.json")
    ensure_log_directory({"logging": {"enabled": True, "output_path": log_path}})
    
    # Add timestamp
    metrics["timestamp"] = time.strftime("%Y-%m-%d %H:%M:%S")
    
    # Append to log file
    try:
        existing_data = []
        if os.path.exists(log_path):
            with open(log_path, 'r') as f:
                existing_data = json.load(f)
                if not isinstance(existing_data, list):
                    existing_data = [existing_data]
        
        existing_data.append(metrics)
        
        with open(log_path, 'w') as f:
            json.dump(existing_data, f, indent=2)
            
        logger.info(f"Logged preprocessing metrics to {log_path}")
    except Exception as e:
        logger.error(f"Error logging preprocessing metrics: {str(e)}")

def get_document_config(document_type, global_config):
    """
    Get document-specific preprocessing configuration by merging with global settings.
    
    Args:
        document_type: The type of document (e.g., 'standard', 'newspaper', 'handwritten')
        global_config: The global preprocessing configuration
        
    Returns:
        A merged configuration dictionary with document-specific overrides
    """
    # Start with copies of the global sections so that document-specific
    # overrides never mutate the shared global configuration
    config = {
        "deskew": dict(global_config.get("deskew", {})),
        "thresholding": dict(global_config.get("thresholding", {})),
        "morphology": dict(global_config.get("morphology", {})),
        "performance": dict(global_config.get("performance", {})),
        "logging": dict(global_config.get("logging", {}))
    }
    
    # Apply document-specific overrides if they exist
    doc_types = global_config.get("document_types", {})
    if document_type in doc_types:
        doc_config = doc_types[document_type]
        
        # Merge document-specific settings into the config
        for section in doc_config:
            if section in config:
                config[section].update(doc_config[section])
    
    return config
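
# Illustrative example of the expected configuration shape (key names mirror
# the lookups above; concrete values are assumptions, not a documented schema):
#
#   global_config = {
#       "deskew": {"enabled": True, "max_angle": 45.0},
#       "thresholding": {"method": "adaptive", "adaptive_block_size": 11},
#       "morphology": {"enabled": False},
#       "performance": {},
#       "logging": {"enabled": False},
#       "document_types": {
#           "newspaper": {"thresholding": {"adaptive_block_size": 25}},
#       },
#   }
#   config = get_document_config("newspaper", global_config)
#   # config["thresholding"]["adaptive_block_size"] == 25; other sections unchanged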

def deskew_image(img_array, config):
    """
    Detect and correct skew in document images.
    
    Uses a combination of methods (minAreaRect and/or Hough transform)
    to estimate the skew angle more robustly.
    
    Args:
        img_array: Input image as numpy array
        config: Deskew configuration dict
    
    Returns:
        Deskewed image as numpy array, estimated angle, success flag
    """
    if not config.get("enabled", False):
        return img_array, 0.0, True
    
    # Convert to grayscale if needed
    gray = img_array if len(img_array.shape) == 2 else cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
    
    # Start with a threshold to get binary image for angle detection
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    
    angles = []
    angle_threshold = config.get("angle_threshold", 0.1)
    max_angle = config.get("max_angle", 45.0)
    
    # Method 1: minAreaRect approach
    try:
        # Find all contours
        contours, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        
        # Filter contours by area to avoid noise
        min_area = binary.shape[0] * binary.shape[1] * 0.0001  # 0.01% of image area
        filtered_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_area]
        
        # Get angles from rotated rectangles around contours
        for contour in filtered_contours:
            rect = cv2.minAreaRect(contour)
            width, height = rect[1]
            
            # Calculate the angle based on the longer side
            # (This is important for getting the orientation right)
            angle = rect[2]
            if width < height:
                angle += 90
                
            # Normalize angle to -45 to 45 range
            if angle > 45:
                angle -= 90
            if angle < -45:
                angle += 90
                
            # Clamp angle to max limit
            angle = max(min(angle, max_angle), -max_angle)
            angles.append(angle)
    except Exception as e:
        logger.error(f"Error in minAreaRect skew detection: {str(e)}")
    
    # Method 2: Hough Transform approach (if enabled)
    if config.get("use_hough", True):
        try:
            # Apply Canny edge detection
            edges = cv2.Canny(gray, 50, 150, apertureSize=3)
            
            # Apply Hough lines
            lines = cv2.HoughLinesP(edges, 1, np.pi/180, 
                                   threshold=100, minLineLength=100, maxLineGap=10)
            
            if lines is not None:
                for line in lines:
                    x1, y1, x2, y2 = line[0]
                    if x2 - x1 != 0:  # Avoid division by zero
                        # Calculate line angle in degrees
                        angle = math.atan2(y2 - y1, x2 - x1) * 180.0 / np.pi
                        
                        # Normalize angle to -45 to 45 range
                        if angle > 45:
                            angle -= 90
                        if angle < -45:
                            angle += 90
                            
                        # Clamp angle to max limit
                        angle = max(min(angle, max_angle), -max_angle)
                        angles.append(angle)
        except Exception as e:
            logger.error(f"Error in Hough transform skew detection: {str(e)}")
    
    # If no angles were detected, return original image
    if not angles:
        logger.warning("No skew angles detected, using original image")
        return img_array, 0.0, False
    
    # Combine angles using the specified consensus method
    consensus_method = config.get("consensus_method", "average")
    if consensus_method == "average":
        final_angle = sum(angles) / len(angles)
    elif consensus_method == "median":
        final_angle = sorted(angles)[len(angles) // 2]
    elif consensus_method == "min":
        final_angle = min(angles, key=abs)
    elif consensus_method == "max":
        final_angle = max(angles, key=abs)
    else:
        final_angle = sum(angles) / len(angles)  # Default to average
    
    # If angle is below threshold, don't rotate
    if abs(final_angle) < angle_threshold:
        logger.info(f"Detected angle ({final_angle:.2f}°) is below threshold, skipping deskew")
        return img_array, final_angle, True
    
    # Log the detected angle
    logger.info(f"Deskewing image with angle: {final_angle:.2f}°")
    
    # Get image dimensions
    h, w = img_array.shape[:2]
    center = (w // 2, h // 2)
    
    # Get rotation matrix
    rotation_matrix = cv2.getRotationMatrix2D(center, final_angle, 1.0)
    
    # Calculate new image dimensions
    abs_cos = abs(rotation_matrix[0, 0])
    abs_sin = abs(rotation_matrix[0, 1])
    new_w = int(h * abs_sin + w * abs_cos)
    new_h = int(h * abs_cos + w * abs_sin)
    
    # Adjust the rotation matrix to account for new dimensions
    rotation_matrix[0, 2] += (new_w / 2) - center[0]
    rotation_matrix[1, 2] += (new_h / 2) - center[1]
    
    # Perform the rotation
    try:
        # Determine the number of channels to create the correct output array
        if len(img_array.shape) == 3:
            rotated = cv2.warpAffine(img_array, rotation_matrix, (new_w, new_h), 
                                   flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, 
                                   borderValue=(255, 255, 255))
        else:
            rotated = cv2.warpAffine(img_array, rotation_matrix, (new_w, new_h), 
                                   flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, 
                                   borderValue=255)
        return rotated, final_angle, True
    except Exception as e:
        logger.error(f"Error rotating image: {str(e)}")
        if config.get("fallback", {}).get("enabled", True):
            logger.info("Using original image as fallback after rotation failure")
            return img_array, final_angle, False
        return img_array, final_angle, False
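
# Example deskew configuration (keys taken from the .get() calls above; the
# values shown are plausible defaults, not settings shipped with the app):
#
#   deskew_config = {
#       "enabled": True,
#       "angle_threshold": 0.1,        # degrees below which rotation is skipped
#       "max_angle": 45.0,             # clamp for detected angles
#       "use_hough": True,             # also estimate angles from Hough lines
#       "consensus_method": "median",  # "average" | "median" | "min" | "max"
#       "fallback": {"enabled": True},
#   }
#   rotated, angle, ok = deskew_image(page_array, deskew_config)  # page_array: RGB or grayscale numpy array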

def preblur(img_array, config):
    """
    Apply pre-filtering blur to stabilize thresholding results.
    
    Args:
        img_array: Input image as numpy array
        config: Pre-blur configuration dict
    
    Returns:
        Blurred image as numpy array
    """
    if not config.get("enabled", False):
        return img_array
    
    method = config.get("method", "gaussian")
    kernel_size = config.get("kernel_size", 3)
    
    # Ensure kernel size is odd
    if kernel_size % 2 == 0:
        kernel_size += 1
    
    try:
        if method == "gaussian":
            return cv2.GaussianBlur(img_array, (kernel_size, kernel_size), 0)
        elif method == "median":
            return cv2.medianBlur(img_array, kernel_size)
        else:
            logger.warning(f"Unknown blur method: {method}, using gaussian")
            return cv2.GaussianBlur(img_array, (kernel_size, kernel_size), 0)
    except Exception as e:
        logger.error(f"Error applying {method} blur: {str(e)}")
        return img_array

def apply_threshold(img_array, config):
    """
    Apply thresholding to create binary image.
    
    Supports Otsu's method and adaptive thresholding.
    Includes pre-filtering and fallback mechanisms.
    
    Args:
        img_array: Input image as numpy array
        config: Thresholding configuration dict
    
    Returns:
        Binary image as numpy array, success flag
    """
    method = config.get("method", "adaptive")
    if method == "none":
        return img_array, True
    
    # Convert to grayscale if needed
    gray = img_array if len(img_array.shape) == 2 else cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
    
    # Apply pre-blur if configured
    preblur_config = config.get("preblur", {})
    if preblur_config.get("enabled", False):
        gray = preblur(gray, preblur_config)
    
    # Fall back to adaptive thresholding for any unrecognized method
    if method not in ("otsu", "adaptive"):
        logger.warning(f"Unknown thresholding method: {method}, using adaptive")
        method = "adaptive"

    binary = None
    try:
        if method == "otsu":
            # Apply Otsu's thresholding
            _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        else:
            # Apply adaptive thresholding
            block_size = config.get("adaptive_block_size", 11)
            constant = config.get("adaptive_constant", 2)

            # Ensure block size is odd
            if block_size % 2 == 0:
                block_size += 1

            binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY, block_size, constant)
    except Exception as e:
        logger.error(f"Error applying {method} thresholding: {str(e)}")
        if config.get("fallback", {}).get("enabled", True):
            logger.info("Using original grayscale image as fallback after thresholding failure")
            return gray, False
        return gray, False
    
    # Calculate percentage of non-zero pixels for logging
    nonzero_pct = np.count_nonzero(binary) / binary.size * 100
    logger.info(f"Binary image has {nonzero_pct:.2f}% non-zero pixels")
    
    # Check if thresholding was successful (crude check)
    if nonzero_pct < 1 or nonzero_pct > 99:
        logger.warning(f"Thresholding produced extreme result ({nonzero_pct:.2f}% non-zero)")
        if config.get("fallback", {}).get("enabled", True):
            logger.info("Using original grayscale image as fallback after poor thresholding")
            return gray, False
    
    return binary, True
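
# Example thresholding configuration (keys match the .get() calls above;
# values are illustrative):
#
#   threshold_config = {
#       "method": "adaptive",          # "otsu" | "adaptive" | "none"
#       "adaptive_block_size": 11,     # odd neighbourhood size in pixels
#       "adaptive_constant": 2,        # constant subtracted from the local mean
#       "preblur": {"enabled": True, "method": "median", "kernel_size": 3},
#       "fallback": {"enabled": True},
#   }
#   binary, ok = apply_threshold(gray_array, threshold_config)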

def apply_morphology(binary_img, config):
    """
    Apply morphological operations to clean up binary image.
    
    Supports opening, closing, or both operations.
    
    Args:
        binary_img: Binary image as numpy array
        config: Morphology configuration dict
    
    Returns:
        Processed binary image as numpy array
    """
    if not config.get("enabled", False):
        return binary_img
    
    operation = config.get("operation", "close")
    kernel_size = config.get("kernel_size", 1)
    kernel_shape = config.get("kernel_shape", "rect")
    
    # Create the structuring element; unknown shapes fall back to a rectangle
    shape_map = {
        "rect": cv2.MORPH_RECT,
        "ellipse": cv2.MORPH_ELLIPSE,
        "cross": cv2.MORPH_CROSS,
    }
    if kernel_shape not in shape_map:
        logger.warning(f"Unknown kernel shape: {kernel_shape}, using rect")
        kernel_shape = "rect"
    kernel = cv2.getStructuringElement(shape_map[kernel_shape],
                                       (kernel_size*2+1, kernel_size*2+1))
    
    result = binary_img
    try:
        if operation == "open":
            # Opening: Erosion followed by dilation - removes small noise
            result = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, kernel)
        elif operation == "close":
            # Closing: Dilation followed by erosion - fills small holes
            result = cv2.morphologyEx(binary_img, cv2.MORPH_CLOSE, kernel)
        elif operation == "both":
            # Both operations in sequence
            result = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, kernel)
            result = cv2.morphologyEx(result, cv2.MORPH_CLOSE, kernel)
        else:
            logger.warning(f"Unknown morphological operation: {operation}, using close")
            result = cv2.morphologyEx(binary_img, cv2.MORPH_CLOSE, kernel)
    except Exception as e:
        logger.error(f"Error applying morphological operation: {str(e)}")
        return binary_img
    
    return result
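
# Example morphology configuration (illustrative values; the effective kernel
# is (kernel_size*2+1) pixels square, so kernel_size=1 gives a 3x3 kernel):
#
#   morph_config = {
#       "enabled": True,
#       "operation": "close",      # "open" | "close" | "both"
#       "kernel_size": 1,
#       "kernel_shape": "rect",    # "rect" | "ellipse" | "cross"
#   }
#   cleaned = apply_morphology(binary, morph_config)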

@st.cache_data(ttl=24*3600, show_spinner=False)  # Cache for 24 hours
def convert_pdf_to_images(pdf_bytes, dpi=150, rotation=0):
    """Convert PDF bytes to a list of images with caching"""
    try:
        images = convert_from_bytes(pdf_bytes, dpi=dpi)
        
        # Apply rotation if specified
        if rotation != 0 and images:
            rotated_images = []
            for img in images:
                rotated_img = img.rotate(rotation, expand=True, resample=Image.BICUBIC)
                rotated_images.append(rotated_img)
            return rotated_images
        
        return images
    except Exception as e:
        st.error(f"Error converting PDF: {str(e)}")
        logger.error(f"PDF conversion error: {str(e)}")
        return []
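
# Example (illustrative): converting an uploaded PDF at a higher DPI with a
# 90° counter-clockwise correction applied to every page:
#
#   pages = convert_pdf_to_images(uploaded_file.getvalue(), dpi=300, rotation=90)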

@st.cache_data(ttl=24*3600, show_spinner=False, hash_funcs={dict: lambda x: str(sorted(x.items()))})
def preprocess_image(image_bytes, preprocessing_options):
    """
    Conservative preprocessing for scanned documents (with a gentler CLAHE path for
    handwritten material) and an early exit for clean, straight scans.
    Applies light processing only: grayscale → gentle denoise → conservative contrast.
    
    Args:
        image_bytes: Image content as bytes
        preprocessing_options: Dictionary with document_type, grayscale, denoise, contrast options
        
    Returns:
        Processed image bytes or original image bytes if no processing needed
    """
    # Setup basic console logging
    logger = logging.getLogger("image_preprocessor")
    logger.setLevel(logging.INFO)
    
    # Log which preprocessing options are being applied
    logger.info(f"Document type: {preprocessing_options.get('document_type', 'standard')}")
    
    # Check if any preprocessing is actually requested
    has_preprocessing = (
        preprocessing_options.get("grayscale", False) or
        preprocessing_options.get("denoise", False) or
        preprocessing_options.get("contrast", 0) != 0
    )
    
    # Convert bytes to PIL Image
    image = Image.open(io.BytesIO(image_bytes))
    
    # Check for minimal skew: a clean, straight scan with no preprocessing
    # options selected can be returned unchanged, avoiding unnecessary work
    try:
        from utils.image_utils import detect_skew
        skew_angle = detect_skew(image)
        if abs(skew_angle) < 0.5 and not has_preprocessing:
            logger.info(f"Document has minimal skew ({skew_angle:.2f}°) and no options selected, skipping preprocessing")
            return image_bytes
    except Exception as e:
        logger.warning(f"Error in skew detection: {str(e)}, continuing with preprocessing")
    
    # If no preprocessing options are selected, return the original image
    if not has_preprocessing:
        logger.info("No preprocessing options selected, skipping preprocessing")
        return image_bytes
    
    # Initialize metrics for logging
    metrics = {
        "file": preprocessing_options.get("filename", "unknown"),
        "document_type": preprocessing_options.get("document_type", "standard"),
        "preprocessing_applied": []
    }
    start_time = time.time()
    
    # Handle RGBA images (transparency) by converting to RGB
    if image.mode == 'RGBA':
        # Convert RGBA to RGB by compositing onto white background
        logger.info("Converting RGBA image to RGB")
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])  # 3 is the alpha channel
        image = background
        metrics["preprocessing_applied"].append("alpha_conversion")
    elif image.mode not in ('RGB', 'L'):
        # Convert other modes to RGB
        logger.info(f"Converting {image.mode} image to RGB")
        image = image.convert('RGB')
        metrics["preprocessing_applied"].append("format_conversion")
    
    # Convert to NumPy array for OpenCV processing
    img_array = np.array(image)
    
    # Apply grayscale if requested (useful for handwritten text)
    if preprocessing_options.get("grayscale", False):
        if len(img_array.shape) == 3:  # Only convert if it's not already grayscale
            # For handwritten documents, apply gentle CLAHE to enhance contrast locally
            if preprocessing_options.get("document_type") == "handwritten":
                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
                clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8,8))  # Conservative clip limit
                img_array = clahe.apply(img_array)
            else:
                # Standard grayscale for printed documents
                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
            
            metrics["preprocessing_applied"].append("grayscale")
    
    # Apply light denoising if requested
    if preprocessing_options.get("denoise", False):
        try:
            # Apply very gentle denoising
            is_color = len(img_array.shape) == 3 and img_array.shape[2] == 3
            if is_color:
                # Very light color denoising with conservative parameters
                img_array = cv2.fastNlMeansDenoisingColored(img_array, None, 2, 2, 3, 7)
            else:
                # Very light grayscale denoising
                img_array = cv2.fastNlMeansDenoising(img_array, None, 2, 3, 7)
            
            metrics["preprocessing_applied"].append("light_denoise")
        except Exception as e:
            logger.error(f"Denoising error: {str(e)}")
    
    # Apply contrast adjustment if requested (conservative range)
    contrast_value = preprocessing_options.get("contrast", 0)
    if contrast_value != 0:
        # Use a gentler contrast adjustment factor
        contrast_factor = 1 + (contrast_value / 200)  # Conservative scaling factor
        
        # Convert NumPy array back to PIL Image for contrast adjustment
        if len(img_array.shape) == 2:  # If grayscale, convert to RGB for PIL
            image = Image.fromarray(cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB))
        else:
            image = Image.fromarray(img_array)
            
        enhancer = ImageEnhance.Contrast(image)
        image = enhancer.enhance(contrast_factor)
        
        # Convert back to NumPy array
        img_array = np.array(image)
        metrics["preprocessing_applied"].append(f"contrast_{contrast_value}")
    
    # Convert back to PIL Image
    if len(img_array.shape) == 2:  # If grayscale, convert to RGB for saving
        processed_image = Image.fromarray(cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB))
    else:
        processed_image = Image.fromarray(img_array)
    
    # Record total processing time
    metrics["processing_time"] = (time.time() - start_time) * 1000  # ms
    
    # Higher quality for OCR processing
    byte_io = io.BytesIO()
    try:
        # Make sure the image is in RGB mode before saving as JPEG
        if processed_image.mode not in ('RGB', 'L'):
            processed_image = processed_image.convert('RGB')
        
        processed_image.save(byte_io, format='JPEG', quality=92, optimize=True)
        byte_io.seek(0)
        
        logger.info(f"Preprocessing complete. Original image mode: {image.mode}, processed mode: {processed_image.mode}")
        logger.info(f"Original size: {len(image_bytes)/1024:.1f}KB, processed size: {len(byte_io.getvalue())/1024:.1f}KB")
        logger.info(f"Applied preprocessing steps: {', '.join(metrics['preprocessing_applied'])}")
        
        return byte_io.getvalue()
    except Exception as e:
        logger.error(f"Error saving processed image: {str(e)}")
        # Fallback to original image
        logger.info("Using original image as fallback")
        return image_bytes
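
# Example of the preprocessing_options dictionary consumed above (illustrative
# values; "filename" is optional and only used for metric logging):
#
#   options = {
#       "document_type": "handwritten",  # enables the gentle CLAHE path
#       "grayscale": True,
#       "denoise": True,
#       "contrast": 10,                  # maps to a factor of 1 + 10/200 = 1.05
#       "filename": "letter_p1.jpg",
#   }
#   processed_bytes = preprocess_image(raw_bytes, options)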

def create_temp_file(content, suffix, temp_file_paths):
    """Create a temporary file and track it for cleanup"""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(content)
        temp_path = tmp.name
        # Track temporary file for cleanup
        temp_file_paths.append(temp_path)
        logger.info(f"Created temporary file: {temp_path}")
        return temp_path

def apply_preprocessing_to_file(file_bytes, file_ext, preprocessing_options, temp_file_paths):
    """
    Apply conservative preprocessing to file and return path to the temporary file.
    Handles format conversion and user-selected preprocessing options.
    
    Args:
        file_bytes: File content as bytes
        file_ext: File extension (e.g., '.jpg', '.pdf')
        preprocessing_options: Dictionary with document_type and preprocessing options
        temp_file_paths: List to track temporary files for cleanup
        
    Returns:
        Tuple of (temp_file_path, was_processed_flag)
    """
    document_type = preprocessing_options.get("document_type", "standard")
    
    # Check for user-selected preprocessing
    has_preprocessing = (
        preprocessing_options.get("grayscale", False) or
        preprocessing_options.get("denoise", False) or
        preprocessing_options.get("contrast", 0) != 0
    )
    
    # Check for RGBA/transparency that needs conversion
    format_needs_conversion = False
    
    # Only check formats that might have transparency
    if file_ext.lower() in ['.png', '.tif', '.tiff']:
        try:
            # Check if image has transparency
            image = Image.open(io.BytesIO(file_bytes))
            if image.mode == 'RGBA' or image.mode not in ('RGB', 'L'):
                format_needs_conversion = True
        except Exception as e:
            logger.warning(f"Error checking image format: {str(e)}")
    
    # Process if user requested preprocessing OR format needs conversion
    needs_processing = has_preprocessing or format_needs_conversion
    
    if needs_processing:
        # Apply preprocessing
        logger.info(f"Applying preprocessing with options: {preprocessing_options}")
        logger.info(f"Using document type '{document_type}' with advanced preprocessing options")
        
        # Add filename to preprocessing options for logging if available
        if hasattr(file_bytes, 'name'):
            preprocessing_options["filename"] = file_bytes.name
        
        processed_bytes = preprocess_image(file_bytes, preprocessing_options)
        
        # Save processed image to temp file
        temp_path = create_temp_file(processed_bytes, file_ext, temp_file_paths)
        return temp_path, True  # Return path and flag indicating preprocessing was applied
    else:
        # No preprocessing needed, just save the original file
        logger.info("No preprocessing applied - using original image")
        temp_path = create_temp_file(file_bytes, file_ext, temp_file_paths)
        return temp_path, False  # Return path and flag indicating no preprocessing was applied
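
# Minimal end-to-end sketch for running this module directly. The sample path
# and option values below are illustrative assumptions; in the application these
# calls are normally made from the Streamlit UI.
if __name__ == "__main__":
    sample_path = "sample_scan.jpg"  # hypothetical input file
    if os.path.exists(sample_path):
        options = {
            "document_type": "standard",
            "grayscale": True,
            "denoise": False,
            "contrast": 5,
        }
        temp_paths = []
        with open(sample_path, "rb") as f:
            out_path, was_processed = apply_preprocessing_to_file(
                f.read(), ".jpg", options, temp_paths
            )
        print(f"Wrote {out_path} (preprocessing applied: {was_processed})")
        # Clean up the tracked temporary file(s)
        for p in temp_paths:
            os.unlink(p)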