# Import spaces module for ZeroGPU support - Must be first import
try:
    import spaces
    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False

from pathlib import Path
import os
import logging
import sys
import tempfile
import shutil
from typing import Dict, List, Optional, Any, Union
import copy

from src.parsers.parser_interface import DocumentParser
from src.parsers.parser_registry import ParserRegistry

# Import latex2markdown for conversion - No longer needed, using Gemini API
# import latex2markdown

# Configure logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Constants
MODEL_NAME = "stepfun-ai/GOT-OCR-2.0-hf"
STOP_STR = "<|im_end|>"

class GotOcrParser(DocumentParser):
    """Parser implementation using GOT-OCR 2.0 for document text extraction using transformers.
    
    This implementation uses the transformers model directly for better integration with
    ZeroGPU and avoids subprocess complexity.
    """
    
    # Class variables to hold model information only (not the actual model)
    _model_loaded = False
    
    @classmethod
    def get_name(cls) -> str:
        return "GOT-OCR (jpg,png only)"
    
    @classmethod
    def get_supported_ocr_methods(cls) -> List[Dict[str, Any]]:
        return [
            {
                "id": "plain",
                "name": "Plain Text",
                "default_params": {}
            },
            {
                "id": "format",
                "name": "Formatted Text",
                "default_params": {}
            }
        ]
    
    @classmethod
    def get_description(cls) -> str:
        return "GOT-OCR 2.0 parser for converting images to text (requires CUDA)"
    
    @classmethod
    def _check_dependencies(cls) -> bool:
        """Check if all required dependencies are installed."""
        try:
            import torch
            import transformers
            
            # Only check if the modules are importable, DO NOT use torch.cuda here
            # as it would initialize CUDA in the main process
            return True
        except ImportError as e:
            logger.error(f"Missing dependency: {e}")
            return False
    
    def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
        """Parse a document using GOT-OCR 2.0.
        
        Args:
            file_path: Path to the image file
            ocr_method: OCR method to use ('plain', 'format')
            **kwargs: Additional arguments to pass to the model
            
        Returns:
            Extracted text from the image, converted to Markdown if formatted
        """
        # Verify dependencies are installed without initializing CUDA
        if not self._check_dependencies():
            raise ImportError(
                "Required dependencies are missing. Please install: "
                "torch transformers"
            )
        
        # Validate file path and extension
        file_path = Path(file_path)
        if not file_path.exists():
            raise FileNotFoundError(f"Image file not found: {file_path}")
        
        if file_path.suffix.lower() not in ['.jpg', '.jpeg', '.png']:
            raise ValueError(
                f"GOT-OCR only supports JPG and PNG formats. "
                f"Received file with extension: {file_path.suffix}"
            )
        
        # Determine OCR mode based on method
        use_format = ocr_method == "format"
        
        # Log the OCR method being used
        logger.info(f"Using OCR method: {ocr_method or 'plain'}")
        
        # Filter kwargs to remove any objects that can't be pickled (like thread locks)
        safe_kwargs = {}
        for key, value in kwargs.items():
            # Skip thread locks and unpicklable objects
            if not key.startswith('_') and not isinstance(value, type):
                try:
                    # Test if it can be copied - this helps identify unpicklable objects
                    copy.deepcopy(value)
                    safe_kwargs[key] = value
                except (TypeError, pickle.PickleError):
                    logger.warning(f"Skipping unpicklable kwarg: {key}")
        
        # Process the image using transformers
        try:
            # Use the spaces.GPU decorator if available
            if HAS_SPACES:
                # Use string path instead of Path object for better pickling
                image_path_str = str(file_path)
                
                # Call the wrapper function that handles ZeroGPU safely
                return self._safe_gpu_process(image_path_str, use_format, **safe_kwargs)
            else:
                # Fallback for environments without spaces
                return self._process_image_without_gpu(
                    str(file_path), 
                    use_format=use_format,
                    **safe_kwargs
                )
            
        except Exception as e:
            logger.error(f"Error processing image with GOT-OCR: {str(e)}")
            
            # Handle specific errors with helpful messages
            error_type = type(e).__name__
            if error_type == 'OutOfMemoryError':
                raise RuntimeError(
                    "GPU out of memory while processing with GOT-OCR. "
                    "Try using a smaller image or a different parser."
                )
            elif "bfloat16" in str(e):
                raise RuntimeError(
                    "CUDA device does not support bfloat16. This is a known issue with some GPUs. "
                    "Please try using a different parser or contact support."
                )
            elif "CUDA must not be initialized" in str(e):
                raise RuntimeError(
                    "CUDA initialization error. This is likely due to model loading in the main process. "
                    "In ZeroGPU environments, CUDA must only be initialized within @spaces.GPU decorated functions."
                )
            elif "cannot pickle" in str(e):
                raise RuntimeError(
                    f"Serialization error with ZeroGPU: {str(e)}. "
                    "This may be due to thread locks or other unpicklable objects being passed."
                )
            
            # Generic error
            raise RuntimeError(f"Error processing document with GOT-OCR: {str(e)}")
    
    def _safe_gpu_process(self, image_path: str, use_format: bool, **kwargs):
        """Safe wrapper for GPU processing to avoid pickle issues with thread locks."""
        import pickle
        
        try:
            # Call the GPU-decorated function with minimal, picklable arguments
            return self._process_image_with_gpu(image_path, use_format)
        except pickle.PickleError as e:
            logger.error(f"Pickle error in ZeroGPU processing: {str(e)}")
            # Fall back to CPU processing if pickling fails
            logger.warning("Falling back to CPU processing due to pickling error")
            return self._process_image_without_gpu(image_path, use_format=use_format)
    
    def _process_image_without_gpu(self, image_path: str, use_format: bool = False, **kwargs) -> str:
        """Process an image with GOT-OCR model when not using ZeroGPU."""
        logger.warning("ZeroGPU not available. Using direct model loading, which may not work in Spaces.")
        
        # Import here to avoid CUDA initialization in main process
        import torch
        from transformers import AutoModelForImageTextToText, AutoProcessor
        from transformers.image_utils import load_image
        
        # Load the image
        image = load_image(image_path)
        
        # Load processor and model
        processor = AutoProcessor.from_pretrained(MODEL_NAME)
        
        # Use CPU if in main process to avoid CUDA initialization issues
        device = "cpu"
        model = AutoModelForImageTextToText.from_pretrained(
            MODEL_NAME, 
            low_cpu_mem_usage=True,
            device_map=device
        )
        model = model.eval()
        
        # Process the image based on the selected OCR method
        if use_format:
            # Format mode
            inputs = processor([image], return_tensors="pt", format=True)
            # Keep on CPU to avoid CUDA initialization
            
            # Generate text
            with torch.no_grad():
                generate_ids = model.generate(
                    **inputs,
                    do_sample=False,
                    tokenizer=processor.tokenizer,
                    stop_strings=STOP_STR,
                    max_new_tokens=4096,
                )
            
            # Decode the generated text
            result = processor.decode(
                generate_ids[0, inputs["input_ids"].shape[1]:],
                skip_special_tokens=True,
            )
            
            # Return raw LaTeX output - let post-processing handle conversion
            # This allows for more advanced conversion in the integration module
            logger.info("Returning raw LaTeX output for external processing")
            
        else:
            # Plain text mode
            inputs = processor([image], return_tensors="pt")
            
            # Generate text
            with torch.no_grad():
                generate_ids = model.generate(
                    **inputs,
                    do_sample=False,
                    tokenizer=processor.tokenizer,
                    stop_strings=STOP_STR,
                    max_new_tokens=4096,
                )
            
            # Decode the generated text
            result = processor.decode(
                generate_ids[0, inputs["input_ids"].shape[1]:],
                skip_special_tokens=True,
            )
        
        # Clean up to free memory
        del model
        del processor
        import gc
        gc.collect()
        
        return result.strip()
    
    # Define the GPU-decorated function for ZeroGPU
    if HAS_SPACES:
        @spaces.GPU()  # Use default ZeroGPU allocation timeframe, matching HF implementation
        def _process_image_with_gpu(self, image_path: str, use_format: bool = False) -> str:
            """Process an image with GOT-OCR model using GPU allocation.
            
            IMPORTANT: All model loading and CUDA operations must happen inside this method.
            NOTE: Function must receive only picklable arguments (no thread locks, etc).
            """
            logger.info("Processing with ZeroGPU allocation")
            
            # Imports inside the GPU-decorated function
            import torch
            from transformers import AutoModelForImageTextToText, AutoProcessor
            from transformers.image_utils import load_image
            
            # Load the image
            image = load_image(image_path)
            
            # Now we can load the model inside the GPU-decorated function
            device = "cuda" if torch.cuda.is_available() else "cpu"
            
            logger.info(f"Loading GOT-OCR model from {MODEL_NAME} on {device}")
            
            # Load processor
            processor = AutoProcessor.from_pretrained(MODEL_NAME)
            
            # Load model
            model = AutoModelForImageTextToText.from_pretrained(
                MODEL_NAME, 
                low_cpu_mem_usage=True,
                device_map=device
            )
            
            # Set model to evaluation mode
            model = model.eval()
            
            # Process the image with the model based on the selected OCR method
            if use_format:
                # Format mode (for LaTeX, etc.)
                inputs = processor([image], return_tensors="pt", format=True)
                if torch.cuda.is_available():
                    inputs = inputs.to("cuda")
                
                # Generate text
                with torch.no_grad():
                    generate_ids = model.generate(
                        **inputs,
                        do_sample=False,
                        tokenizer=processor.tokenizer,
                        stop_strings=STOP_STR,
                        max_new_tokens=4096,
                    )
                
                # Decode the generated text
                result = processor.decode(
                    generate_ids[0, inputs["input_ids"].shape[1]:],
                    skip_special_tokens=True,
                )
                
                # Return raw LaTeX output - let post-processing handle conversion
                # This allows for more advanced conversion in the integration module
                logger.info("Returning raw LaTeX output for external processing")
            else:
                # Plain text mode
                inputs = processor([image], return_tensors="pt")
                if torch.cuda.is_available():
                    inputs = inputs.to("cuda")
                
                # Generate text
                with torch.no_grad():
                    generate_ids = model.generate(
                        **inputs,
                        do_sample=False,
                        tokenizer=processor.tokenizer,
                        stop_strings=STOP_STR,
                        max_new_tokens=4096,
                    )
                
                # Decode the generated text
                result = processor.decode(
                    generate_ids[0, inputs["input_ids"].shape[1]:],
                    skip_special_tokens=True,
                )
            
            # Clean up the result
            if result.endswith(STOP_STR):
                result = result[:-len(STOP_STR)]
            
            # Clean up to free memory
            del model
            del processor
            import gc
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                logger.info("CUDA cache cleared")
            
            return result.strip()
    else:
        # Define a dummy method if spaces is not available
        def _process_image_with_gpu(self, image_path: str, use_format: bool = False) -> str:
            # This should never be called if HAS_SPACES is False
            return self._process_image_without_gpu(
                image_path, 
                use_format=use_format
            )
    
    @classmethod
    def release_model(cls):
        """Release model resources - not needed with new implementation."""
        logger.info("Model resources managed by ZeroGPU decorator")

# Try to register the parser
try:
    # Only check basic imports, no CUDA initialization
    import torch
    import transformers
    import pickle  # Import pickle for serialization error handling
    ParserRegistry.register(GotOcrParser)
    logger.info("GOT-OCR parser registered successfully")
except ImportError as e:
    logger.warning(f"Could not register GOT-OCR parser: {str(e)}")