# src/implementations/document_service.py
from pathlib import Path
import shutil
import os
import uuid
from typing import List, Tuple

from fastapi import UploadFile, BackgroundTasks

from ..vectorstores.chroma_vectorstore import ChromaVectorStore
from ..utils.document_processor import DocumentProcessor
from ..models import DocumentResponse, DocumentInfo, BatchUploadResponse
from ..utils.logger import logger


class DocumentService:
    def __init__(self, doc_processor: DocumentProcessor):
        self.doc_processor = doc_processor
        self.upload_dir = Path("temp_uploads")
        self.upload_dir.mkdir(parents=True, exist_ok=True)

    async def process_documents(
        self,
        files: List[UploadFile],
        vector_store: ChromaVectorStore,
        background_tasks: BackgroundTasks
    ) -> BatchUploadResponse:
        """Process multiple document uploads."""
        processed_files, failed_files = await self._handle_file_uploads(
            files, vector_store, background_tasks
        )
        return BatchUploadResponse(
            message=(
                f"Processed {len(processed_files)} documents "
                f"with {len(failed_files)} failures"
            ),
            processed_files=processed_files,
            failed_files=failed_files
        )

    async def _handle_file_uploads(
        self,
        files: List[UploadFile],
        vector_store: ChromaVectorStore,
        background_tasks: BackgroundTasks
    ) -> Tuple[List[DocumentResponse], List[dict]]:
        """Handle individual file uploads and processing."""
        processed_files = []
        failed_files = []

        for file in files:
            try:
                if not self._is_supported_format(file.filename):
                    failed_files.append(self._create_failed_file_entry(
                        file.filename, "Unsupported file format"
                    ))
                    continue

                document_response = await self._process_single_file(
                    file, vector_store, background_tasks
                )
                processed_files.append(document_response)

            except Exception as e:
                # One bad file should not abort the whole batch; record the
                # failure and keep going.
                logger.error(f"Error processing file {file.filename}: {str(e)}")
                failed_files.append(self._create_failed_file_entry(
                    file.filename, str(e)
                ))

        return processed_files, failed_files

    def _is_supported_format(self, filename: str) -> bool:
        """Check if the file format is supported."""
        if not filename:
            # Multipart uploads may arrive without a filename.
            return False
        return any(
            filename.lower().endswith(ext)
            for ext in self.doc_processor.supported_formats
        )

    async def _process_single_file(
        self,
        file: UploadFile,
        vector_store: ChromaVectorStore,
        background_tasks: BackgroundTasks
    ) -> DocumentResponse:
        """Save a single upload to disk and queue it for processing."""
        document_id = str(uuid.uuid4())
        # Path(...).name strips any directory components a client might embed
        # in the filename, keeping the temp file inside upload_dir.
        temp_path = self.upload_dir / f"{document_id}_{Path(file.filename).name}"

        # Persist the upload before returning, since the request body stream
        # is closed once the request finishes.
        with open(temp_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Defer the heavy processing until after the response is sent.
        background_tasks.add_task(
            self._process_and_store_document,
            temp_path,
            vector_store,
            document_id
        )

        return DocumentResponse(
            message="Document queued for processing",
            document_id=document_id,
            status="processing",
            document_info=DocumentInfo(
                original_filename=file.filename,
                size=os.path.getsize(temp_path),
                content_type=file.content_type
            )
        )

    async def _process_and_store_document(
        self,
        file_path: Path,
        vector_store: ChromaVectorStore,
        document_id: str
    ):
        """Process a document and store its chunks in the vector database."""
        try:
            processed_doc = await self.doc_processor.process_document(file_path)
            chunks = processed_doc['chunks']

            # Each chunk gets a stable id and carries the document-level
            # metadata alongside its own position.
            vector_store.add_documents(
                documents=chunks,
                metadatas=[{
                    'document_id': document_id,
                    'chunk_id': i,
                    'source': str(file_path.name),
                    'metadata': processed_doc['metadata']
                } for i in range(len(chunks))],
                ids=[f"{document_id}_chunk_{i}" for i in range(len(chunks))]
            )
            return processed_doc
        except Exception as e:
            # Background tasks run after the response is sent, so failures
            # would otherwise vanish silently; log before re-raising.
            logger.error(
                f"Background processing failed for document {document_id}: {str(e)}"
            )
            raise
        finally:
            # Remove the temp file whether or not processing succeeded.
            if file_path.exists():
                file_path.unlink()

    def _create_failed_file_entry(self, filename: str, error: str) -> dict:
        """Create a failed file entry."""
        return {
            "filename": filename,
            "error": error
        }

    def cleanup(self):
        """Remove the upload directory if it is empty."""
        if self.upload_dir.exists() and not any(self.upload_dir.iterdir()):
            self.upload_dir.rmdir()