import gradio as gr
from transformers import pipeline
from translatepy import Translator
import logging
import random
import time
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import os
from typing import Dict, Optional
from functools import lru_cache

# Root logging setup: INFO and above, timestamped records that name the
# emitting logger and severity, sent to both the console and ./app.log.
logging.basicConfig(
    handlers=[logging.StreamHandler(), logging.FileHandler('app.log')],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-scoped logger used by everything below.
logger = logging.getLogger(__name__)

# Application settings; the two directories can be overridden through
# environment variables of the same name.
class Config:
    """Static configuration values read once at import time."""

    # Size limits applied to user text (both measured in characters).
    CHUNK_SIZE = 500
    MAX_TEXT_LENGTH = 10000

    # Filesystem locations, falling back to fixed defaults when the
    # corresponding environment variable is not set.
    NLTK_DATA = os.environ.get('NLTK_DATA', '/home/user/nltk_data')
    CACHE_DIR = os.environ.get('CACHE_DIR', '/home/user/model_cache')
    
# Set up NLTK data path with error handling
def setup_nltk():
    """Make sure the tokenizer data used by ``sent_tokenize`` is installed.

    Creates ``Config.NLTK_DATA`` if needed, registers it on NLTK's search
    path, and downloads any required tokenizer package that cannot be found.

    Raises:
        Exception: Any error raised while creating the directory or talking
            to NLTK is logged and re-raised unchanged.
    """
    # 'punkt' is the classic pickled model; 'punkt_tab' is the replacement
    # that NLTK 3.9+ loads instead. Requesting both keeps old and new NLTK
    # releases working.
    required_packages = ['punkt', 'punkt_tab']

    try:
        os.makedirs(Config.NLTK_DATA, exist_ok=True)

        # Avoid stacking duplicate entries when this is called more than once.
        if Config.NLTK_DATA not in nltk.data.path:
            nltk.data.path.append(Config.NLTK_DATA)

        for package in required_packages:
            try:
                nltk.data.find(f'tokenizers/{package}')
            except LookupError:
                # nltk.download reports failure through its return value
                # rather than by raising, so check it explicitly. A missing
                # package is only a warning: which of the two is actually
                # needed depends on the installed NLTK version.
                downloaded = nltk.download(
                    package, download_dir=Config.NLTK_DATA, quiet=True
                )
                if not downloaded:
                    logger.warning(f"NLTK package '{package}' could not be downloaded")
    except Exception as e:
        logger.error(f"NLTK setup failed: {str(e)}")
        raise

class TextHumanizer:
    """Rewrite text in a chosen tone and layer human-like quirks on top.

    The instance owns an AI-text detector pipeline, a text2text generation
    pipeline, a translatepy ``Translator`` and the prompt/pattern tables
    used by the rewriting steps.

    Attributes:
        detector: ``text-classification`` pipeline used by ``detect_ai_text``.
        humanizer: ``text2text-generation`` pipeline used by ``humanize_text``.
        translator: ``Translator`` used for the optional translation step.
        tone_prompts: Mapping of tone name to a list of prompt templates,
            each containing a ``{text}`` placeholder.
        human_patterns: Fillers, contractions and sentence variants as
            returned by ``_load_patterns``.
    """

    # Detector labels (lower-cased) that mean "machine-written".
    # 'artificial' is the label this code has always matched; 'fake' is the
    # label shown on the roberta-base-openai-detector model card.
    # NOTE(review): confirm against the deployed model's id2label mapping.
    _AI_LABELS = frozenset({"artificial", "fake"})

    def __init__(self, cache_dir: str = Config.CACHE_DIR):
        """Load both model pipelines, the translator and the pattern tables.

        Args:
            cache_dir: Directory for downloaded model files; created if it
                does not exist.

        Raises:
            Exception: Any failure during setup is logged and re-raised.
        """
        try:
            os.makedirs(cache_dir, exist_ok=True)

            # Both pipelines are created through the retrying helper.
            self.detector = self._init_pipeline(
                "text-classification",
                "roberta-base-openai-detector",
                cache_dir
            )

            self.humanizer = self._init_pipeline(
                "text2text-generation",
                "facebook/bart-large-cnn",
                cache_dir
            )

            self.translator = Translator()

            # Move prompts to a separate configuration file in production
            self.tone_prompts = {
                "Casual": [
                    "Rewrite this casually as if you're texting a friend: {text}",
                    "Make this sound like natural conversation: {text}",
                    "Convert this to everyday spoken English: {text}"
                ],
                "Business": [
                    "Rephrase this in professional corporate language: {text}",
                    "Transform this into formal business communication: {text}",
                    "Rewrite for a professional email: {text}"
                ],
                "Academic": [
                    "Rephrase this in scholarly academic language: {text}",
                    "Convert to academic paper style: {text}",
                    "Rewrite for a research publication: {text}"
                ],
                "Creative": [
                    "Transform this into vivid, imaginative writing: {text}",
                    "Rewrite with creative metaphors and sensory details: {text}",
                    "Convert to engaging storytelling style: {text}"
                ]
            }

            self.human_patterns = self._load_patterns()

        except Exception as e:
            logger.error(f"Initialization failed: {str(e)}")
            raise

    @staticmethod
    def _init_pipeline(task: str, model: str, cache_dir: str, max_retries: int = 3):
        """Create a transformers pipeline on CPU, retrying on failure.

        Args:
            task: Pipeline task name.
            model: Model identifier to load.
            cache_dir: Directory where model files are cached.
            max_retries: Total number of attempts; must be at least 1.

        Returns:
            The constructed pipeline object.

        Raises:
            ValueError: If ``max_retries`` is smaller than 1.
            Exception: The error from the final attempt, once all attempts
                have failed.
        """
        if max_retries < 1:
            # Previously the loop body never ran and None was returned.
            raise ValueError("max_retries must be at least 1")

        for attempt in range(max_retries):
            try:
                # cache_dir must travel through model_kwargs to reach
                # from_pretrained(); bare keyword arguments to pipeline()
                # are forwarded to the pipeline object instead.
                return pipeline(
                    task,
                    model=model,
                    model_kwargs={"cache_dir": cache_dir},
                    device=-1,
                )
            except Exception as e:
                if attempt == max_retries - 1:
                    raise
                logger.warning(f"Pipeline initialization attempt {attempt + 1} failed: {str(e)}")
                time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s, ...

    @staticmethod
    def _load_patterns():
        """Return the tables used by ``_add_human_touches``.

        Returns:
            A dict with three keys: ``'fillers'`` (list of filler phrases),
            ``'contractions'`` (formal phrase -> contracted form) and
            ``'sentence_variants'`` (callables mapping a sentence to a
            variant of it).
        """
        return {
            'fillers': ["well", "you know", "actually", "I mean", "basically",
                       "to be honest", "kind of", "sort of", "like"],
            'contractions': {
                "cannot": "can't",
                "could not": "couldn't",
                "would not": "wouldn't",
                "is not": "isn't",
                "do not": "don't",
                "will not": "won't",
                "should not": "shouldn't",
                "have not": "haven't"
            },
            'sentence_variants': [
                lambda s: s.lower(),
                lambda s: s.capitalize(),
                # The remaining variants only touch sentences ending in '.'.
                lambda s: s[:-1] + ", which is interesting." if s.endswith('.') else s,
                lambda s: s[:-1] + ", you know?" if s.endswith('.') else s,
                lambda s: s[:-1] + "..." if s.endswith('.') else s
            ]
        }

    # Deliberately not memoized: the output is randomized, and an lru_cache
    # on an instance method would also keep every instance alive.
    def _add_human_touches(self, text: str) -> str:
        """Randomly perturb ``text`` with fillers, splits and contractions.

        Steps, in order: optionally prefix each sentence with a filler
        phrase, optionally break long sentences in two, replace formal
        phrases with contractions, then optionally apply one sentence
        variant per sentence.

        Args:
            text: Text to modify.

        Returns:
            The modified text. If any step raises, the error is logged and
            the text as it stood at that point is returned.
        """
        try:
            patterns = self.human_patterns

            modified_sentences = []
            for sent in sent_tokenize(text):
                if random.random() < 0.4:
                    filler = random.choice(patterns['fillers'])
                    sent = f"{filler}, {sent.lower()}"

                # Split on whitespace so punctuation stays attached to its
                # word; joining tokenizer output with spaces would produce
                # "word ." and "do n't".
                words = sent.split()
                if len(words) > 12 and random.random() < 0.3:
                    split_point = len(words) // 2 + random.randint(-2, 2)
                    # Drop trailing separators to avoid doubling the comma.
                    first_half = ' '.join(words[:split_point]).rstrip(',;:')
                    modified_sentences.append(first_half + ',')
                    modified_sentences.append(' '.join(words[split_point:]))
                else:
                    modified_sentences.append(sent)

            # Contractions are only applied to space-delimited occurrences.
            text = ' '.join(modified_sentences)
            for formal, casual in patterns['contractions'].items():
                text = text.replace(f" {formal} ", f" {casual} ")

            final_sentences = []
            for sent in sent_tokenize(text):
                if random.random() < 0.7:  # 70% chance of modification
                    sent = random.choice(patterns['sentence_variants'])(sent)
                final_sentences.append(sent)

            return ' '.join(final_sentences)

        except Exception as e:
            logger.error(f"Humanization error: {str(e)}")
            return text

    def detect_ai_text(self, text: str) -> float:
        """Estimate how machine-written ``text`` looks, from 0.0 to 1.0.

        The text is cut into ``Config.CHUNK_SIZE``-character chunks. Chunks
        with fewer than 50 non-blank characters are skipped. Every remaining
        chunk contributes one value to the mean: the detector's score when
        its label is in ``_AI_LABELS``, otherwise ``1 - score``.

        Args:
            text: Text to score.

        Returns:
            Mean per-chunk AI probability, or 0.0 when the text is blank,
            no chunk was long enough, or detection raised (the error is
            logged).
        """
        try:
            if not text.strip():
                return 0.0

            size = Config.CHUNK_SIZE
            scores = []
            for start in range(0, len(text), size):
                chunk = text[start:start + size]
                if len(chunk.strip()) < 50:  # Skip very short chunks
                    continue

                result = self.detector(chunk)[0]
                confidence = result['score']
                is_ai = str(result['label']).lower() in self._AI_LABELS
                # Count human-labelled chunks too, otherwise a single
                # AI-labelled chunk would set the score for the whole text.
                # Assumes a two-class detector, so the complement of the
                # winning score is the AI probability. TODO confirm.
                scores.append(confidence if is_ai else 1.0 - confidence)

            return sum(scores) / len(scores) if scores else 0.0

        except Exception as e:
            logger.error(f"Detection error: {str(e)}")
            return 0.0

    def humanize_text(self, text: str, tone: str, translate_to: Optional[str] = None) -> str:
        """Rewrite ``text`` in ``tone``, add human touches, optionally translate.

        Args:
            text: Non-blank input of at most ``Config.MAX_TEXT_LENGTH``
                characters.
            tone: One of the keys of ``self.tone_prompts``.
            translate_to: ``None``/``"None"`` to skip translation, otherwise
                a string whose first whitespace-separated token is the
                target language code (e.g. ``"es (Spanish)"``).

        Returns:
            The rewritten (and possibly translated) text.

        Raises:
            ValueError: If the text is blank or too long, the tone is
                unknown, or translation fails.
            Exception: Any other failure is logged and re-raised.
        """
        try:
            if not text or not text.strip() or len(text) > Config.MAX_TEXT_LENGTH:
                raise ValueError(f"Text must be between 1 and {Config.MAX_TEXT_LENGTH} characters")

            if tone not in self.tone_prompts:
                # Report a readable error instead of a bare KeyError.
                raise ValueError(
                    f"Unknown tone {tone!r}; expected one of {sorted(self.tone_prompts)}"
                )

            started = time.time()

            original_score = self.detect_ai_text(text)
            logger.info(f"Initial AI score: {original_score:.2f}")

            prompt = random.choice(self.tone_prompts[tone]).format(text=text)
            generated = self.humanizer(
                prompt,
                max_length=min(len(text)*2, 1024),
                # Inputs may be up to MAX_TEXT_LENGTH characters, which can
                # exceed the model's input window; truncate instead of failing.
                truncation=True,
                # temperature/top_p only take effect when sampling is on.
                do_sample=True,
                temperature=0.9,
                top_p=0.95,
                num_beams=4,
                repetition_penalty=1.2,
                no_repeat_ngram_size=3
            )[0]['generated_text']

            humanized = self._add_human_touches(generated)
            final_score = self.detect_ai_text(humanized)

            # Run a second pass when the score dropped by less than 20%.
            if final_score > original_score * 0.8:
                logger.info("Applying additional humanization pass")
                humanized = self._add_human_touches(humanized)

            if translate_to and translate_to != "None":
                try:
                    lang_code = translate_to.split()[0]
                    humanized = self.translator.translate(humanized, lang_code).result
                except Exception as e:
                    logger.error(f"Translation failed: {str(e)}")
                    # Chain the cause so the original traceback is kept.
                    raise ValueError(f"Translation failed: {str(e)}") from e

            processing_time = time.time() - started
            logger.info(f"Processing completed in {processing_time:.2f} seconds")

            return humanized

        except Exception as e:
            logger.error(f"Humanization failed: {str(e)}")
            raise

def create_interface():
    """Build the Gradio UI around a ``TextHumanizer`` and start serving it.

    Constructs the humanizer, prepares NLTK data, wires a three-input
    (text, tone, target language) form to a JSON output, and launches the
    app on 0.0.0.0:7860 with a share link. Any failure while doing so is
    logged and re-raised.
    """
    try:
        humanizer = TextHumanizer()
        setup_nltk()

        def process_text(text: str, tone: str, translate_to: str) -> Dict:
            """Run one request and report the outcome as a JSON-ready dict.

            The result always has ``data`` and ``success`` keys; successful
            runs add ``metrics`` and failed ones add ``error``.
            """
            try:
                if text.strip():
                    began = time.time()
                    output = humanizer.humanize_text(text, tone, translate_to)
                    elapsed = time.time() - began

                    stats = {
                        "processing_time": round(elapsed, 2),
                        "characters_processed": len(text),
                        "words_processed": len(text.split()),
                    }
                    return {"data": [output], "success": True, "metrics": stats}

                # Blank input: answer with a hint instead of calling the model.
                return {
                    "data": ["Please enter some text to process"],
                    "success": False,
                    "error": "Empty input",
                }
            except Exception as e:
                logger.error(f"Text processing failed: {str(e)}")
                return {"data": [], "success": False, "error": str(e)}

        # Dropdown entries look like "es (Spanish)"; "None" disables translation.
        languages = [
            ("da", "Danish"), ("no", "Norwegian"),
            ("sv", "Swedish"), ("es", "Spanish"),
            ("fr", "French"), ("de", "German"),
        ]
        language_choices = ["None"]
        language_choices.extend(f"{c} ({n})" for c, n in languages)

        text_input = gr.Textbox(
            label="Input Text",
            lines=5,
            placeholder="Enter text to humanize...",
        )
        tone_input = gr.Dropdown(
            choices=list(humanizer.tone_prompts.keys()),
            label="Writing Style",
            value="Casual",
        )
        language_input = gr.Dropdown(
            choices=language_choices,
            label="Translate to",
            value="None",
        )

        sample_rows = [
            ["Large language models demonstrate remarkable capabilities in natural language understanding tasks.", "Casual", "None"],
            ["The implementation requires careful consideration of multiple interdependent factors.", "Business", "es (Spanish)"],
        ]

        iface = gr.Interface(
            fn=process_text,
            inputs=[text_input, tone_input, language_input],
            outputs=gr.JSON(),
            title="Advanced AI Text Humanizer",
            description="Transform AI-generated text into more natural, human-like writing",
            examples=sample_rows,
            flagging_mode=None,
        )

        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": True,
        }
        iface.launch(**launch_options)

    except Exception as e:
        logger.error(f"Interface creation failed: {str(e)}")
        raise

# Build and launch the web UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_interface()