import os
import sys
import subprocess
import re
import string
import random

import gradio as gr
import nltk
import spacy
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from transformers import pipeline

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Initialize stopwords
stop_words = set(stopwords.words("english"))

# POS tags and words we don't want to replace
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did',
                 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

# Initialize the English text-classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker
spell = SpellChecker()

# Ensure the spaCy model is installed, downloading it on first run if needed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")


def get_synonyms(word):
    """Find single-word, alphabetic WordNet synonyms for the given word."""
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                synonyms.add(lemma.name())
    return synonyms


def replace_with_synonyms(word, pos_tag):
    """Replace a word with a random synonym that keeps the original POS tag."""
    synonyms = get_synonyms(word)
    # Keep only synonyms whose POS tag (tagged in isolation) matches the original
    filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag]
    if filtered_synonyms:
        return random.choice(filtered_synonyms)
    return word


def improve_paraphrasing_and_grammar(text):
    """Paraphrase the text and correct common grammatical errors."""
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            # Replace words with synonyms, skipping excluded POS tags,
            # function words, and punctuation
            if (token.tag_ not in exclude_tags
                    and token.text.lower() not in exclude_words
                    and token.text not in string.punctuation):
                sentence.append(replace_with_synonyms(token.text, token.tag_))
            else:
                sentence.append(token.text)
        corrected_text.append(' '.join(sentence))

    # Fix articles first, then possessives, punctuation spacing, and
    # capitalization: fix_article_errors re-tokenizes and rejoins with spaces,
    # so running it last would undo the spacing fixes
    final_text = ' '.join(corrected_text)
    final_text = fix_article_errors(final_text)
    final_text = fix_possessives(final_text)
    final_text = fix_punctuation_spacing(final_text)
    final_text = capitalize_sentences(final_text)
    return final_text


def fix_punctuation_spacing(text):
    """Remove spaces before punctuation marks."""
    return re.sub(r'\s+([,.!?])', r'\1', text)


def fix_possessives(text):
    """Correct possessives like "John ' s" -> "John's"."""
    # The trailing \b prevents the pattern from swallowing the space before a
    # following word that happens to start with 's'
    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)
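
# A minimal sketch of how the AI-detection pipeline loaded above could be
# queried; it is not wired into the Gradio UI below. The helper name is ours,
# and the exact label strings ("Human"/"ChatGPT") depend on the model card.
def detect_ai_text(text):
    """Return the detector's top label and confidence score for the text."""
    result = pipeline_en(text)[0]  # e.g. {'label': 'Human', 'score': 0.98}
    return result["label"], result["score"]
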
def capitalize_sentences(text):
    """Capitalize the first letter of each sentence."""
    sentences = re.split(r'(?<=\w[.!?])\s+', text)
    # Upper-case only the first character; str.capitalize() would lowercase
    # the rest of the sentence and mangle proper nouns. The split already
    # keeps the terminal punctuation, so join with a plain space.
    return ' '.join(s[0].upper() + s[1:] if s else s for s in sentences)


def fix_article_errors(text):
    """Correct 'a' and 'an' usage based on the following word's first letter."""
    doc = nlp(text)
    corrected = []
    for token in doc:
        # Guard against the article being the last token in the document
        if token.text in ('a', 'an') and token.i + 1 < len(doc):
            next_token = doc[token.i + 1]
            if token.text == 'a' and next_token.text[0].lower() in 'aeiou':
                corrected.append('an')
            elif token.text == 'an' and next_token.text[0].lower() not in 'aeiou':
                corrected.append('a')
            else:
                corrected.append(token.text)
        else:
            corrected.append(token.text)
    return ' '.join(corrected)


# Gradio app setup
def gradio_interface(text):
    """Gradio interface function to process the input text."""
    return improve_paraphrasing_and_grammar(text)


with gr.Blocks() as demo:
    gr.Markdown("## Text Paraphrasing and Grammar Correction")
    text_input = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
    text_output = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
    submit_button = gr.Button("🔄 Paraphrase and Correct")
    submit_button.click(fn=gradio_interface, inputs=text_input, outputs=text_output)

# Launch the Gradio app
demo.launch(share=True)
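
# To exercise the pipeline without the web UI, comment out demo.launch(...)
# above (it blocks) and call the function directly. The sample sentence is
# illustrative only:
#
#     print(improve_paraphrasing_and_grammar(
#         "he bought a apple , and it was John ' s idea ."))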