import os
import sys
import subprocess
import re
import string
import random

import gradio as gr
import nltk
import spacy
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from transformers import pipeline

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Initialize stopwords
stop_words = set(stopwords.words("english"))

# POS tags and words we don't want to replace
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did',
                 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

# Initialize the English text-classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker
spell = SpellChecker()

# Ensure the spaCy model is installed, downloading it on first run if needed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")


def get_synonyms(word):
    """Find single-word, alphabetic WordNet synonyms for the given word."""
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                synonyms.add(lemma.name())
    return synonyms


def replace_with_synonyms(word, pos_tag):
    """Replace a word with a random synonym that keeps the original POS tag."""
    synonyms = get_synonyms(word)
    # Keep only synonyms whose POS tag (tagged in isolation) matches the original
    filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag]
    if filtered_synonyms:
        return random.choice(filtered_synonyms)
    return word


def improve_paraphrasing_and_grammar(text):
    """Paraphrase the text and correct common grammatical errors."""
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            # Replace words with synonyms, skipping excluded POS tags,
            # function words, and punctuation
            if (token.tag_ not in exclude_tags
                    and token.text.lower() not in exclude_words
                    and token.text not in string.punctuation):
                sentence.append(replace_with_synonyms(token.text, token.tag_))
            else:
                sentence.append(token.text)
        corrected_text.append(' '.join(sentence))

    # Fix articles first, then possessives, punctuation spacing, and
    # capitalization: fix_article_errors re-tokenizes and rejoins with spaces,
    # so running it last would undo the spacing fixes
    final_text = ' '.join(corrected_text)
    final_text = fix_article_errors(final_text)
    final_text = fix_possessives(final_text)
    final_text = fix_punctuation_spacing(final_text)
    final_text = capitalize_sentences(final_text)
    return final_text


def fix_punctuation_spacing(text):
    """Remove spaces before punctuation marks."""
    return re.sub(r'\s+([,.!?])', r'\1', text)


def fix_possessives(text):
    """Correct possessives like "John ' s" -> "John's"."""
    # The trailing \b prevents the pattern from swallowing the space before a
    # following word that happens to start with 's'
    return re.sub(r"(\w)\s?'\s?s\b", r"\1's", text)
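
# A minimal sketch of how the AI-detection pipeline loaded above could be
# queried; it is not wired into the Gradio UI below. The helper name is ours,
# and the exact label strings ("Human"/"ChatGPT") depend on the model card.
def detect_ai_text(text):
    """Return the detector's top label and confidence score for the text."""
    result = pipeline_en(text)[0]  # e.g. {'label': 'Human', 'score': 0.98}
    return result["label"], result["score"]
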
def capitalize_sentences(text):
    """Capitalize the first letter of each sentence."""
    sentences = re.split(r'(?<=\w[.!?])\s+', text)
    # Upper-case only the first character; str.capitalize() would lowercase
    # the rest of the sentence and mangle proper nouns. The split already
    # keeps the terminal punctuation, so join with a plain space.
    return ' '.join(s[0].upper() + s[1:] if s else s for s in sentences)


def fix_article_errors(text):
    """Correct 'a' and 'an' usage based on the following word's first letter."""
    doc = nlp(text)
    corrected = []
    for token in doc:
        # Guard against the article being the last token in the document
        if token.text in ('a', 'an') and token.i + 1 < len(doc):
            next_token = doc[token.i + 1]
            if token.text == 'a' and next_token.text[0].lower() in 'aeiou':
                corrected.append('an')
            elif token.text == 'an' and next_token.text[0].lower() not in 'aeiou':
                corrected.append('a')
            else:
                corrected.append(token.text)
        else:
            corrected.append(token.text)
    return ' '.join(corrected)


# Gradio app setup
def gradio_interface(text):
    """Gradio interface function to process the input text."""
    return improve_paraphrasing_and_grammar(text)


with gr.Blocks() as demo:
    gr.Markdown("## Text Paraphrasing and Grammar Correction")
    text_input = gr.Textbox(lines=10, label='Enter text for paraphrasing and grammar correction')
    text_output = gr.Textbox(lines=10, label='Corrected Text', interactive=False)
    submit_button = gr.Button("🔄 Paraphrase and Correct")
    submit_button.click(fn=gradio_interface, inputs=text_input, outputs=text_output)

# Launch the Gradio app
demo.launch(share=True)
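
# To exercise the pipeline without the web UI, comment out demo.launch(...)
# above (it blocks) and call the function directly. The sample sentence is
# illustrative only:
#
#     print(improve_paraphrasing_and_grammar(
#         "he bought a apple , and it was John ' s idea ."))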