# Spaces: Running
import streamlit as st | |
from transformers import T5ForConditionalGeneration, T5Tokenizer | |
from spellchecker import SpellChecker | |
import re | |
import torch | |
# Load model and tokenizer
@st.cache_resource(show_spinner=False)
def load_model():
    """Load the grammar-correction tokenizer and model.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_resource``: the weights are loaded
    once per server process instead of on every rerun.
    ``show_spinner=False`` keeps the cached call from rendering a UI element
    before ``st.set_page_config`` is reached further down the script.

    Returns:
        A ``(tokenizer, model)`` tuple for
        ``vennify/t5-base-grammar-correction``.
    """
    model_name = "vennify/t5-base-grammar-correction"
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    return tokenizer, model


tokenizer, model = load_model()
# Step 0: Preprocess the input
def preprocess_input(text):
    """Strip trailing junk characters and make the text end with a terminator.

    The whole trailing run of non-word characters (symbols such as ``#`` and
    any whitespace mixed in with them) is removed, not just the last
    character. If that run contained a sentence terminator (``.``, ``!`` or
    ``?``), the first one is kept so questions and exclamations are not
    rewritten as statements; otherwise a period is appended.

    Args:
        text: Raw user input.

    Returns:
        The cleaned sentence, or an empty string when nothing but
        whitespace/symbols was supplied.
    """
    stripped = text.strip()

    # Trailing run of anything that is not a letter, digit or underscore.
    tail_match = re.search(r'\W+$', stripped)
    tail = tail_match.group() if tail_match else ''
    core = stripped[:len(stripped) - len(tail)]

    if not core:
        # Input was empty or consisted only of symbols; do not invent a lone ".".
        return ''

    # Preserve the author's own terminator when there is one.
    terminator = next((ch for ch in tail if ch in '.!?'), '.')
    return core + terminator
# Step 1: Spelling correction
def correct_spelling(text, spell=None):
    """Correct misspelled alphabetic words while leaving everything else intact.

    Only standalone runs of letters are passed to the spell checker.
    Whitespace, punctuation, digits and mixed tokens such as ``abc123`` are
    copied through verbatim, so the original spacing is preserved (no
    ``"Hello , world ."``). Words containing an apostrophe (contractions)
    are left untouched rather than being split apart. Words that need no
    correction keep their exact casing; corrected words get the original
    word's upper-case or capitalised pattern re-applied.

    Args:
        text: The text to check.
        spell: Optional object exposing ``correction(word)``. When omitted a
            new ``SpellChecker()`` is created; passing one in lets callers
            reuse an instance instead of rebuilding it on every call.

    Returns:
        The text with misspelled words replaced.
    """
    if spell is None:
        spell = SpellChecker()

    def _fix(match):
        word = match.group()
        if "'" in word or "\u2019" in word:
            # Contractions/possessives: leave as written.
            return word

        lowered = word.lower()
        fixed = spell.correction(lowered)
        if not fixed or fixed == lowered:
            # No candidate, or already spelled correctly: keep original casing.
            return word

        if word.isupper():
            return fixed.upper()
        if word[0].isupper():
            return fixed[0].upper() + fixed[1:]
        return fixed

    # Letters only (no digits/underscore), optionally joined by apostrophes;
    # the \b anchors keep us from touching the "abc" inside "abc123".
    word_pattern = r"\b[^\W\d_]+(?:['\u2019][^\W\d_]+)*\b"
    return re.sub(word_pattern, _fix, text)
# Step 2: Grammar correction
def correct_grammar(text):
    """Run the T5 grammar-correction model over ``text``.

    Uses the module-level ``tokenizer`` and ``model``. The input is truncated
    to 512 tokens and decoded with 4-beam search.

    Args:
        text: Sentence to correct.

    Returns:
        The model's corrected sentence with special tokens removed.
    """
    # The vennify/t5-base-grammar-correction model card prompts the model
    # with the "grammar: " task prefix, not "gec: ".
    input_text = "grammar: " + text
    input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(input_ids, max_length=512, num_beams=4, early_stopping=True)
    corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return corrected
# Streamlit UI
# Page flow: configure the page, collect a sentence, then on button press run
# preprocess -> spelling -> grammar and show each intermediate result.
# NOTE(review): the emoji/arrow glyphs below were restored from mis-encoded
# text. The page icon, check mark and arrows are unambiguous; the text-area
# and "Explanation" emoji are best guesses -- confirm against the deployed app.
st.set_page_config(page_title="Grammar & Spelling Assistant", page_icon="🧠")
st.title("🧠 Grammar & Spelling Correction Assistant")
st.write("Fixes grammar and spelling errors without changing your original meaning.")

user_input = st.text_area("✍️ Enter your sentence:", height=150)

if st.button("Correct & Explain"):
    if not user_input.strip():
        st.warning("Please enter a sentence.")
    else:
        # Step 0: Preprocess
        preprocessed = preprocess_input(user_input)
        # Step 1: Spell check
        spelling_fixed = correct_spelling(preprocessed)
        # Step 2: Grammar correction
        final_output = correct_grammar(spelling_fixed)

        # Output
        st.markdown("### ✅ Final Correction:")
        st.success(final_output)

        st.markdown("### 🔍 Explanation:")
        # The message body is kept flush-left: Markdown would render lines
        # indented by four or more spaces as a code block.
        st.info(f"""
**Original Sentence:**
{user_input}
**After Preprocessing (remove #, enforce period):**
{preprocessed}
**After Spelling Correction:**
{spelling_fixed}
**After Grammar Correction:**
{final_output}
**Explanation:**
- Special characters like `#` were removed
- Misspelled words like `ober` → `over`, `dogz` → `dogs` were fixed
- Grammar (capitalization, punctuation) was corrected
- No unwanted words like `#5` were added
""")