# Spaces: Sleeping
# (page-header residue captured together with the source; not part of the program)
import streamlit as st | |
from transformers import T5ForConditionalGeneration, T5Tokenizer | |
from spellchecker import SpellChecker | |
import torch | |
# Load model and tokenizer
@st.cache_resource
def load_model():
    """Load the grammar-correction tokenizer and model.

    Streamlit re-executes this script from the top on every widget
    interaction, so the loader is wrapped in ``st.cache_resource``: the
    checkpoint is loaded once per server process and the same objects are
    handed back on later reruns instead of being reloaded on every click.

    Returns:
        A ``(tokenizer, model)`` tuple built from the
        ``vennify/t5-base-grammar-correction`` checkpoint.
    """
    model_name = "vennify/t5-base-grammar-correction"
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    return tokenizer, model


tokenizer, model = load_model()
# Step 1: Spell Correction
def _split_affixes(token):
    """Split *token* into (leading non-letters, core, trailing non-letters)."""
    start, end = 0, len(token)
    while start < end and not token[start].isalpha():
        start += 1
    while end > start and not token[end - 1].isalpha():
        end -= 1
    return token[:start], token[start:end], token[end:]


def correct_spelling(text: str, spell=None) -> str:
    """Spell-correct each whitespace-separated token of *text*.

    Punctuation wrapped around a word stays where it was: ``"ober",`` has its
    core ``ober`` corrected and the quotes and comma are put back in place.
    Tokens whose core is not purely alphabetic (numbers, ``don't``,
    ``well-known``, ``l33t``) are passed through untouched rather than having
    their letters extracted and their other characters shuffled to the end.

    Args:
        text: Input text. Runs of whitespace are collapsed to single spaces
            in the result.
        spell: Optional object exposing ``correction(word)``. When omitted a
            fresh ``SpellChecker()`` is created; passing one in lets callers
            reuse an instance or supply a customised dictionary.

    Returns:
        The corrected text, tokens joined by single spaces.
    """
    if spell is None:
        spell = SpellChecker()

    corrected = []
    for token in text.split():
        prefix, core, suffix = _split_affixes(token)
        if core.isalpha():
            # correction() may return None when it has no candidate; keep the
            # original spelling in that case instead of failing on None + str.
            core = spell.correction(core) or core
        corrected.append(prefix + core + suffix)
    return " ".join(corrected)
# Step 2: Grammar Correction using model
def correct_grammar(text):
    """Run *text* through the grammar-correction model and return its output.

    Uses the module-level ``tokenizer`` and ``model``. The input is encoded
    with truncation at 512 tokens, so anything past that limit is not seen by
    the model; generation uses beam search with 4 beams and is capped at 512
    tokens as well.

    Args:
        text: The sentence(s) to correct.

    Returns:
        The decoded top beam with special tokens removed.
    """
    # The vennify/t5-base-grammar-correction checkpoint documents "grammar: "
    # as its task prefix; "gec: " belongs to other GEC checkpoints.
    input_text = "grammar: " + text
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", max_length=512, truncation=True
    )
    outputs = model.generate(
        input_ids, max_length=512, num_beams=4, early_stopping=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Streamlit UI
def _spelling_changes(original, spellchecked):
    """Return (before, after) pairs for tokens that differ between the texts.

    Tokens are paired by position, which is only meaningful when both texts
    split into the same number of tokens; otherwise an empty list is returned.
    """
    before = original.split()
    after = spellchecked.split()
    if len(before) != len(after):
        return []
    return [(old, new) for old, new in zip(before, after) if old != new]


# NOTE(review): the leading "π" / "β" characters in the headings below look
# like mis-decoded emoji; they are kept verbatim - confirm the intended glyphs.
st.title("π Advanced Grammar & Spelling Correction Assistant")
st.write("Fixes spelling issues first, then corrects grammar while keeping the meaning intact.")

user_input = st.text_area("Enter your sentence:", height=150)

if st.button("Correct & Explain"):
    if not user_input.strip():
        st.warning("Please enter a sentence.")
    else:
        step1 = correct_spelling(user_input)
        corrected = correct_grammar(step1)

        st.markdown("### β Correction:")
        st.success(corrected)

        # Describe what actually happened to this input instead of quoting a
        # fixed list of example typos.
        changes = _spelling_changes(user_input, step1)
        if changes:
            fixes = ", ".join(f"`{old}` → `{new}`" for old, new in changes)
            spelling_note = f"- Spelling fixes applied: {fixes}."
        elif user_input.split() == step1.split():
            spelling_note = "- No spelling changes were needed."
        else:
            spelling_note = "- Typos were detected and fixed."

        if corrected != step1:
            grammar_note = "- Then grammar structure and capitalization were adjusted."
        else:
            grammar_note = "- The grammar step made no further changes."

        st.markdown("### π Explanation:")
        # Blank entries produce paragraph breaks so each labelled line renders
        # on its own line in Markdown.
        st.info("\n".join([
            f"*Original:* {user_input}",
            "",
            f"*After Spellcheck:* {step1}",
            "",
            f"*Final Grammar Fix:* {corrected}",
            "",
            "**Explanation:**",
            spelling_note,
            grammar_note,
            "- This two-step method avoids changing the sentence meaning.",
        ]))