Spaces:
Running
Running
File size: 2,991 Bytes
f8049b3 b1e4b23 f8049b3 b1e4b23 f8049b3 b1e4b23 f8049b3 c537159 b1e4b23 eb6dcce b1e4b23 4802f7f b1e4b23 bff2055 c537159 b1e4b23 f8049b3 c537159 b1e4b23 c537159 f8049b3 c537159 f8049b3 b1e4b23 c537159 4802f7f b1e4b23 f8049b3 c537159 b1e4b23 f8049b3 c537159 b1e4b23 eb6dcce c537159 b1e4b23 eb6dcce b1e4b23 f8049b3 b1e4b23 c537159 b1e4b23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
from spellchecker import SpellChecker
import re
import torch
# Load model and tokenizer
@st.cache_resource
def load_model():
    """Load the tokenizer and seq2seq model used for grammar correction.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the loaded objects instead of reloading them on every script rerun.

    Returns:
        A ``(tokenizer, model)`` pair, both created from the
        ``vennify/t5-base-grammar-correction`` checkpoint.
    """
    checkpoint = "vennify/t5-base-grammar-correction"
    # Tokenizer first, then the model weights, both from the same checkpoint.
    return (
        T5Tokenizer.from_pretrained(checkpoint),
        T5ForConditionalGeneration.from_pretrained(checkpoint),
    )
# Module-level tokenizer/model pair returned by load_model(); correct_grammar()
# reads these two names as globals.
tokenizer, model = load_model()
# Step 0: Preprocess the input
def preprocess_input(text):
    """Normalize raw user text before spelling and grammar correction.

    Surrounding whitespace is dropped, then the entire trailing run of
    non-word characters (symbols, punctuation and any spaces between them)
    is removed, and a single period is appended.  Stripping the whole run,
    rather than one character, keeps inputs such as ``"dogs ##"`` or
    ``"dogs #"`` from ending up as ``"dogs #."`` or ``"dogs ."``.

    Args:
        text: Raw sentence typed by the user.

    Returns:
        The cleaned sentence, ending in exactly one ``'.'``.  Empty or
        symbol-only input yields ``'.'``.
    """
    # ``\W`` covers both punctuation/symbols and whitespace, so one pass
    # removes things like " #", "##" or "?!" from the end of the sentence.
    cleaned = re.sub(r'\W+$', '', text.strip())
    # After the strip above the text can no longer end with a period,
    # so appending one unconditionally enforces a single final period.
    return cleaned + '.'
# Step 1: Spelling correction
def correct_spelling(text):
    """Correct misspelled words in ``text`` while leaving the rest untouched.

    Each purely alphabetic word is looked up with ``SpellChecker`` and
    replaced in place, so the original whitespace and punctuation are
    preserved (no extra space is inserted before ``,`` or ``.``).  Tokens
    that contain digits, underscores or apostrophes (for example ``abc123``
    or ``don't``) are returned exactly as typed instead of being split into
    fragments.  The capitalisation of a corrected word follows the original
    word: all-caps stays all-caps, a leading capital stays capitalised.

    Args:
        text: Sentence to spell-check.

    Returns:
        The sentence with misspelled alphabetic words replaced.
    """
    spell = SpellChecker()

    def _fix(match):
        """Return the replacement for one matched word token."""
        word = match.group(0)
        if not word.isalpha():
            # Contractions and alphanumeric tokens are not plain words.
            return word
        lowered = word.lower()
        suggestion = spell.correction(lowered)
        # Keep the user's spelling (and casing) when the checker has no
        # suggestion or agrees with the word as written.
        if not suggestion or suggestion == lowered:
            return word
        if word.isupper():
            return suggestion.upper()
        if word[0].isupper():
            return suggestion[0].upper() + suggestion[1:]
        return suggestion

    # A token is a run of word characters, optionally joined by apostrophes
    # so that contractions are matched (and skipped) as a single unit.
    return re.sub(r"\b\w+(?:['’]\w+)*\b", _fix, text)
# Step 2: Grammar correction
def correct_grammar(text):
    """Run the grammar-correction model on ``text`` and return its output.

    The text is prefixed with the task tag, encoded with the module-level
    ``tokenizer`` (truncated to 512 tokens), passed through the module-level
    ``model`` with 4-beam search, and the best sequence is decoded without
    special tokens.

    Args:
        text: Sentence to correct.

    Returns:
        The corrected sentence as decoded from the first generated sequence.
    """
    # The vennify/t5-base-grammar-correction checkpoint loaded by
    # load_model() expects the "grammar: " task prefix; "gec: " belongs to
    # other checkpoints and makes this model emit unrelated tokens.
    input_text = "grammar: " + text
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", max_length=512, truncation=True
    )
    outputs = model.generate(
        input_ids, max_length=512, num_beams=4, early_stopping=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Streamlit UI
# Page chrome and input widget.
st.set_page_config(page_title="Grammar & Spelling Assistant", page_icon="🧠")
st.title("🧠 Grammar & Spelling Correction Assistant")
st.write("Fixes grammar and spelling errors without changing your original meaning.")

user_input = st.text_area("✍️ Enter your sentence:", height=150)

if st.button("Correct & Explain"):
    if not user_input.strip():
        st.warning("Please enter a sentence.")
    else:
        # Three-stage pipeline: preprocess -> spell check -> grammar model.
        preprocessed = preprocess_input(user_input)
        spelling_fixed = correct_spelling(preprocessed)
        final_output = correct_grammar(spelling_fixed)

        st.markdown("### ✅ Final Correction:")
        st.success(final_output)

        # Derive the explanation from what each stage actually changed, so
        # the bullets are accurate for the sentence the user entered.
        notes = []
        if preprocessed != user_input.strip():
            notes.append(
                "- Trailing special characters were removed and a final period was enforced"
            )

        # Compare word-by-word (case-insensitively) so that spacing or
        # capitalisation differences are not reported as spelling fixes.
        words_before = re.findall(r"\w+", preprocessed.lower())
        words_after = re.findall(r"\w+", spelling_fixed.lower())
        if len(words_before) == len(words_after):
            swaps = list(dict.fromkeys(
                f"`{before}` → `{after}`"
                for before, after in zip(words_before, words_after)
                if before != after
            ))
            if swaps:
                notes.append("- Misspelled words were fixed: " + ", ".join(swaps))
        elif words_before != words_after:
            notes.append("- Misspelled words were fixed")

        if final_output != spelling_fixed:
            notes.append("- Grammar (capitalization, punctuation) was corrected")
        if not notes:
            notes.append("- No changes were needed")

        # Sections are joined with blank lines and built without leading
        # indentation so Markdown does not render them as a code block.
        sections = [
            f"**Original Sentence:**  \n{user_input}",
            f"**After Preprocessing (remove #, enforce period):**  \n{preprocessed}",
            f"**After Spelling Correction:**  \n{spelling_fixed}",
            f"**After Grammar Correction:**  \n{final_output}",
            "**Explanation:**\n" + "\n".join(notes),
        ]

        st.markdown("### 📘 Explanation:")
        st.info("\n\n".join(sections))
|