import asyncio

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load model and tokenizer
model_name = "hassaanik/grammar-correction-model"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use GPU if available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Use FP16 for faster inference on GPU
if torch.cuda.is_available():
    model.half()

# Async grammar correction function
async def correct_grammar_async(text):
    # Tokenize input and move it to the correct device (CPU/GPU)
    inputs = tokenizer.encode(
        text, return_tensors="pt", max_length=512, truncation=True
    ).to(device)

    # Run generation in a worker thread so the event loop is not blocked
    outputs = await asyncio.to_thread(
        model.generate, inputs, max_length=512, num_beams=5, early_stopping=True
    )

    # Decode output and return corrected text
    corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return corrected_text

# Gradio interface function to handle input and output
def correct_grammar_interface(text):
    corrected_text = asyncio.run(correct_grammar_async(text))
    return corrected_text

# Create Gradio interface. The original snippet is truncated after gr.Markdown(";
# the layout below is a minimal assumed completion: a title, an input textbox,
# an output textbox, and a button wired to the correction function.
with gr.Blocks() as grammar_app:
    gr.Markdown("## Grammar Correction")  # assumed title; original string was cut off
    input_box = gr.Textbox(label="Input text", lines=4)
    output_box = gr.Textbox(label="Corrected text", lines=4)
    correct_btn = gr.Button("Correct grammar")
    correct_btn.click(fn=correct_grammar_interface, inputs=input_box, outputs=output_box)

if __name__ == "__main__":
    grammar_app.launch()
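
# Note: Gradio can also call async handlers directly, so the sync wrapper with
# asyncio.run is optional. A sketch of the alternative wiring, assuming the same
# Blocks layout as above (input_box, output_box, correct_btn are the names used there):
#
#     correct_btn.click(fn=correct_grammar_async, inputs=input_box, outputs=output_box)
#
# Letting Gradio await the coroutine itself avoids spinning up a fresh event loop
# on every request.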