File size: 2,529 Bytes
da53684
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import spacy
from spacy import displacy
from transformers import T5ForConditionalGeneration, T5Tokenizer
import gradio as gr
import torch
import difflib
import nltk
# sent_tokenize needs the Punkt sentence-tokenizer data. Newer NLTK releases
# look it up under the "punkt_tab" package while older ones use "punkt", so
# fetch both to keep sentence splitting working on either version.
nltk.download("punkt")
nltk.download("punkt_tab")
from nltk.tokenize import sent_tokenize


# Identifiers of the pretrained artefacts this app loads at import time.
SPAN_PIPELINE_NAME = "en_test_L1_model"
GEC_MODEL_CHECKPOINT = "Unbabel/gec-t5_small"
GEC_TOKENIZER_CHECKPOINT = "t5-small"

# spaCy pipeline whose output is rendered with displaCy's "span" style.
nlp = spacy.load(SPAN_PIPELINE_NAME)
# T5 sequence-to-sequence checkpoint used to generate the corrected sentence.
model = T5ForConditionalGeneration.from_pretrained(GEC_MODEL_CHECKPOINT)
# Tokenizer used both to encode the model input and to decode its output.
tokenizer = T5Tokenizer.from_pretrained(GEC_TOKENIZER_CHECKPOINT)


def _highlight_word_diff(original, corrected):
    """Return an HTML fragment marking word-level edits from *original* to *corrected*.

    Both strings are split on whitespace and compared with ``difflib.ndiff``.
    Words only in *corrected* are wrapped in a bold green ``<span>``, words
    only in *original* in a red struck-through ``<span>``, and shared words
    are emitted as plain text. Every word is followed by a single space.
    """
    from html import escape  # local import keeps this block self-contained

    pieces = []
    for entry in difflib.ndiff(original.split(), corrected.split()):
        marker = entry[:2]
        if marker == "? ":
            # ndiff inserts "? " hint lines (made of '-', '^', '+' markers)
            # next to similar items; they are not words and must not be shown.
            continue
        # Words come from user input / model output, so escape them before
        # placing them inside markup.
        word = escape(entry[2:])
        if marker == "+ ":
            pieces.append(
                f"<span style='color: green; font-weight: bold;'>{word}</span> "
            )
        elif marker == "- ":
            pieces.append(
                f"<span style='color: red; text-decoration: line-through;'>{word}</span> "
            )
        else:
            pieces.append(word + " ")
    return "".join(pieces)


def text_analysis(text):
    """Annotate and grammar-correct *text* sentence by sentence as HTML.

    Each sentence produced by ``sent_tokenize`` is:

    1. run through the module-level spaCy pipeline ``nlp`` and rendered with
       displaCy's ``span`` style inside a scrollable ``<div>``;
    2. passed, prefixed with ``"gec: "``, to the module-level T5 ``model``
       (beam search, 4 beams, at most 128 tokens); the decoded output is
       diffed word by word against the input sentence.

    Args:
        text: The text to analyse. May contain several sentences.

    Returns:
        A ``(spans_html, corrections_html)`` tuple of HTML strings. Per-sentence
        fragments are joined with ``<hr>``; the first string also ends with a
        trailing ``<hr>``. Blank or missing input yields ``("", "")``.
    """
    # Nothing to analyse: skip tokenisation and model inference entirely.
    if not text or not text.strip():
        return "", ""

    span_panels = []
    correction_panels = []
    for sentence in sent_tokenize(text):
        doc = nlp(sentence)
        span_html = displacy.render(doc, style="span", options={"compact": True})
        # Constrain the rendering so long sentences scroll instead of overflowing.
        span_panels.append(
            "<div style='max-width:100%; max-height:360px; overflow:auto'>"
            + span_html
            + "</div>"
        )

        inputs = tokenizer("gec: " + sentence, return_tensors="pt")
        # Inference only, so gradient tracking is unnecessary.
        with torch.no_grad():
            outputs = model.generate(
                **inputs, max_length=128, num_beams=4, early_stopping=True
            )
        corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)

        diff_html = _highlight_word_diff(sentence, corrected_sentence)
        correction_panels.append(f"<p><b>Corrected:</b> {diff_html}</p>")

    return "<hr>".join(span_panels) + "<hr>", "<hr>".join(correction_panels)
        

# Sample inputs offered in the UI; each inner list holds the value for the
# interface's single text box.
EXAMPLE_TEXTS = [
    ["Then there was a sharp decrease so by 2013 the worldwide outlay accounted for 214 billions. Moreother there is a huge difference between part of 60+ years people. It is clearly seen that in Yemen the share of children before 14 years tend to become less - from 50,1% in 2000 to 37% in 2050."],
    ["In post - school 70 percent were the same men a postgraduate diploma and women undergraduate diploma. Parents can try to know friends of their child, so they will know what they are doing and who they are."],
]

# One text box in, two HTML panels out (the two strings returned by text_analysis).
demo = gr.Interface(
    fn=text_analysis,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["html", "html"],
    examples=EXAMPLE_TEXTS,
)

# Start serving the interface.
demo.launch()