sashtech committed on
Commit
3b5e5a8
·
verified ·
1 Parent(s): 463d2eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -30,7 +30,7 @@ def get_synonyms_nltk(word, pos):
30
  synsets = wordnet.synsets(word, pos=pos)
31
  if synsets:
32
  lemmas = synsets[0].lemmas()
33
- return [lemma.name() for lemma in lemmas]
34
  return []
35
 
36
  # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
@@ -55,14 +55,14 @@ def capitalize_sentences_and_nouns(text):
55
  def correct_tense_errors(text):
56
  doc = nlp(text)
57
  corrected_text = []
 
58
  for token in doc:
59
- # Check for tense correction based on modal verbs
60
- if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
61
- # Replace with appropriate verb form
62
- lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
63
- corrected_text.append(lemma)
64
  else:
65
  corrected_text.append(token.text)
 
66
  return ' '.join(corrected_text)
67
 
68
  # Function to correct singular/plural errors (Singular/Plural Correction)
@@ -72,17 +72,15 @@ def correct_singular_plural_errors(text):
72
 
73
  for token in doc:
74
  if token.pos_ == "NOUN":
75
- # Check if the noun is singular or plural
76
  if token.tag_ == "NN": # Singular noun
77
- # Look for determiners like "many" to correct to plural
78
- if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
79
- corrected_text.append(token.lemma_ + 's')
80
  else:
81
  corrected_text.append(token.text)
82
  elif token.tag_ == "NNS": # Plural noun
83
- # Look for determiners like "a", "one" to correct to singular
84
- if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
85
- corrected_text.append(token.lemma_)
86
  else:
87
  corrected_text.append(token.text)
88
  else:
@@ -96,13 +94,16 @@ def correct_singular_plural_errors(text):
96
  def correct_article_errors(text):
97
  doc = nlp(text)
98
  corrected_text = []
99
- for token in doc:
100
- if token.text in ['a', 'an']:
101
- next_token = token.nbor(1)
102
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
103
- corrected_text.append("an")
104
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
105
- corrected_text.append("a")
 
 
 
106
  else:
107
  corrected_text.append(token.text)
108
  else:
@@ -115,28 +116,30 @@ def paraphrase_with_spacy_nltk(text):
115
  paraphrased_words = []
116
 
117
  for token in doc:
118
- # Map SpaCy POS tags to WordNet POS tags
119
  pos = None
120
- if token.pos_ in {"NOUN"}:
121
  pos = wordnet.NOUN
122
- elif token.pos_ in {"VERB"}:
123
  pos = wordnet.VERB
124
- elif token.pos_ in {"ADJ"}:
125
  pos = wordnet.ADJ
126
- elif token.pos_ in {"ADV"}:
127
  pos = wordnet.ADV
128
 
129
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
130
 
131
- # Replace with a synonym only if it makes sense
132
- if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
133
- paraphrased_words.append(synonyms[0])
 
 
 
 
 
134
  else:
135
  paraphrased_words.append(token.text)
136
 
137
- # Join the words back into a sentence
138
  paraphrased_sentence = ' '.join(paraphrased_words)
139
-
140
  return paraphrased_sentence
141
 
142
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
@@ -146,15 +149,11 @@ def paraphrase_and_correct(text):
146
 
147
  # Step 2: Apply grammatical corrections on the paraphrased text
148
  corrected_text = correct_article_errors(paraphrased_text)
149
-
150
  corrected_text = capitalize_sentences_and_nouns(corrected_text)
151
-
152
  corrected_text = correct_singular_plural_errors(corrected_text)
 
153
 
154
- # Step 3: Capitalize sentences and proper nouns (final correction step)
155
- final_text = correct_tense_errors(corrected_text)
156
-
157
- return final_text
158
 
159
  # Gradio app setup with two tabs
160
  with gr.Blocks() as demo:
@@ -162,18 +161,18 @@ with gr.Blocks() as demo:
162
  t1 = gr.Textbox(lines=5, label='Text')
163
  button1 = gr.Button("🤖 Predict!")
164
  label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
165
- score1 = gr.Textbox(lines=1, label='Prob')
166
-
167
  # Connect the prediction function to the button
168
  button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
169
 
170
  with gr.Tab("Humanifier"):
171
- text_input = gr.Textbox(lines=5, label="Input Text")
172
  paraphrase_button = gr.Button("Paraphrase & Correct")
173
  output_text = gr.Textbox(label="Paraphrased Text")
174
-
175
  # Connect the paraphrasing function to the button
176
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
177
 
178
  # Launch the app with the remaining functionalities
179
- demo.launch()
 
30
  synsets = wordnet.synsets(word, pos=pos)
31
  if synsets:
32
  lemmas = synsets[0].lemmas()
33
+ return [lemma.name().replace('_', ' ') for lemma in lemmas]
34
  return []
35
 
36
  # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
 
55
  def correct_tense_errors(text):
56
  doc = nlp(text)
57
  corrected_text = []
58
+
59
  for token in doc:
60
+ if token.tag_ in {"VBD", "VBN"} and token.lemma_:
61
+ # Convert past tense verbs to their base form
62
+ corrected_text.append(token.lemma_)
 
 
63
  else:
64
  corrected_text.append(token.text)
65
+
66
  return ' '.join(corrected_text)
67
 
68
  # Function to correct singular/plural errors (Singular/Plural Correction)
 
72
 
73
  for token in doc:
74
  if token.pos_ == "NOUN":
 
75
  if token.tag_ == "NN": # Singular noun
76
+ if any(child.text.lower() in {'many', 'several', 'few', 'a', 'one'} for child in token.head.children):
77
+ corrected_text.append(token.text if token.text.endswith('s') else token.text + 's')
 
78
  else:
79
  corrected_text.append(token.text)
80
  elif token.tag_ == "NNS": # Plural noun
81
+ if any(child.text.lower() in {'a', 'one'} for child in token.head.children):
82
+ singular = token.lemma_
83
+ corrected_text.append(singular)
84
  else:
85
  corrected_text.append(token.text)
86
  else:
 
94
  def correct_article_errors(text):
95
  doc = nlp(text)
96
  corrected_text = []
97
+ tokens = list(doc)
98
+
99
+ for i, token in enumerate(tokens):
100
+ if token.text.lower() in {'a', 'an'}:
101
+ if i + 1 < len(tokens):
102
+ next_token = tokens[i + 1]
103
+ if next_token.text[0].lower() in 'aeiou':
104
+ corrected_text.append('an')
105
+ else:
106
+ corrected_text.append('a')
107
  else:
108
  corrected_text.append(token.text)
109
  else:
 
116
  paraphrased_words = []
117
 
118
  for token in doc:
 
119
  pos = None
120
+ if token.pos_ == "NOUN":
121
  pos = wordnet.NOUN
122
+ elif token.pos_ == "VERB":
123
  pos = wordnet.VERB
124
+ elif token.pos_ == "ADJ":
125
  pos = wordnet.ADJ
126
+ elif token.pos_ == "ADV":
127
  pos = wordnet.ADV
128
 
129
  synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
130
 
131
+ # Replace content words with the first WordNet synonym (no frequency or context check is performed)
132
+ if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:
133
+ # Skip the synonym when it equals the original word or contains more than one word
134
+ synonym = synonyms[0]
135
+ if synonym != token.text.lower() and len(synonym.split()) == 1:
136
+ paraphrased_words.append(synonym)
137
+ else:
138
+ paraphrased_words.append(token.text)
139
  else:
140
  paraphrased_words.append(token.text)
141
 
 
142
  paraphrased_sentence = ' '.join(paraphrased_words)
 
143
  return paraphrased_sentence
144
 
145
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 
149
 
150
  # Step 2: Apply grammatical corrections on the paraphrased text
151
  corrected_text = correct_article_errors(paraphrased_text)
 
152
  corrected_text = capitalize_sentences_and_nouns(corrected_text)
 
153
  corrected_text = correct_singular_plural_errors(corrected_text)
154
+ corrected_text = correct_tense_errors(corrected_text)
155
 
156
+ return corrected_text
 
 
 
157
 
158
  # Gradio app setup with two tabs
159
  with gr.Blocks() as demo:
 
161
  t1 = gr.Textbox(lines=5, label='Text')
162
  button1 = gr.Button("🤖 Predict!")
163
  label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
164
+ score1 = gr.Textbox(lines=1, label='Probability')
165
+
166
  # Connect the prediction function to the button
167
  button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
168
 
169
  with gr.Tab("Humanifier"):
170
+ text_input = gr.Textbox(lines=10, label="Input Text")
171
  paraphrase_button = gr.Button("Paraphrase & Correct")
172
  output_text = gr.Textbox(label="Paraphrased Text")
173
+
174
  # Connect the paraphrasing function to the button
175
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
176
 
177
  # Launch the app with the remaining functionalities
178
+ demo.launch()