Zaherrr committed on
Commit
a7b55ec
·
verified ·
1 Parent(s): f9cb70e

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +218 -0
  2. english_tokenizer.pickle +3 -0
  3. french_tokenizer.pickle +3 -0
  4. model2_v2.h5 +3 -0
app.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import pandas as pd
4
+ import numpy as np
5
+ import tensorflow as tf
6
+ from tensorflow.keras.preprocessing.text import Tokenizer
7
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
8
+ import pickle
9
+
10
+ import sys
11
+ from tensorflow.keras import preprocessing
12
+ sys.modules['keras.src.preprocessing'] = preprocessing
13
+ from tensorflow import keras
14
+ sys.modules['keras'] = keras
15
+
16
+ # ---------------------------------------------------------------------------------------------------------------------------------------
17
+ # Loading the translation model and english and french tokenizers
18
+
19
# Resolve bundled assets relative to this script so the app also starts when
# it is launched from a different working directory.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only ship tokenizer pickles that were produced by a trusted training run.
with open(os.path.join(_BASE_DIR, 'english_tokenizer.pickle'), 'rb') as handle:
    english_tokenizer = pickle.load(handle)

with open(os.path.join(_BASE_DIR, 'french_tokenizer.pickle'), 'rb') as handle:
    french_tokenizer = pickle.load(handle)

translation_model = tf.keras.models.load_model(os.path.join(_BASE_DIR, 'model2_v2.h5'))

# ---------------------------------------------------------------------------------------------------------------------------------------
# Translate sentence function

# Sequence lengths used when padding the encoder input and sizing the decoder input.
MAX_LEN_EN = 15
MAX_LEN_FR = 21

# Number of distinct words known to each tokenizer.
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
VOCAB_SIZE_FR = len(french_tokenizer.word_index)
34
+
35
+ # print(f'MAX_LEN_EN: {MAX_LEN_EN}')
36
+ # print(f'MAX_LEN_FR: {MAX_LEN_FR}')
37
+ # print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
38
+ # print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')
39
+
40
+ # function implemented earlier, modified it to be used with gradio.
41
def translate_sentence(sentence, verbose=False):
    """Translate an English sentence to French by greedy step-by-step decoding.

    Args:
        sentence: English text to translate. Empty, whitespace-only or
            ``None`` input yields an empty translation without calling the
            model.
        verbose: Forwarded to ``translation_model.predict``; when truthy the
            sampled token ids and words are also printed.

    Returns:
        The predicted French words joined by single spaces (possibly ``''``).
    """
    # Nothing to translate: avoid feeding an all-padding sequence to the model.
    if not sentence or not sentence.strip():
        return ''

    # Preprocess the input sentence
    sequence = english_tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')

    # Initialize the target sequence with the start token
    start_token = VOCAB_SIZE_FR
    target_sequence = np.zeros((1, MAX_LEN_FR))
    target_sequence[0, 0] = start_token

    translated_words = []

    # Step-by-step translation: position i is predicted from positions < i.
    for i in range(1, MAX_LEN_FR):
        # Predict the next word
        output_tokens = translation_model.predict(
            [padded_sequence, target_sequence], verbose=verbose
        )

        # Get the most likely next word
        sampled_token_index = int(np.argmax(output_tokens[0, i - 1, :]))
        if verbose:
            print(f'sampled_token_index: {sampled_token_index}')
        if sampled_token_index == 0:  # End token
            break

        # Stop cleanly instead of raising KeyError on an id without a word.
        sampled_word = french_tokenizer.index_word.get(sampled_token_index)
        if sampled_word is None:
            break
        if verbose:
            print(f'sampled_word: {sampled_word}')

        translated_words.append(sampled_word)

        # Update the target sequence so the next step sees this prediction
        target_sequence[0, i] = sampled_token_index

    return ' '.join(translated_words)
75
+
76
+ # Example usage:
77
+ # english_sentence = "paris is relaxing during december but it is usually chilly in july"
78
+ # print(english_sentence)
79
+ # translated_sentence = translate_sentence(english_sentence)
80
+ # print(translated_sentence)
81
+
82
+
83
+
84
+ # ----------------------------------------------------------------------------------------------------------------------------------------
85
+ # Gradio app
86
+
87
+ # Function to update the history block with status
88
def update_history_with_status(english, french, history, status):
    """Append a translation pair with its status and render the history text.

    Args:
        english: Source sentence of the pair.
        french: Translated sentence of the pair.
        history: List of ``(english, french, status)`` tuples; extended in place.
        status: Label stored with the new pair.

    Returns:
        A ``(history_text, history)`` tuple where ``history_text`` has one
        ``"<english> ----> <french> (<status>)"`` line per entry.
    """
    entry = (english, french, status)
    history.append(entry)

    rendered_lines = []
    for source_text, target_text, entry_status in history:
        rendered_lines.append(f"{source_text} ----> {target_text} ({entry_status})")

    return "\n".join(rendered_lines), history
92
+
93
def revert_last_action(history):
    """Undo the most recent accept/flag action.

    Removes the last entry from ``history`` and, when a row index was recorded
    for it in the module-level ``row_indices`` list, deletes that row from
    ``flagged_translations/log.csv``.

    Args:
        history: List of ``(english, french, status)`` tuples; modified in place.

    Returns:
        A ``(history_text, history)`` tuple with the re-rendered history text.
        With an empty ``history`` nothing is reverted and ``("", history)`` is
        returned.
    """
    if history:
        # Revert history
        history.pop()

        # Revert last row in the CSV file
        if row_indices:
            last_index = row_indices.pop()
            csv_file = "flagged_translations/log.csv"
            if os.path.exists(csv_file):
                df = pd.read_csv(csv_file)
                # The logger's row numbers are 1-based; the frame is 0-based.
                row = last_index - 1
                # Skip silently if the row is gone rather than raising KeyError.
                if 0 <= row < len(df):
                    df = df.drop(row).reset_index(drop=True)
                    df.to_csv(csv_file, index=False)

    # Update history block text (always defined, even when nothing was reverted)
    history_text = "\n".join(
        f"{inp} ----> {out} ({status})" for inp, out, status in history
    )
    return history_text, history
114
+
115
+ # CSV Logger for flagging
116
# CSV logger that stores every accepted/flagged translation in a csv file.
flagging_callback = gr.CSVLogger()

# Define the Gradio interface
# NOTE(review): the original indentation was not recoverable; the nesting of
# rows/columns below is the most plausible reading -- confirm against the UI.
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("<center><h1>Translate English to French</h1></center>")
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English", placeholder="Input English text here")
            Translate_button = gr.Button(value="Translate", variant="primary")
            # Invisible placeholder used as the "corrected" column for accepted rows.
            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
        with gr.Column():
            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
    with gr.Column():
        with gr.Row():
            accept_button = gr.Button(value="Accept", variant="primary")
            flag_button = gr.Button(value="Flag", variant="secondary")
            revert_button = gr.Button(value="Revert", variant="secondary")

    # This needs to be called at some point prior to the first call to flagging_callback.flag()
    flagging_callback.setup([english, french, corrected_french, flagged_successful], "flagged_translations")

    examples = gr.Examples(examples=[
        "paris is relaxing during december but it is usually chilly in july",
        "She is driving the truck"],
        inputs=english)

    gr.Markdown("History:")
    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
    history = gr.State([])

    # Track the row indices returned by the CSVLogger so the last row can be reverted.
    # NOTE(review): this list is module-level while `history` is a gr.State --
    # presumably that means it is shared by all users; confirm if multi-user use matters.
    row_indices = []

    def _log_translation(data, english, french, history, status):
        """Write `data` to the CSV log, remember its row index, update history."""
        index = flagging_callback.flag(data)
        row_indices.append(index)
        return update_history_with_status(english, french, history, status)

    def flag_action(english, french, corrected_french, flagged_successful, history):
        """Log the pair (with the user's correction) as "Flagged"."""
        data = [english, french, corrected_french, flagged_successful]
        return _log_translation(data, english, french, history, "Flagged")

    def accept_action(english, french, hidden_text, flagged_successful, history):
        """Log the pair as "Accepted"; `hidden_text` fills the correction column."""
        data = [english, french, hidden_text, flagged_successful]
        return _log_translation(data, english, french, history, "Accepted")

    # Translate on Enter or button click, then hide the stale status box.
    gr.on(
        triggers=[english.submit, Translate_button.click],
        fn=translate_sentence,
        inputs=english,
        outputs=[french],
    ).then(
        fn=lambda: gr.Textbox(visible=False),
        inputs=None,
        outputs=flagged_successful,
    )

    # The status box is set first so that its value is part of the logged row.
    gr.on(
        triggers=[flag_button.click],
        fn=lambda: gr.Textbox(value="Flagged", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=flag_action,
        inputs=[english, french, corrected_french, flagged_successful, history],
        outputs=[history_block, history],
    )

    gr.on(
        triggers=[accept_button.click],
        fn=lambda: gr.Textbox(value="Accepted", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=accept_action,
        inputs=[english, french, hidden_text, flagged_successful, history],
        outputs=[history_block, history],
    )

    gr.on(
        triggers=[revert_button.click],
        fn=revert_last_action,
        inputs=[history],
        outputs=[history_block, history],
    ).then(
        # Set the value (not only the placeholder) so the previous
        # "Flagged"/"Accepted" status does not stay on screen after a revert.
        fn=lambda: gr.Textbox(value="Reverted", placeholder="Reverted", visible=True),
        outputs=flagged_successful,
    )
216
+
217
# NOTE(security): credentials should come from the environment (e.g. Space
# secrets). The literals below are kept only as a backward-compatible default
# and should be rotated and removed from source control.
_auth_username = os.environ.get("APP_AUTH_USERNAME", "username")
_auth_password = os.environ.get("APP_AUTH_PASSWORD", "Zaka_module7")

demo.launch(
    share=True,
    auth=(_auth_username, _auth_password),
    auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>",
)
english_tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7e198626a7c26d022e8db734e247db2b1792ec483ffadd6f4e9977620624c1
3
+ size 6044
french_tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9773dd49cef95f1a2eb08c37ddaae08fdd0ba13a31b077deb7c5934ffbc2bae7
3
+ size 11453
model2_v2.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3f9304c423113437cb1a22a5f73ae99db50557f1ac079befd949fade2dfe323
3
+ size 23313472