# Hugging Face Space app — English -> French translation demo (Gradio).
import gradio as gr
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import sys
from tensorflow.keras import preprocessing
# Compat shim: alias tf.keras modules under the names 'keras.src.preprocessing'
# and 'keras' in sys.modules — presumably so the pickled tokenizers (saved with
# a different Keras packaging) resolve their classes at unpickle time.
# NOTE(review): confirm against the environment that produced the .pickle files.
sys.modules['keras.src.preprocessing'] = preprocessing
from tensorflow import keras
sys.modules['keras'] = keras
from huggingface_hub import HfApi
# Set your Hugging Face API token in the settings of this space as a secret variable
# Authenticate using HfApi
# api = HfApi()
# api.login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
from huggingface_hub import login
# Authenticate with the Hub; the token comes from the Space's secret variables.
login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
# ---------------------------------------------------------------------------------------------------------------------------------------
# Loading the translation model and the English / French tokenizers.
with open('english_tokenizer.pickle', 'rb') as handle:
    english_tokenizer = pickle.load(handle)
with open('french_tokenizer.pickle', 'rb') as handle:
    french_tokenizer = pickle.load(handle)
translation_model = tf.keras.models.load_model('model2_v2.h5')

# ---------------------------------------------------------------------------------------------------------------------------------------
# Decoding constants.
MAX_LEN_EN = 15   # fixed encoder input length (padded English sequence)
MAX_LEN_FR = 21   # fixed decoder output length (French sequence)
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
VOCAB_SIZE_FR = len(french_tokenizer.word_index)
# print(f'MAX_LEN_EN: {MAX_LEN_EN}')
# print(f'MAX_LEN_FR: {MAX_LEN_FR}')
# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')
# function implemented earlier, modified it to be used with gradio.
def translate_sentence(sentence, verbose=False):
    """Greedily translate an English sentence to French with the seq2seq model.

    Args:
        sentence: English input text.
        verbose: if truthy, print each sampled token index/word and pass
            verbosity through to ``model.predict``.

    Returns:
        The decoded French sentence as a stripped string.
    """
    # Tokenize and pad the English input to the encoder's fixed length.
    sequence = english_tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')
    # Seed the decoder input with the start token (the vocab size is used as
    # an otherwise-unused start-of-sequence marker; 344 for this tokenizer).
    start_token = VOCAB_SIZE_FR
    target_sequence = np.zeros((1, MAX_LEN_FR))
    target_sequence[0, 0] = start_token
    translation = ''
    # Step-by-step greedy decoding: predict, take the argmax, feed it back.
    for i in range(1, MAX_LEN_FR):
        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)
        sampled_token_index = np.argmax(output_tokens[0, i - 1, :])
        if verbose:
            print(f'sampled_token_index: {sampled_token_index}')
        if sampled_token_index == 0:  # index 0 (padding) acts as the end token
            break
        sampled_word = french_tokenizer.index_word[sampled_token_index]
        if verbose:
            print(f'sampled_word: {sampled_word}')
        # Append the word to the translation and extend the decoder input.
        translation += ' ' + sampled_word
        target_sequence[0, i] = sampled_token_index
    return translation.strip()
# Example usage:
# english_sentence = "paris is relaxing during december but it is usually chilly in july"
# print(english_sentence)
# translated_sentence = translate_sentence(english_sentence)
# print(translated_sentence)
# ----------------------------------------------------------------------------------------------------------------------------------------
# Gradio app
from datasets import load_dataset, Dataset
# Function to load the dataset from Hugging Face
def load_hf_dataset():
    """Fetch the translation-log dataset from the Hub and return its train split."""
    dataset = load_dataset("Zaherrr/translation_log")
    return dataset["train"]  # access the split explicitly
def update_history_with_status(english, french, history, status):
    """Append one (english, french, status) entry to the session history and
    return the rendered history text plus the (mutated) history list."""
    history.append((english, french, status))
    rendered = [f"{src} ----> {dst} ({state})" for src, dst, state in history]
    return "\n".join(rendered), history
def revert_last_action(history):
    """Undo the most recent accept/flag: drop the last history entry and, if a
    logged row index is tracked, delete that row from the Hub dataset.

    Returns the re-rendered history text and the (mutated) history list.
    """
    # Initialize up front so an empty history returns ("", []) instead of
    # crashing on an unbound variable.
    history_text = ""
    if history:
        # Revert the in-session history.
        history.pop()
        history_text = "\n".join(f"{inp} ----> {out} ({status})" for inp, out, status in history)
        # Revert the last row pushed to the dataset, if any was tracked.
        if row_indices:
            last_index = row_indices.pop()
            dataset = load_hf_dataset()
            df = dataset.to_pandas()
            df = df.drop(last_index).reset_index(drop=True)
            updated_dataset = Dataset.from_pandas(df)
            updated_dataset.push_to_hub("Zaherrr/translation_log")
    return history_text, history
# Function to flag data
def flag_action(english, french, corrected_french, flagged_successful, history):
    """Log a flagged translation (with its correction) to the Hub dataset and
    update the session history with status "Flagged"."""
    data = {"english": english, "french": french, "corrected_french": corrected_french, "status": flagged_successful}
    # Append the row to the remote dataset and push it back.
    dataset = load_hf_dataset()
    df = dataset.to_pandas()
    new_df = pd.DataFrame([data])
    df = pd.concat([df, new_df], ignore_index=True)
    updated_dataset = Dataset.from_pandas(df)
    updated_dataset.push_to_hub("Zaherrr/translation_log")
    # Remember the pushed row's index so "Revert" can delete it later.
    index = len(df) - 1
    row_indices.append(index)
    return update_history_with_status(english, french, history, "Flagged")
# Function to accept data
def accept_action(english, french, hidden_text, flagged_successful, history):
    """Log an accepted translation to the Hub dataset and update the session
    history with status "Accepted". Mirrors flag_action, with the hidden
    textbox standing in for the corrected-French field."""
    data = {"english": english, "french": french, "corrected_french": hidden_text, "status": flagged_successful}
    # Append the row to the remote dataset and push it back.
    dataset = load_hf_dataset()
    df = dataset.to_pandas()
    new_df = pd.DataFrame([data])
    df = pd.concat([df, new_df], ignore_index=True)
    updated_dataset = Dataset.from_pandas(df)
    updated_dataset.push_to_hub("Zaherrr/translation_log")
    # Remember the pushed row's index so "Revert" can delete it later.
    index = len(df) - 1
    row_indices.append(index)
    return update_history_with_status(english, french, history, "Accepted")
# Define the Gradio interface.
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("<center><h1>Translate English to French</h1></center>")
    with gr.Row():
        # Left column: English input and the translate trigger.
        with gr.Column():
            english = gr.Textbox(label="English", placeholder="Input English text here")
            Translate_button = gr.Button(value="Translate", variant="primary")
            # Hidden components that feed the accept/flag callbacks.
            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
        # Right column: predicted French and a box for corrections.
        with gr.Column():
            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
    with gr.Column():
        with gr.Row():
            accept_button = gr.Button(value="Accept", variant="primary")
            flag_button = gr.Button(value="Flag", variant="secondary")
            revert_button = gr.Button(value="Revert", variant="secondary")
    examples = gr.Examples(examples=[
        "paris is relaxing during december but it is usually chilly in july",
        "She is driving the truck"],
        inputs=english)
    gr.Markdown("History:")
    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
    history = gr.State([])
    # Track the row indices pushed to the dataset so "Revert" can undo them.
    row_indices = []
    # Translate on Enter or button click, then hide the status box again.
    gr.on(
        triggers=[english.submit, Translate_button.click],
        fn=translate_sentence,
        inputs=english,
        outputs=[french],
    ).then(
        fn=lambda: gr.Textbox(visible=False),
        inputs=None,
        outputs=flagged_successful,
    )
    # Flag: show the status first so flag_action reads "Flagged" from it.
    gr.on(
        triggers=[flag_button.click],
        fn=lambda: gr.Textbox(value="Flagged", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=flag_action,
        inputs=[english, french, corrected_french, flagged_successful, history],
        outputs=[history_block, history],
    )
    # Accept: same pattern with the "Accepted" status.
    gr.on(
        triggers=[accept_button.click],
        fn=lambda: gr.Textbox(value="Accepted", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=accept_action,
        inputs=[english, french, hidden_text, flagged_successful, history],
        outputs=[history_block, history],
    )
    # Revert: undo the last log entry, then surface a "Reverted" placeholder.
    gr.on(
        triggers=[revert_button.click],
        fn=revert_last_action,
        inputs=[history],
        outputs=[history_block, history],
    ).then(
        fn=lambda: gr.Textbox(placeholder="Reverted", visible=True),
        outputs=flagged_successful,
    )

# NOTE(security): hard-coded demo credentials — move these into the Space's
# secret variables before any real deployment.
demo.launch(share=True, auth=('username', 'password123'), auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>")