Spaces:

Zaherrr
/

Translation_model_demo

Sleeping

File size: 8,710 Bytes

import gradio as gr
import os 
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

import sys
from tensorflow.keras import preprocessing
sys.modules['keras.src.preprocessing'] = preprocessing
from tensorflow import keras
sys.modules['keras'] = keras

from huggingface_hub import HfApi

# Set your Hugging Face API token in the settings of this space as a secret variable

# Authenticate using HfApi
# api = HfApi()
# api.login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))

from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the token stored as a secret.
# login(token=None) falls back to an interactive prompt, which cannot be
# answered in a headless deployment, so only log in when a token is configured.
_hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
if _hf_token:
    login(token=_hf_token)
else:
    print(
        "HUGGINGFACE_HUB_TOKEN is not set; skipping Hugging Face login. "
        "Reading from or pushing to the translation log dataset may fail.",
        file=sys.stderr,
    )

# ---------------------------------------------------------------------------------------------------------------------------------------
# Loading the translation model and english and french tokenizers

def _load_pickle(path):
    """Unpickle and return the object stored in the file at *path*.

    NOTE: unpickling can execute arbitrary code, so only files from a trusted
    source should be loaded this way.
    """
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Tokenizers for the source (English) and target (French) languages.
english_tokenizer = _load_pickle('english_tokenizer.pickle')
french_tokenizer = _load_pickle('french_tokenizer.pickle')

# Keras translation model stored next to this script.
translation_model = tf.keras.models.load_model('model2_v2.h5')

# ---------------------------------------------------------------------------------------------------------------------------------------
# Translate sentence function
# Length that English input sequences are padded to in translate_sentence.
MAX_LEN_EN = 15
# Length of the French target sequence; the decoding loop in
# translate_sentence runs for at most MAX_LEN_FR - 1 steps.
MAX_LEN_FR = 21

# Number of entries in each tokenizer's word_index.
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
# translate_sentence also uses this value as the decoder start-token id.
VOCAB_SIZE_FR = len(french_tokenizer.word_index)

# print(f'MAX_LEN_EN: {MAX_LEN_EN}')
# print(f'MAX_LEN_FR: {MAX_LEN_FR}')
# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')

# function implemented earlier, modified it to be used with gradio.
def translate_sentence(sentence, verbose=False):
    """Translate an English sentence to French by greedy step-by-step decoding.

    The sentence is tokenized with ``english_tokenizer`` and padded to
    ``MAX_LEN_EN``. The target sequence starts with the start token and is
    extended one token at a time with the arg-max prediction of
    ``translation_model`` until token id 0 is predicted or ``MAX_LEN_FR``
    positions are filled.

    Args:
        sentence: English text to translate.
        verbose: When true, print every sampled token index and word; the
            flag is also forwarded to ``translation_model.predict``.

    Returns:
        The predicted French words joined by single spaces. An empty string
        is returned for empty, whitespace-only or ``None`` input.
    """
    # Nothing to translate: skip the decoding loop (one predict call per step).
    if not sentence or not sentence.strip():
        return ''

    # Preprocess the input sentence
    sequence = english_tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')

    # Initialize the target sequence with the start token
    start_token = VOCAB_SIZE_FR
    target_sequence = np.zeros((1, MAX_LEN_FR))
    target_sequence[0, 0] = start_token

    # Collected output words; joined once at the end instead of repeated +=.
    translated_words = []

    # Step-by-step translation
    for i in range(1, MAX_LEN_FR):
        # Predict the next word given everything decoded so far
        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)

        # Get the most likely next word; int() gives a plain Python key for the lookup
        sampled_token_index = int(np.argmax(output_tokens[0, i - 1, :]))
        if verbose:
            print(f'sampled_token_index: {sampled_token_index}')
        if sampled_token_index == 0:  # End token
            break

        sampled_word = french_tokenizer.index_word.get(sampled_token_index)
        if sampled_word is None:
            # The predicted id has no entry in the French vocabulary; stop
            # decoding instead of raising KeyError and losing the partial result.
            break
        if verbose:
            print(f'sampled_word: {sampled_word}')

        # Append the word to the translation
        translated_words.append(sampled_word)

        # Feed the prediction back in as the next decoder input
        target_sequence[0, i] = sampled_token_index

    return ' '.join(translated_words)

# Example usage:
# english_sentence = "paris is relaxing during december but it is usually chilly in july"
# print(english_sentence)
# translated_sentence = translate_sentence(english_sentence)
# print(translated_sentence)



# ----------------------------------------------------------------------------------------------------------------------------------------
# Gradio app

from datasets import load_dataset, Dataset

# Fetch the translation log from the Hugging Face Hub
def load_hf_dataset():
    """Load ``Zaherrr/translation_log`` and return its ``"train"`` split."""
    return load_dataset("Zaherrr/translation_log")["train"]

def update_history_with_status(english, french, history, status):
    """Record a translation pair with its status and render the history text.

    Appends ``(english, french, status)`` to ``history`` in place, then returns
    a tuple of the display text (one ``"english ----> french (status)"`` line
    per entry, newline-separated) and that same ``history`` list.
    """
    history.append((english, french, status))

    rendered_lines = []
    for source_text, target_text, entry_status in history:
        rendered_lines.append(f"{source_text} ----> {target_text} ({entry_status})")

    return "\n".join(rendered_lines), history

def revert_last_action(history):
    """Undo the most recent accept/flag action.

    Removes the last entry from ``history``. If the module-level
    ``row_indices`` list holds a recorded row index, that row is also dropped
    from the ``Zaherrr/translation_log`` dataset and the result is pushed back
    to the Hub.

    Args:
        history: List of ``(english, french, status)`` tuples; mutated in place.

    Returns:
        A ``(history_text, history)`` tuple where ``history_text`` has one
        ``"english ----> french (status)"`` line per remaining entry. With an
        empty ``history`` this is ``("", history)`` and nothing else happens.
    """
    if history:
        # Revert history
        history.pop()

        # Revert last row in the dataset
        # NOTE(review): row_indices is a module-level list while history is
        # passed in per call, so the two can presumably drift apart when
        # several users are active -- confirm.
        if row_indices:
            last_index = row_indices.pop()
            dataset = load_hf_dataset()
            df = dataset.to_pandas()
            # The remote dataset may have changed since the index was recorded;
            # DataFrame.drop raises KeyError for a label that is not present.
            if last_index in df.index:
                df = df.drop(last_index).reset_index(drop=True)
                updated_dataset = Dataset.from_pandas(df)
                updated_dataset.push_to_hub("Zaherrr/translation_log")

    # Built outside the ``if`` so it is always defined, including for an empty
    # history (previously this path raised UnboundLocalError).
    history_text = "\n".join(f"{inp} ----> {out} ({status})" for inp, out, status in history)
    return history_text, history

# Shared by flag_action and accept_action
def _log_translation(english, french, corrected_french, status):
    """Append one row to the Hub translation log and remember its index.

    Downloads ``Zaherrr/translation_log``, appends a row with the given
    fields, pushes the whole dataset back to the Hub, and records the new
    row's position in the module-level ``row_indices`` list so that
    ``revert_last_action`` can remove it again.
    """
    data = {"english": english, "french": french, "corrected_french": corrected_french, "status": status}
    df = load_hf_dataset().to_pandas()
    df = pd.concat([df, pd.DataFrame([data])], ignore_index=True)
    Dataset.from_pandas(df).push_to_hub("Zaherrr/translation_log")
    # The new row is the last one after the concat.
    row_indices.append(len(df) - 1)


# Function to flag data
def flag_action(english, french, corrected_french, flagged_successful, history):
    """Log a flagged translation and add it to the history as "Flagged".

    Args:
        english: Source sentence.
        french: Model translation.
        corrected_french: User-supplied correction, stored alongside the row.
        flagged_successful: Status text stored in the row's ``status`` column.
        history: List of ``(english, french, status)`` tuples; mutated in place.

    Returns:
        The ``(history_text, history)`` tuple from ``update_history_with_status``.
    """
    _log_translation(english, french, corrected_french, flagged_successful)
    return update_history_with_status(english, french, history, "Flagged")


# Function to accept data
def accept_action(english, french, hidden_text, flagged_successful, history):
    """Log an accepted translation and add it to the history as "Accepted".

    Args:
        english: Source sentence.
        french: Model translation.
        hidden_text: Value stored in the row's ``corrected_french`` column.
        flagged_successful: Status text stored in the row's ``status`` column.
        history: List of ``(english, french, status)`` tuples; mutated in place.

    Returns:
        The ``(history_text, history)`` tuple from ``update_history_with_status``.
    """
    _log_translation(english, french, hidden_text, flagged_successful)
    return update_history_with_status(english, french, history, "Accepted")

# Define the Gradio interface
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("<center><h1>Translate English to French</h1></center>")
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English", placeholder="Input English text here")
            Translate_button = gr.Button(value="Translate", variant="primary")
            # Invisible, non-interactive box; its value is what accept_action
            # stores in the "corrected_french" column.
            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
            # Status box: hidden after each translation, shown after accept/flag/revert.
            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
        with gr.Column():
            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
            with gr.Column():
                with gr.Row():
                    accept_button = gr.Button(value="Accept", variant="primary")
                    flag_button = gr.Button(value="Flag", variant="secondary")
                    revert_button = gr.Button(value="Revert", variant="secondary")

    examples = gr.Examples(examples=[
        "paris is relaxing during december but it is usually chilly in july",
        "She is driving the truck"],
        inputs=english)

    gr.Markdown("History:")
    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
    history = gr.State([])

    # Indices of the rows appended to the Hub translation log, most recent
    # last; read by revert_last_action to know which row to drop.
    row_indices = []

    # Translate on Enter or button click, then hide the previous status.
    gr.on(
        triggers=[english.submit, Translate_button.click],
        fn=translate_sentence,
        inputs=english,
        outputs=[french],
    ).then(
        fn=lambda: gr.Textbox(visible=False),
        inputs=None,
        outputs=flagged_successful,
    )

    # Flag: set the status first so flag_action reads "Flagged" from the box.
    gr.on(
        triggers=[flag_button.click],
        fn=lambda: gr.Textbox(value="Flagged", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=flag_action,
        inputs=[english, french, corrected_french, flagged_successful, history],
        outputs=[history_block, history],
    )

    # Accept: set the status first so accept_action reads "Accepted" from the box.
    gr.on(
        triggers=[accept_button.click],
        fn=lambda: gr.Textbox(value="Accepted", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=accept_action,
        inputs=[english, french, hidden_text, flagged_successful, history],
        outputs=[history_block, history],
    )

    # Revert: undo the last action, then show "Reverted". The value (not the
    # placeholder) is set so it replaces a previous "Flagged"/"Accepted" text.
    gr.on(
        triggers=[revert_button.click],
        fn=revert_last_action,
        inputs=[history],
        outputs=[history_block, history],
    ).then(
        fn=lambda: gr.Textbox(value="Reverted", visible=True),
        outputs=flagged_successful,
    )

# Launch after the Blocks context has been closed so the layout and event
# wiring are finalized before the server starts.
# Credentials can be overridden with the DEMO_AUTH_USERNAME / DEMO_AUTH_PASSWORD
# environment variables; the fallbacks keep the previous hard-coded login
# working. NOTE: set both variables as secrets for any real deployment.
_auth_username = os.getenv("DEMO_AUTH_USERNAME", "username")
_auth_password = os.getenv("DEMO_AUTH_PASSWORD", "password123")
demo.launch(share=True, auth=(_auth_username, _auth_password), auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>")