Spaces:

ArmelRandy
/

MT

Runtime error

File size: 3,572 Bytes

import os
import json
import shutil
import gradio as gr
from datasets import load_dataset
from huggingface_hub import upload_file
from io import StringIO
import pandas as pd
import datetime

# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Hub dataset repository that save() uploads submitted translations to.
DIALOGUES_DATASET = "ArmelRandy/MT_dialogues"

def load_data():
    """Load the source instruction dataset from the Hugging Face Hub.

    The request is authenticated with ``HF_TOKEN`` (which may be ``None``).

    Returns:
        The object returned by ``load_dataset`` for
        ``"ArmelR/oasst1_guanaco_english"``; the rest of this module indexes
        it by split name.
    """
    return load_dataset("ArmelR/oasst1_guanaco_english", use_auth_token=HF_TOKEN)

    
# Load the dataset once at import time; the UI callbacks below index into it.
samples = load_data()
# Split names, offered in the "Dataset split" dropdown.
splits = list(samples.keys())
# Target languages, offered in the "Translation language" dropdown.
languages = ["Wolof"]
# Startup diagnostics: the working directory and the directory holding this script.
print(f"current directory {os.getcwd()}")
print(f"total path {os.path.dirname(os.path.realpath(__file__))}")

# Extra CSS handed to gr.Blocks(css=...).
# NOTE(review): no component in the visible code sets elem_id to "banner-image"
# or "chat-message", so these rules may currently match nothing -- confirm.
custom_css = """
#banner-image {
    display: block;
    margin-left: auto;
    margin-right: auto;
}
#chat-message {
    font-size: 14px;
    min-height: 300px;
}
"""
def caller_split(s):
    """Reset the view to the first example of split ``s``.

    Args:
        s: Name of the split, used as a key into ``samples``.

    Returns:
        A tuple ``(0, prompt, completion)``: the reset index followed by the
        ``"prompt"`` and ``"completion"`` fields of the split's first example.
    """
    first_example = samples[s][0]
    return 0, first_example["prompt"], first_example["completion"]

def identity(index, split):
    """Return the prompt and completion of one example of a split.

    The index slider's range is not tied to the size of the selected split,
    so the requested index is clamped into the valid range instead of letting
    an out-of-range value raise ``IndexError``.

    Args:
        index: Requested example index (coerced to ``int``).
        split: Name of the split, used as a key into ``samples``.

    Returns:
        A tuple ``(prompt, completion)`` for the (clamped) example.
    """
    examples = samples[split]
    # Clamp to [0, len - 1]: the slider may point past the end of a short split.
    position = min(max(int(index), 0), len(examples) - 1)
    example = examples[position]
    return example["prompt"], example["completion"]
    
def save(index, language, split, prompt, completion):
    """Upload one translated pair to the Hub and advance to the next example.

    When both translated fields are non-empty, a one-line JSONL record
    (``prompt``, ``completion``, ``language``, ``index``) is uploaded to
    ``DIALOGUES_DATASET`` under ``<date>/<hour>/prompts_<timestamp>.jsonl``
    and the view moves to the next example of ``split`` (staying on the last
    one when already at the end). When either field is empty, nothing is
    uploaded and the text typed so far is returned unchanged, so a
    half-finished translation is not wiped.

    Args:
        index: Index of the source example being translated. It is coerced to
            ``int`` and clamped into the valid range of ``split``.
        language: Target language label stored with the record.
        split: Name of the split, used as a key into ``samples``.
        prompt: Translated prompt entered by the user.
        completion: Translated completion entered by the user.

    Returns:
        A tuple ``(index, source_prompt, source_completion,
        translated_prompt, translated_completion)`` matching the outputs wired
        to the Submit button.

    Raises:
        Any exception raised by ``upload_file`` propagates to the caller.
    """
    examples = samples[split]
    # The slider range is independent of the split size; clamp so that neither
    # the lookup below nor the stored index can point past the end.
    index = min(max(int(index), 0), len(examples) - 1)

    if not prompt or not completion:
        # Nothing to save yet: stay on the current example and hand back
        # whatever the user has typed instead of clearing it.
        current = examples[index]
        return index, current["prompt"], current["completion"], prompt, completion

    # Take a single timestamp so the folder (date/hour) and the file name can
    # never disagree across an hour or day boundary.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.%f")
    file_name = f"prompts_{timestamp}.jsonl"

    print("Saving ...")
    # NOTE(review): the record does not store `split`, so `index` alone does
    # not identify the source example across splits -- confirm this is intended.
    record = {"prompt": prompt, "completion": completion, "language": language, "index": index}
    with StringIO() as buffer:
        pd.DataFrame([record]).to_json(buffer, orient="records", lines=True)
        payload = buffer.getvalue().encode()

    # Push to Hub
    upload_file(
        path_in_repo=f"{now.date()}/{now.hour}/{file_name}",
        path_or_fileobj=payload,
        repo_id=DIALOGUES_DATASET,
        token=HF_TOKEN,
        repo_type="dataset",
    )

    # Clear the translation boxes and move on to the next example.
    next_index = min(index + 1, len(examples) - 1)
    upcoming = examples[next_index]
    return next_index, upcoming["prompt"], upcoming["completion"], "", ""

# Seed the source panes with the first example of the default split so the
# page does not start blank while the slider already shows index 0.
_, initial_prompt, initial_completion = caller_split(splits[0])
# Highest index that is valid in at least one split; replaces a hard-coded
# upper bound that ignored the actual dataset size.
max_index = max(len(samples[name]) for name in splits) - 1

with gr.Blocks(analytics_enabled=False, css=custom_css) as demo:
    gr.HTML("""<h1 align="center">MT💫</h1>""")

    # Source example: split selector, index slider, original prompt/completion.
    with gr.Blocks():
        with gr.Row():
            split = gr.Dropdown(choices=splits, label="Dataset split", value=splits[0])
        with gr.Row():
            index_example = gr.Slider(
                minimum=0,
                maximum=max_index,
                step=1,
                value=0,
                interactive=True,
                info="Index of the chosen instruction-output pair.",
            )
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="prompt", value=initial_prompt)
            with gr.Column():
                completion = gr.Code(label="Completion", value=initial_completion)

    # Translation entry: target language, translated fields, submit button.
    with gr.Blocks():
        with gr.Row():
            language = gr.Dropdown(choices=languages, label="Translation language", value=languages[0])
        with gr.Row():
            with gr.Column():
                translated_prompt = gr.Textbox(label="Translated prompt")
            with gr.Column():
                translated_completion = gr.Textbox(label="Translated completion")
        with gr.Row():
            button = gr.Button(value="Submit")

    # Changing the split jumps back to its first example.
    split.change(caller_split, inputs=[split], outputs=[index_example, prompt, completion])
    # Releasing the slider shows the example at the chosen index.
    index_example.release(identity, inputs=[index_example, split], outputs=[prompt, completion])
    # Submitting runs save(), whose return values refresh the index, the
    # source panes and the translation boxes.
    button.click(
        save,
        inputs=[index_example, language, split, translated_prompt, translated_completion],
        outputs=[index_example, prompt, completion, translated_prompt, translated_completion],
    )

demo.launch(debug=True)