import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the models and tokenizers
tokenizer1 = AutoTokenizer.from_pretrained("JasperV13/Fhamator-30000")
model1 = AutoModelForCausalLM.from_pretrained("JasperV13/Fhamator-30000")

tokenizer2 = AutoTokenizer.from_pretrained("JasperV13/Fhamator-SFT")
model2 = AutoModelForCausalLM.from_pretrained("JasperV13/Fhamator-SFT")
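
# Note: inference here runs on the CPU by default. If a GPU were available, an
# optional tweak (not part of the original Space) would be to move the models,
# and the tokenized inputs inside the generate_* helpers, onto it, e.g.:
#
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model1.to(device)
#   model2.to(device)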

def generate_text_fhamator(input_text, max_length, num_return_sequences, no_repeat_ngram_size, top_k, top_p, temperature):
    # Tokenize the prompt and sample several continuations from the base model.
    input_ids = tokenizer1.encode(input_text, return_tensors='pt')
    output = model1.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,  # forbid repeating any n-gram of this size
        top_k=top_k,              # sample only from the k most likely next tokens
        top_p=top_p,              # nucleus sampling: keep the smallest token set with cumulative prob >= top_p
        temperature=temperature,  # higher values make sampling more random
        do_sample=True
    )
    # Decode each returned sequence and join them for display in a single textbox.
    generated_texts = [tokenizer1.decode(output[i], skip_special_tokens=True) for i in range(num_return_sequences)]
    return "\n\n".join(generated_texts)
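
# Illustrative direct call (normally the Gradio sliders below supply these values):
#
#   print(generate_text_fhamator("أعلنت السلطات", max_length=100,
#                                 num_return_sequences=2, no_repeat_ngram_size=2,
#                                 top_k=50, top_p=0.95, temperature=0.7))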

def generate_text_sft(input_text, max_length, num_return_sequences, no_repeat_ngram_size, top_k, top_p, temperature):
    # Same sampling setup as above, but using the fine-tuned (SFT) model.
    inputs = tokenizer2(input_text, return_tensors="pt")
    output = model2.generate(
        inputs['input_ids'],
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        do_sample=True
    )
    generated_texts = [tokenizer2.decode(output[i], skip_special_tokens=True) for i in range(num_return_sequences)]
    return "\n\n".join(generated_texts)
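
# Note: the tokenizer output above also contains an attention mask; an optional
# refinement (not in the original code) would be to pass it along as
# attention_mask=inputs['attention_mask'] so generate() does not have to infer it.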

# Build the Gradio UI: an explanation tab plus one demo tab per model.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("## 🤖✨ Fhamator: 3ata-Ja !")
    
    with gr.Tab("📖 Explanation"):
      gr.Markdown("""
      # 📚 Explanation of the Work Done
      Welcome to the **Fhamator** application! This app consists of three main tabs, each with a unique purpose:
    
      1. **🔍 Explanation**: Provides an overview of what this app does and how it works.
      2. **🧠 Fhamator Model**: Test the base 'Fhamator-30000' language model and tweak its hyperparameters to see how it performs.
        3. **🚀 Fhamator-SFT Model**: Experiment with the fine-tuned 'Fhamator-SFT' model, designed for more specific tasks, also with customizable hyperparameters.
    """)
    
    with gr.Tab("Test Fhamator-30000 Model"):
        with gr.Group():
            input_text = gr.Textbox(label="Input Prompt", value="أعلنت السلطات ")
            max_length = gr.Slider(50, 200, value=100, step=1, label="Max Length")
            num_return_sequences = gr.Slider(1, 5, value=3, step=1, label="Number of Sequences")
            no_repeat_ngram_size = gr.Slider(1, 5, value=2, step=1, label="No Repeat N-Gram Size")
            top_k = gr.Slider(1, 100, value=50, step=1, label="Top K")
            top_p = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Top P")
            temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
            output_text = gr.Textbox(label="Generated Texts", lines=8)
            generate_btn = gr.Button("Generate")
        
        generate_btn.click(
            generate_text_fhamator,
            inputs=[input_text, max_length, num_return_sequences, no_repeat_ngram_size, top_k, top_p, temperature],
            outputs=output_text
        )
    
    with gr.Tab("Test Fhamator-SFT Model"):
        with gr.Group():
            input_text_sft = gr.Textbox(label="Instruction", value="السؤال :س - التاريخ المغربي؟\n\n: الجواب\n")
            max_length_sft = gr.Slider(50, 200, value=60, step=1, label="Max Length")
            num_return_sequences_sft = gr.Slider(1, 5, value=5, step=1, label="Number of Sequences")
            no_repeat_ngram_size_sft = gr.Slider(1, 5, value=2, step=1, label="No Repeat N-Gram Size")
            top_k_sft = gr.Slider(1, 100, value=50, step=1, label="Top K")
            top_p_sft = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Top P")
            temperature_sft = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
            output_text_sft = gr.Textbox(label="Generated Texts", lines=8)
            generate_btn_sft = gr.Button("Generate")
        
        generate_btn_sft.click(
            generate_text_sft,
            inputs=[input_text_sft, max_length_sft, num_return_sequences_sft, no_repeat_ngram_size_sft, top_k_sft, top_p_sft, temperature_sft],
            outputs=output_text_sft
        )

demo.launch(debug=True)
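
# If this script were run locally rather than on a hosted Space, passing
# share=True to demo.launch() would additionally expose a temporary public URL
# (optional; not part of the original configuration).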