import os

import gradio as gr
from huggingface_hub import InferenceClient

HF_TOKEN = os.environ.get("HF_TOKEN", None)
USER_NAME = os.environ.get("USER_NAME", None)
APP_PASSWORD = os.environ.get("APP_PASSWORD", None)

# Inference API endpoint for the model backing this Space
model_url = "https://api-inference.huggingface.co/models/google/flan-t5-small"

# The client accepts the token directly and sets the Authorization header itself
client = InferenceClient(model_url, token=HF_TOKEN)

# Generation halts when any of these strings appears; they mark the start of the next turn
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

EXAMPLES = [
    ["Please explain machine learning to me"],
    ["Do you know about Python programming? Please create a simple application for me."],
    ["What is the history of AI?"],
    ["Can you tell me more about Data Science?"],
    ["Can you write a short tweet about the release of our latest AI model, Falcon 180B LLM?"],
]

def format_prompt(message, history, system_prompt):
    """Flatten the chat history into the User:/GuruAI: turn format the model expects."""
    prompt = ""
    if system_prompt:
        prompt += f"System: {system_prompt}\n"
    for user_prompt, bot_response in history:
        prompt += f"User: {user_prompt}\n"
        prompt += f"GuruAI: {bot_response}\n"  # history stores the bare response, so add the prefix here
    prompt += f"User: {message}\nGuruAI:"
    return prompt
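# Illustrative example (not executed): with one prior exchange in the history,
#
#   format_prompt("What is AI?", [("Hi", "Hello!")], "You are a patient teacher")
#
# returns the string:
#
#   System: You are a patient teacher
#   User: Hi
#   GuruAI: Hello!
#   User: What is AI?
#   GuruAI: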

# Incremented on every call so repeated identical prompts still vary
seed = 42

def generate(
    prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    # Clamp temperature: the API rejects values at or near zero
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    global seed
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed += 1
    formatted_prompt = format_prompt(prompt, history, system_prompt)

    # Stream tokens and yield the accumulated text so the UI updates incrementally
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""

    for response in stream:
        output += response.token.text

        # The stream echoes the stop sequence it matched; trim it and stop
        for stop_str in STOP_SEQUENCES:
            if output.endswith(stop_str):
                output = output[: -len(stop_str)].rstrip()
                yield output
                return
        yield output
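# Illustrative usage (not executed): gr.ChatInterface drives this generator and
# renders each yielded string as the current partial reply.
#
#   for partial in generate("What is AI?", history=[]):
#       print(partial)  # progressively longer text, ending with the full answer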

additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=1088,
        minimum=0,
        maximum=8192,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
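
# Note: gr.ChatInterface passes these controls positionally after (message, history),
# so their order here must match generate's parameter order: system_prompt,
# temperature, max_new_tokens, top_p, repetition_penalty.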


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=0.4):
            gr.Image("https://padek.jawapos.com/wp-content/uploads/2022/10/861213472.jpg", elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.Markdown(
                """
                # GuruAI
                
                GuruAI is an AI teacher: ask it about any topic and it will explain.

                ⚠️ **Limitations**: the model can and will produce factually incorrect information and hallucinate facts and actions. Because it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.

                Example model name: `tiiuae/falcon-180B-chat`
                """
            )

    gr.ChatInterface(
        generate, 
        examples=EXAMPLES,
        additional_inputs=additional_inputs,
    )

# Only enable basic auth when both credentials are configured
auth = (USER_NAME, APP_PASSWORD) if USER_NAME and APP_PASSWORD else None
demo.queue(concurrency_count=100, api_open=True).launch(show_api=True, auth=auth)
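
# Illustrative local run (the env var names are the ones read at the top of this file;
# the values below are placeholders):
#   export HF_TOKEN=hf_xxx        # Inference API token
#   export USER_NAME=admin        # optional basic-auth user
#   export APP_PASSWORD=secret    # optional basic-auth password
#   python app.py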