from huggingface_hub import InferenceClient
import gradio as gr
import random
from langchain_community.tools import DuckDuckGoSearchRun

API_URL = "https://api-inference.huggingface.co/models/"
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")

# Initialize DuckDuckGo search tool
duckduckgo_search = DuckDuckGoSearchRun()

def format_prompt(message, history):
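    # Mistral-Instruct chat format: each past user turn is wrapped in [INST] ... [/INST],
    # followed by the assistant reply and </s>, then the new message is appended.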
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
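    # Stream a response from the hosted Mistral model, then run a DuckDuckGo
    # search on the same prompt so the chat shows both the model answer and live results.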
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=random.randint(0, 10**7),
    )

    formatted_prompt = format_prompt(prompt, history)

    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
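    # Each streamed chunk exposes the generated token text via .token.text;
    # accumulate and yield partial output so the chat UI updates incrementally.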
    output = ""

    for response in stream:
        output += response.token.text
        # Yield model's response first
        yield output
        
    # Now, perform DuckDuckGo search and yield results
    search_result = duckduckgo_search.run(prompt)
    if search_result:
        yield search_result
    else:
        yield "Sorry, I couldn't find any relevant information."

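# UI sliders shown under "Additional inputs"; gr.ChatInterface passes their values
# to generate() after the message and history arguments.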
additional_inputs = [
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=512,
        minimum=64,
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]

customCSS = """
#component-7 { # this is the default element ID of the chat component
  height: 800px; # adjust the height as needed
  flex-grow: 1;
}
"""

with gr.Blocks(css=customCSS) as demo:
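    # ChatInterface wires generate() into a streaming chat UI and renders the
    # sliders declared above under "Additional inputs".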
    gr.ChatInterface(
        generate,
        title = "RAG_FRIDAY_3.0🤖 WELCOME TO OPEN-SOURCE FREEDOM🤗",
        description = "Getting real-time updated results for prompts is still propreitary in face of GPT-4,Co-Pilot etc. This app serves as a open-source alternative for this! UPDATE: Previous version of this app i.e. RAG_FRIDAY_mark_2 has faced some techncial issues due to rate limit errors. Problem and solution have been updated by me thanks to this community thread: https://github.com/joaomdmoura/crewAI/issues/136",
        additional_inputs=additional_inputs,
    )

demo.queue().launch(debug=True)