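"""Gradio chat demo that streams responses from Mistral-7B-Instruct-v0.1 via the
Hugging Face Inference API and then appends DuckDuckGo search results for the
same prompt."""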
from huggingface_hub import InferenceClient
import gradio as gr
import random
from langchain_community.tools import DuckDuckGoSearchRun

API_URL = "https://api-inference.huggingface.co/models/"
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")

# Initialize DuckDuckGo search tool
duckduckgo_search = DuckDuckGoSearchRun()

def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
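
# Example (illustrative values): format_prompt("What is RAG?", [("Hi", "Hello!")])
# -> "<s>[INST] Hi [/INST] Hello!</s> [INST] What is RAG? [/INST]"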

def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=random.randint(0, 10**7),
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        # Yield the model's response first, token by token
        yield output
    # Now perform the DuckDuckGo search; append the results to the streamed
    # model response so it is not overwritten (gr.ChatInterface displays only
    # the most recently yielded value).
    search_result = duckduckgo_search.run(prompt)
    if search_result:
        yield output + "\n\n" + search_result
    else:
        yield output + "\n\nSorry, I couldn't find any relevant information."

additional_inputs = [
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=512,
        minimum=64,
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]

customCSS = """
#component-7 { /* default element ID of the chat component */
    height: 800px; /* adjust the height as needed */
    flex-grow: 1;
}
"""

with gr.Blocks(css=customCSS) as demo:
    gr.ChatInterface(
        generate,
        title="RAG_FRIDAY_3.0🤖 WELCOME TO OPEN-SOURCE FREEDOM🤗",
        description="Getting real-time, up-to-date results for prompts is still proprietary territory for GPT-4, Copilot, etc. This app serves as an open-source alternative! UPDATE: The previous version of this app, RAG_FRIDAY_mark_2, ran into technical issues due to rate-limit errors; the problem and its solution are described in this community thread: https://github.com/joaomdmoura/crewAI/issues/136",
        additional_inputs=additional_inputs,
    )

demo.queue().launch(debug=True)
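
# To run locally (assuming the file is saved as app.py and gradio,
# huggingface_hub, langchain_community and duckduckgo-search are installed;
# a Hugging Face token may be needed for the Inference API):
#   python app.py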