CyberNative's picture
Update app.py
931cd7a verified
raw
history blame contribute delete
4.3 kB
import os
from threading import Thread
from typing import Iterator

# Third-party imports keep their original relative order (spaces before the
# model libraries).
import gradio as gr
import spaces
from transformers import AutoTokenizer, TextIteratorStreamer
from llama_cpp import Llama
# Read the Hugging Face access token from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# HTML header shown at the top of the page.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">CyberNative-AI/Colibri_8b_v0.1</h1>
<p>This Space demonstrates the CyberSecurity-tuned model <a href="https://huggingface.co/CyberNative-AI/Colibri_8b_v0.1"><b>Colibri_8b_v0.1</b></a>.</p>
</div>
'''

# Footer note shown at the bottom of the page.
LICENSE = """
<p/>
---
Colibri v0.1 is built on top of Dolphin Llama 3
"""

# HTML used as the chatbot placeholder (logo, model name and a prompt hint).
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://huggingface.co/CyberNative-AI/Colibri_8b_v0.1/resolve/main/cybernative_ai_colibri_logo.jpeg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Colibri_v0.1</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
</div>
"""

# Custom stylesheet: centres <h1> headings and styles #duplicate-button.
css = """
h1 {
text-align: center;
display: block;
}
#duplicate-button {
margin: auto;
color: white;
background: #1565c0;
border-radius: 100vh;
}
"""
@spaces.GPU(duration=120)
def chat_llama3_8b(message: str,
                   history: list,
                   temperature: float,
                   max_new_tokens: int
                   ) -> Iterator[str]:
    """
    Stream a chat reply from the Colibri GGUF model.

    The prompt sent to the model is a fixed system message, followed by the
    previous turns from ``history``, followed by ``message``.

    Args:
        message (str): The latest user message.
        history (list): Previous turns as ``(user, assistant)`` pairs, as
            supplied by ChatInterface. A ``None`` entry in a pair is skipped.
        temperature (float): Sampling temperature for generation.
        max_new_tokens (int): Maximum number of new tokens to generate.

    Yields:
        str: The reply accumulated so far. Each yield is the full text
        generated up to that point, so the last yield is the complete reply.
        If the model produces no text, a single empty string is yielded.
    """
    conversation = [
        {"role": "system", "content": "You are Colibri, an advanced cybersecurity AI assistant developed by CyberNative AI."},
    ]
    for user, assistant in history:
        # Either side of a pair may be missing; never send None as content.
        if user is not None:
            conversation.append({"role": "user", "content": user})
        if assistant is not None:
            conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    # NOTE(review): the model is loaded on every request and the context size
    # is left at the library default; consider caching the instance and
    # setting n_ctx explicitly once the deployment constraints are confirmed.
    llm = Llama.from_pretrained(
        repo_id="CyberNative-AI/Colibri_8b_v0.1_q5_gguf",
        filename="*Q5_K_M.gguf",
        chat_format="chatml",
        verbose=False,
    )

    # max_tokens and stop are per-request generation settings, so they are
    # passed to the completion call rather than to the model constructor,
    # where they did not limit generation.
    chunks = llm.create_chat_completion(
        messages=conversation,
        temperature=temperature,
        max_tokens=max_new_tokens,
        stop=["<|im_end|>"],
        stream=True,
    )

    text_response = ""
    emitted = False
    for chunk in chunks:
        # Streamed chunks carry incremental text under choices[0]["delta"];
        # the role-only first chunk and the final chunk have no "content".
        piece = chunk["choices"][0]["delta"].get("content")
        if piece:
            text_response += piece
            emitted = True
            yield text_response

    if not emitted:
        # Always yield once so the caller receives a (possibly empty) reply.
        yield text_response
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

# Chat widget, created up front and handed to the ChatInterface below.
chatbot = gr.Chatbot(
    height=700,
    placeholder=PLACEHOLDER,
    label='Gradio ChatInterface',
)

# Sample prompts offered by the chat interface (one single-field row each).
EXAMPLE_PROMPTS = [
    ['What are the two main methods used in the research to collect DKIM information?'],
    ['What is the primary purpose of OS fingerprinting using tools like Nmap, and why might it not always be 100% accurate?'],
    ['What is 9,000 * 9,000?'],
    ['What technique can be used to enumerate SMB shares within a Windows environment from a Windows client?'],
    ['What is the primary benefit of interleaving in cybersecurity education and training?'],
]

with gr.Blocks(fill_height=True, css=css) as demo:
    gr.Markdown(DESCRIPTION)

    # The extra controls are constructed with render=False and passed to the
    # ChatInterface, in the same order as before: accordion, then sliders.
    parameters_accordion = gr.Accordion(
        label="⚙️ Parameters",
        open=False,
        render=False,
    )
    temperature_slider = gr.Slider(
        minimum=0,
        maximum=1,
        step=0.1,
        value=0.6,
        label="Temperature",
        render=False,
    )
    max_tokens_slider = gr.Slider(
        minimum=128,
        maximum=4096,
        step=1,
        value=512,
        label="Max new tokens",
        render=False,
    )

    # Slider values are passed to chat_llama3_8b after (message, history).
    gr.ChatInterface(
        fn=chat_llama3_8b,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        additional_inputs=[temperature_slider, max_tokens_slider],
        examples=EXAMPLE_PROMPTS,
        cache_examples=False,
    )

    gr.Markdown(LICENSE)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()