Spaces:

CyberNative-AI
/

Colibri_8b_v0.1_chat

Running

App Files Files Community

Colibri_8b_v0.1_chat / app.py

CyberNative

Update app.py

931cd7a verified 12 months ago

raw

history blame contribute delete

4.3 kB

	import gradio as gr
	import os
	import spaces
	from transformers import AutoTokenizer, TextIteratorStreamer
	from threading import Thread
	from llama_cpp import Llama

	# Hugging Face access token, read from the process environment.
	# Evaluates to None when the HF_TOKEN variable is not set.
	HF_TOKEN = os.getenv("HF_TOKEN")


	# HTML header shown at the top of the page; passed to gr.Markdown() inside
	# the Blocks layout below.
	DESCRIPTION = '''
	<div>
	<h1 style="text-align: center;">CyberNative-AI/Colibri_8b_v0.1</h1>
	<p>This Space demonstrates the CyberSecurity-tuned model <a href="https://huggingface.co/CyberNative-AI/Colibri_8b_v0.1"><b>Colibri_8b_v0.1</b></a>.
	</div>
	'''

	# Footer note naming the base model; passed to gr.Markdown() after the chat
	# interface.
	LICENSE = """
	<p/>
	---
	Colibri v0.1 is built on top of Dolphin Llama 3
	"""

	# HTML handed to gr.Chatbot(placeholder=...): logo, model name and a prompt
	# hint.
	PLACEHOLDER = """
	<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
	<img src="https://huggingface.co/CyberNative-AI/Colibri_8b_v0.1/resolve/main/cybernative_ai_colibri_logo.jpeg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
	<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Colibri_v0.1</h1>
	<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
	</div>
	"""


	# Custom stylesheet passed to gr.Blocks(css=...).
	# NOTE(review): no element with id "duplicate-button" is created in the
	# visible code, so that rule looks unused - confirm before removing.
	css = """
	h1 {
	text-align: center;
	display: block;
	}
	#duplicate-button {
	margin: auto;
	color: white;
	background: #1565c0;
	border-radius: 100vh;
	}
	"""

	@spaces.GPU(duration=120)
	def chat_llama3_8b(
	    message: str,
	    history: list,
	    temperature: float,
	    max_new_tokens: int,
	):
	    """
	    Generate a streaming chat response with the Colibri 8B GGUF model.

	    This is a generator: it yields the reply accumulated so far each time
	    new text arrives, so the chat UI can update progressively.

	    Args:
	        message (str): The new user message.
	        history (list): Previous turns as (user, assistant) pairs, as
	            supplied by ChatInterface.
	        temperature (float): Sampling temperature for generation.
	        max_new_tokens (int): Upper bound on the number of generated tokens.

	    Yields:
	        str: The assistant reply generated so far. At least one value is
	        always yielded (an empty string if the model produced no text).
	    """
	    # Rebuild the full dialogue in role/content form: system prompt first,
	    # then every earlier turn, then the new user message.
	    conversation = [
	        {"role": "system", "content": "You are Colibri, an advanced cybersecurity AI assistant developed by CyberNative AI."}
	    ]
	    for user, assistant in history:
	        conversation.append({"role": "user", "content": user})
	        conversation.append({"role": "assistant", "content": assistant})
	    conversation.append({"role": "user", "content": message})

	    # Only model-construction options belong here. The generation limits
	    # (max_tokens / stop) were previously passed at this point, where they
	    # did not reach the completion call below.
	    # NOTE(review): the model is still loaded on every request, as before;
	    # hoisting the load depends on how the GPU worker is spawned - confirm.
	    # NOTE(review): the context size is left at the library default; confirm
	    # it is large enough for the slider's maximum of 4096 new tokens.
	    llm = Llama.from_pretrained(
	        repo_id="CyberNative-AI/Colibri_8b_v0.1_q5_gguf",
	        filename="*Q5_K_M.gguf",
	        chat_format="chatml",
	        verbose=False,
	    )

	    # Sampling and length controls are per-request arguments. The stop
	    # marker is the plain ChatML terminator, without backslash escapes.
	    stream = llm.create_chat_completion(
	        messages=conversation,
	        temperature=temperature,
	        max_tokens=int(max_new_tokens),
	        stop=["<|im_end|>"],
	        stream=True,
	    )

	    partial_text = ""
	    for chunk in stream:
	        # Streamed chunks carry an incremental "delta"; some chunks (role
	        # announcement, final chunk) have no "content" key.
	        piece = chunk["choices"][0]["delta"].get("content")
	        if piece:
	            partial_text += piece
	            yield partial_text

	    # Keep the original contract of yielding at least once.
	    if not partial_text:
	        yield partial_text


	# Chat display widget, created up front and handed to ChatInterface below.
	chatbot = gr.Chatbot(height=700, placeholder=PLACEHOLDER, label='Gradio ChatInterface')


	def _build_demo():
	    """Assemble the page: header, chat interface with tuning controls, footer."""
	    with gr.Blocks(fill_height=True, css=css) as blocks:
	        gr.Markdown(DESCRIPTION)

	        # Extra controls are created unrendered; ChatInterface places them
	        # inside the collapsed "Parameters" accordion.
	        params_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
	        temperature_slider = gr.Slider(
	            minimum=0,
	            maximum=1,
	            step=0.1,
	            value=0.6,
	            label="Temperature",
	            render=False,
	        )
	        max_tokens_slider = gr.Slider(
	            minimum=128,
	            maximum=4096,
	            step=1,
	            value=512,
	            label="Max new tokens",
	            render=False,
	        )

	        # One single-element row per example prompt.
	        example_prompts = [
	            ['What are the two main methods used in the research to collect DKIM information?'],
	            ['What is the primary purpose of OS fingerprinting using tools like Nmap, and why might it not always be 100% accurate?'],
	            ['What is 9,000 * 9,000?'],
	            ['What technique can be used to enumerate SMB shares within a Windows environment from a Windows client?'],
	            ['What is the primary benefit of interleaving in cybersecurity education and training?'],
	        ]

	        gr.ChatInterface(
	            fn=chat_llama3_8b,
	            chatbot=chatbot,
	            fill_height=True,
	            additional_inputs_accordion=params_accordion,
	            additional_inputs=[temperature_slider, max_tokens_slider],
	            examples=example_prompts,
	            cache_examples=False,
	        )

	        gr.Markdown(LICENSE)
	    return blocks


	demo = _build_demo()

	if __name__ == "__main__":
	    demo.launch()