Spaces:

sandz7
/

loki

Runtime error

App Files Files Community

loki / app.py

sandz7

made a list over the example list

e8bc467 12 months ago

raw

history blame

4.5 kB

	import torch
	import pandas as pd
	import numpy as np
	import gradio as gr
	import re
	from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
	import re
	from huggingface_hub import login
	import os
	from threading import Thread

	# Hugging Face access token, read from the HF_AUTH_TOKEN environment variable.
	TOKEN = os.getenv('HF_AUTH_TOKEN')
	# login() falls back to an interactive prompt when no token is supplied,
	# which a headless deployment cannot answer, so only log in when one is set.
	if TOKEN:
	    login(token=TOKEN, add_to_git_credential=False)

	# OpenAI API key, read from the OPEN_AI_API_KEY environment variable.
	API_KEY = os.getenv('OPEN_AI_API_KEY')

	# HTML header shown at the top of the page (rendered through gr.Markdown).
	DESCRIPTION = '''
	<div>
	<h1 style="text-align: center;">Amphisbeana 🐍</h1>
	<p>This uses Llama 3 and GPT-4o as generation, both of these make the final generation. <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B"><b>Llama3-8b</b></a> and <a href="https://platform.openai.com/docs/models/gpt-4o"><b>GPT-4o</b></a></p>
	</div>
	'''

	# Load the tokenizer and the model once at import time; the model is cast to
	# float16 and moved to the GPU so it is ready for generation requests.
	llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=TOKEN)
	llama_model = AutoModelForCausalLM.from_pretrained(
	    "meta-llama/Meta-Llama-3-8B",
	    token=TOKEN,
	    torch_dtype=torch.float16,
	).to('cuda')
	# Token ids that stop generation: the tokenizer's EOS token plus Llama 3's
	# end-of-turn marker. The marker must be spelled exactly "<|eot_id|>"
	# (no backslashes), otherwise the lookup does not resolve to the real token.
	terminators = [
	    llama_tokenizer.eos_token_id,
	    llama_tokenizer.convert_tokens_to_ids("<|eot_id|>"),
	]

	# Pass the input text through the model and return the generated output
	def llama_generation(input_text: str,
	                     history: list,
	                     temperature: float,
	                     max_new_tokens: int) -> str:
	    """Generate a Llama reply to ``input_text`` given the earlier chat turns.

	    Args:
	        input_text: The new user message.
	        history: Earlier turns as ``(user, assistant)`` pairs; may be empty
	            or ``None``.
	        temperature: Sampling temperature; ``0`` switches to greedy decoding.
	        max_new_tokens: Upper bound on the number of tokens to generate.

	    Returns:
	        The complete generated text, joined from the streamed chunks.
	    """
	    # Rebuild the conversation in the role/content format the chat template expects.
	    conversation = []
	    for user, assistant in history or []:
	        conversation.extend([
	            {"role": "user", "content": user},
	            {"role": "assistant", "content": assistant},
	        ])
	    conversation.append({"role": "user", "content": input_text})

	    # add_generation_prompt=True appends the assistant header so the model
	    # answers the user instead of continuing the user's own turn.
	    input_ids = llama_tokenizer.apply_chat_template(
	        conversation,
	        add_generation_prompt=True,
	        return_tensors='pt',
	    ).to(llama_model.device)

	    # skip_prompt keeps the prompt itself out of the streamed text.
	    streamer = TextIteratorStreamer(llama_tokenizer, skip_prompt=True, skip_special_tokens=True)

	    # Arguments handed to llama_model.generate() on the worker thread.
	    generate_kwargs = dict(
	        input_ids=input_ids,
	        streamer=streamer,
	        max_new_tokens=max_new_tokens,
	        eos_token_id=terminators,
	    )

	    if temperature == 0:
	        # Greedy decoding: always take the most probable next token. The
	        # temperature is left out because it only applies when sampling.
	        generate_kwargs["do_sample"] = False
	    else:
	        generate_kwargs["do_sample"] = True
	        generate_kwargs["temperature"] = temperature

	    # generate() runs on its own thread and feeds the streamer, which is
	    # consumed here as the chunks arrive.
	    thread = Thread(target=llama_model.generate, kwargs=generate_kwargs)
	    thread.start()

	    reply = "".join(streamer)
	    # Make sure the worker has fully finished before returning.
	    thread.join()
	    return reply

	# Let's first make sure the Llama model returns what it should, and then pass that output into a function that fits it into a base
	# prompt for GPT-4o

	# Chat window handed to the ChatInterface below.
	chatbot = gr.Chatbot(height=600, label="Amphisbeana AI")

	# Page layout: the description header followed by the chat interface.
	with gr.Blocks(fill_height=True) as demo:
	    gr.Markdown(DESCRIPTION)
	    gr.ChatInterface(
	        fn=llama_generation,
	        chatbot=chatbot,
	        fill_height=True,
	        # The additional inputs are passed to llama_generation after the
	        # message and history, in order: temperature, then max_new_tokens.
	        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
	        additional_inputs=[
	            # Slider controlling the sampling temperature of the model
	            gr.Slider(minimum=0,
	                      maximum=1,
	                      step=0.1,
	                      value=0.95,
	                      label="Temperature",
	                      render=False),
	            # Slider controlling the maximum number of generated tokens
	            gr.Slider(minimum=128,
	                      maximum=1500,
	                      step=1,
	                      value=512,
	                      label="Max new tokens",
	                      render=False),
	        ],
	        # Each example is a one-element list holding the message text.
	        examples=[
	            ["Make a poem of batman inside willy wonka"],
	            ["How can you make a burrito with just flour?"],
	            ["How was saturn formed in 3 sentences"],
	            ["How does the frontal lobe affect playing soccer"],
	        ],
	        cache_examples=False,
	    )

	if __name__ == "__main__":
	    demo.launch()