import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
# Hugging Face token for gated model access
TOKEN = os.getenv('HF_AUTH_TOKEN')
login(token=TOKEN, add_to_git_credential=False)

# OpenAI API key (used by the GPT-4o step below)
API_KEY = os.getenv('OPEN_AI_API_KEY')
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Amphisbeana 🐍</h1>
<p>This app uses Llama 3 and GPT-4o for generation; both models contribute to the final output. <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B"><b>Llama3-8b</b></a> and <a href="https://platform.openai.com/docs/models/gpt-4o"><b>GPT-4o</b></a></p>
</div>
'''
# Load the tokenizer and model, moving the model onto the GPU for generation
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=TOKEN)
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B", token=TOKEN, torch_dtype=torch.float16).to('cuda')
# Take the user input, run one generation pass, and return the decoded text
def llama_generation(input_text: str,
                     history):
    """
    Tokenize the input, generate with Llama 3, and decode back to text.
    `history` is required by gr.ChatInterface but is not used here yet.
    """
    # Header prompt, prepended to the user input as a lightweight system prompt
    header = '''You are a helpful AI called amphisbeana.
    You will help the user by giving accurate but creative responses.'''

    input_ids = llama_tokenizer.encode(header + input_text,
                                       return_tensors='pt').to('cuda')

    # encode() already returns a plain tensor rather than a dict, so pass it
    # as input_ids directly; there is no need to unpack with **input_ids.
    # max_new_tokens lifts the ~20-token default cap, and pad_token_id is set
    # explicitly because Llama 3 has no pad token of its own.
    output_ids = llama_model.generate(input_ids=input_ids,
                                      max_new_tokens=256,
                                      pad_token_id=llama_tokenizer.eos_token_id)

    # Decode only the newly generated tokens so the header and user prompt
    # are not echoed back into the chat
    output_text = llama_tokenizer.decode(output_ids[0][input_ids.shape[-1]:],
                                         skip_special_tokens=True)

    return output_text
# For now the app returns Llama's output as-is; the next step is to feed that
# output into a base prompt for GPT-4o so both models shape the final answer.
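# A minimal sketch of that GPT-4o step, not yet wired into the interface.
# It assumes the `openai` v1 client is installed and OPEN_AI_API_KEY is valid;
# the function name and prompt wiring here are illustrative, not final.
def gpt_refinement(input_text: str, llama_output: str) -> str:
    """Pass Llama's draft answer to GPT-4o and return the refined text."""
    from openai import OpenAI  # local import so the app still runs without it
    client = OpenAI(api_key=API_KEY)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": "You refine a draft answer so both models shape the final reply."},
            {"role": "user",
             "content": f"Question: {input_text}\n\nDraft answer: {llama_output}"},
        ],
    )
    return response.choices[0].message.content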
chatbot = gr.Chatbot(height=600, label="Amphisbeana AI")

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=llama_generation,
        chatbot=chatbot,
        fill_height=True,
        examples=["Write a poem about Batman inside Willy Wonka's factory",
                  "How can you make a burrito with just flour?",
                  "How was Saturn formed, in 3 sentences?",
                  "How does the frontal lobe affect playing soccer?"],
        cache_examples=False
    )
if __name__ == "__main__":
    demo.launch()