import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
# Hugging Face token, needed for the gated Llama 3 checkpoint
TOKEN = os.getenv('HF_AUTH_TOKEN')
login(token=TOKEN, add_to_git_credential=False)
# OpenAI API key, used for the GPT-4o generation pass
API_KEY = os.getenv('OPEN_AI_API_KEY')
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Amphisbeana π</h1>
<p>This app chains two models, <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B"><b>Llama3-8b</b></a> and <a href="https://platform.openai.com/docs/models/gpt-4o"><b>GPT-4o</b></a>; both contribute to the final generation.</p>
</div>
'''
# Load the Llama 3 tokenizer and model, and move the model to the GPU
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=TOKEN)
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B",
                                                   token=TOKEN,
                                                   torch_dtype=torch.float16).to('cuda')
# Single-turn generation: tokenize the user input, generate, and decode
def llama_generation(input_text: str, history):
    """
    Tokenize the input, run Llama 3, and decode the output back to text.
    `history` is supplied by gr.ChatInterface but is not used yet.
    """
    # Header prompt, prepended to the user input
    header = ("You are a helpful AI called amphisbeana. "
              "You will help the user by giving accurate but creative responses.")
    input_ids = llama_tokenizer.encode(header + "\n" + input_text,
                                       return_tensors='pt').to('cuda')
    # generate() takes the input tensor directly, so there is no need for
    # **input_ids, just input_ids; cap the length, since the default stops
    # after only ~20 new tokens
    output_ids = llama_model.generate(input_ids=input_ids,
                                      max_new_tokens=256)
    # Decode only the newly generated tokens so the prompt is not echoed back
    output_text = llama_tokenizer.decode(output_ids[0][input_ids.shape[-1]:],
                                         skip_special_tokens=True)
    return output_text
# First make sure Llama is returning output as it should, then feed that
# output into a second function that fits it into a prompt for GPT-4o
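# A minimal sketch of that second pass, assuming the official `openai`
# Python client; the prompt wording and the `combined_generation` wrapper
# are illustrative placeholders, not settled behavior.
from openai import OpenAI

openai_client = OpenAI(api_key=API_KEY)

def gpt_generation(llama_output: str) -> str:
    """Refine the Llama 3 draft with GPT-4o to produce the final answer."""
    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": "You are amphisbeana. Polish the draft answer below, "
                        "keeping it accurate but creative."},
            {"role": "user", "content": llama_output},
        ],
    )
    return response.choices[0].message.content

def combined_generation(input_text: str, history):
    """Chain both models: Llama 3 drafts, GPT-4o finalizes."""
    draft = llama_generation(input_text, history)
    return gpt_generation(draft)
# Once the Llama output is verified, `fn=llama_generation` below can be
# swapped for `fn=combined_generation`.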
chatbot = gr.Chatbot(height=600, label="Amphisbeana AI")
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=llama_generation,
        chatbot=chatbot,
        fill_height=True,
        examples=["Write a poem about Batman inside Willy Wonka's factory",
                  "How can you make a burrito with just flour?",
                  "How was Saturn formed, in 3 sentences?",
                  "How does the frontal lobe affect playing soccer?"],
        cache_examples=False
    )
if __name__ == "__main__":
    demo.launch()