Spaces:

sandz7
/

loki

Runtime error

File size: 2,531 Bytes

e83f85a
 
 
 
 
b28bc26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92c5d55
 
b28bc26
 
 
 
db79da7
 
 
 
 
ec72fe8
b28bc26
ec72fe8
92c5d55
b28bc26
 
db79da7
b28bc26

import torch
import pandas as pd
import numpy as np
import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
from huggingface_hub import login
import os

# Hugging Face access token, read from the HF_AUTH_TOKEN environment variable.
TOKEN = os.getenv('HF_AUTH_TOKEN')
if TOKEN:
    login(token=TOKEN,
          add_to_git_credential=False)
else:
    # Calling login() without a token falls back to an interactive prompt,
    # which cannot be answered in a headless deployment. Skip it and leave a
    # trace in the logs so a later authentication failure is easy to diagnose.
    print("HF_AUTH_TOKEN is not set; skipping Hugging Face login.")

# OpenAI API key, read from the OPEN_AI_API_KEY environment variable.
API_KEY = os.getenv('OPEN_AI_API_KEY')

# HTML banner rendered at the top of the demo page.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Amphisbeana 🐍</h1>
<p>This uses Llama 3 and GPT-4o as generation, both of these make the final generation. <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B"><b>Llama3-8b</b></a> and <a href="https://platform.openai.com/docs/models/gpt-4o"><b>GPT-4o</b></a></p>
</div>
'''

# Load the tokenizer and the model once at import time and move the model to
# the GPU so it is ready for generation.
MODEL_ID = "meta-llama/Meta-Llama-3-8B"

# The token is passed explicitly to both loaders so the tokenizer download
# does not depend on the global login side effect.
llama_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=TOKEN)
llama_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=TOKEN,
    torch_dtype=torch.float16,
).to('cuda')

# Pass the input text through the model and return the generated output
def llama_generation(input_text: str,
                     history) -> str:
    """
    Generate a Llama reply for a single user message.

    The system header is placed in front of the user's message, the prompt is
    tokenized, run through ``llama_model.generate``, and only the newly
    generated tokens are decoded back to text.

    Args:
        input_text: The message typed by the user.
        history: Earlier chat turns as passed in by the chat interface.
            Currently unused; accepted so the callback signature matches.

    Returns:
        The generated continuation, without the prompt and without special
        tokens, stripped of surrounding whitespace.
    """
    # Explicit cap on the reply length. Without it ``generate`` uses its
    # default ``max_length`` (20 tokens, prompt included), which truncates or
    # entirely suppresses the answer.
    max_new_tokens = 256

    # Header prompt: instructions come first, then the user's message,
    # separated by a blank line so the two do not run together.
    header = ("You are a helpful AI called amphisbeana. "
              "You will help the user, by giving accurate but creative response")
    prompt = f"{header}\n\n{input_text}"

    # Calling the tokenizer (rather than ``encode``) also yields the attention
    # mask, which ``generate`` needs to treat the prompt reliably.
    inputs = llama_tokenizer(prompt, return_tensors='pt').to(llama_model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    output_ids = llama_model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        # Use EOS as the padding id so generate does not have to guess one.
        pad_token_id=llama_tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; drop them so the
    # chat window shows only the model's answer.
    reply_ids = output_ids[0][prompt_length:]
    output_text = llama_tokenizer.decode(reply_ids,
                                         skip_special_tokens=True)

    return output_text.strip()

# First make sure Llama is returning output as it should, and then pass that output into a function that fits it into a base
# prompt for GPT-4o

# Chat display component, created up front so it can be handed to the
# ChatInterface below.
chatbot = gr.Chatbot(height=600, label="Amphisbeana AI")

# Page layout: the description banner on top, the chat interface underneath.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=llama_generation,
        chatbot=chatbot,
        fill_height=True,
        # Clickable sample prompts shown to the user.
        examples=["Make a poem of batman inside willy wonka",
                  "How can you make a burrito with just flour?",
                  "How was saturn formed in 3 sentences",
                  "How does the frontal lobe affect playing soccer"],
        cache_examples=False,
    )

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()