Spaces:

yakine
/

model

Sleeping

App Files Community

yakine committed on Aug 11, 2024

Commit

d3ea071

verified ·

1 Parent(s): 26ca4a0

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -10

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import transformers
 from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoTokenizer, AutoModelForCausalLM, pipeline
 from huggingface_hub import HfFolder
 from io import StringIO
-from tqdm import tqdm  # To display progress bar in Streamlit
 # Access the Hugging Face API token from environment variables
 hf_token = os.getenv('HF_API_TOKEN')
@@ -26,16 +26,14 @@ model_gpt2 = GPT2LMHeadModel.from_pretrained('gpt2')
 text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokenizer_gpt2)
 # Load the Llama-3 model and tokenizer once during startup
-tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B", token=hf_token)
 model_llama = AutoModelForCausalLM.from_pretrained(
-    "meta-llama/Meta-Llama-3.1-8B",
-    torch_dtype= 'auto',
-    device_map= 'auto',
     token=hf_token
 )
 # Define your prompt template
 prompt_template = """\
 You are an expert in generating synthetic data for machine learning models.
@@ -59,7 +57,7 @@ Columns:
 Output: """
 def preprocess_user_prompt(user_prompt):
-    generated_text = text_generator(user_prompt, max_length=60, num_return_sequences=1)[0]["generated_text"]
     return generated_text
 def format_prompt(description, columns):
@@ -80,8 +78,8 @@ def generate_synthetic_data(description, columns):
         # Prepare the input for the Llama model
         formatted_prompt = format_prompt(description, columns)
-        # Tokenize the prompt
-        inputs = tokenizer_llama(formatted_prompt, return_tensors="pt").to(model_llama.device)
         # Generate synthetic data
         with torch.no_grad():

 from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoTokenizer, AutoModelForCausalLM, pipeline
 from huggingface_hub import HfFolder
 from io import StringIO
+from tqdm import tqdm
 # Access the Hugging Face API token from environment variables
 hf_token = os.getenv('HF_API_TOKEN')
 text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokenizer_gpt2)
 # Load the Llama-3 model and tokenizer once during startup
+tokenizer_llama = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", token=hf_token)
 model_llama = AutoModelForCausalLM.from_pretrained(
+    "meta-llama/Meta-Llama-3-8B",
+    torch_dtype='auto',
+    device_map='auto',
     token=hf_token
 )
 # Define your prompt template
 prompt_template = """\
 You are an expert in generating synthetic data for machine learning models.
 Output: """
 def preprocess_user_prompt(user_prompt):
+    generated_text = text_generator(user_prompt, max_length=60, num_return_sequences=1, truncation=True)[0]["generated_text"]
     return generated_text
 def format_prompt(description, columns):
         # Prepare the input for the Llama model
         formatted_prompt = format_prompt(description, columns)
+        # Tokenize the prompt with truncation enabled
+        inputs = tokenizer_llama(formatted_prompt, return_tensors="pt", truncation=True).to(model_llama.device)
         # Generate synthetic data
         with torch.no_grad():