Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
from huggingface_hub import login | |
import os | |
# Read the Hugging Face access token from the Space secrets (HF_TOKEN).
token = os.getenv("HF_TOKEN")
# Log in only when a token is configured: calling login() without one falls
# back to an interactive prompt, which cannot be answered in a headless Space.
if token:
    login(token=token)
# Load the model and tokenizer once at import time so every request reuses them.
# Alternative checkpoint: "meta-llama/Llama-3.2-3B". Replace the name below
# with the correct model name if necessary.
model_name = "nikunjcepatel/gpt_finetune_test"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Inference function called by the Gradio interface.
def generate_text(input_text: str) -> str:
    """Generate a sampled completion for ``input_text`` with the loaded model.

    Args:
        input_text: Prompt text to feed to the model.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed. An empty prompt returns an empty string without calling the
        tokenizer or the model.
    """
    # An empty prompt gives the model nothing to condition on; short-circuit
    # instead of passing an empty encoding to generate().
    if not input_text:
        return ""

    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(
        inputs["input_ids"],
        # Forward the tokenizer's attention mask (when it provides one) rather
        # than leaving generate() to guess which positions are real tokens.
        attention_mask=inputs.get("attention_mask"),
        max_length=256,  # Cap on total sequence length (prompt + new tokens)
        num_return_sequences=1,
        temperature=0.2,  # Control randomness (higher is more random)
        top_k=50,  # Top-k sampling: limit choices to the 50 most likely tokens
        top_p=0.8,  # Nucleus sampling: keep tokens within 80% cumulative probability
        repetition_penalty=1.2,  # Penalize repetition; increase if repetitions persist
        do_sample=True,  # Enable sampling for non-deterministic output
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio UI: a single text box in, a single text box out,
# with generate_text handling each submission.
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)
# Start serving the interface.
iface.launch()