Spaces:

Mihaiii
/

backtrack_sampler_demo

Running on Zero

App Files Files Community

Mihaiii committed on Oct 12, 2024

Commit

d1e1697

verified ·

1 Parent(s): 3e181c7

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -18

app.py CHANGED Viewed

@@ -10,18 +10,17 @@ description = """## Compare Creative Writing: Standard Sampler vs. Backtrack Sam
 This is a demo of the [Backtrack Sampler](https://github.com/Mihaiii/backtrack_sampler) framework using "Creative Writing Strategy".
 <br />On the left you have the output of the standard sampling and on the write the output privided by Backtrack Sampler.
 """
-# Load tokenizer
 model_name = "unsloth/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Load two instances of the model on CUDA for parallel inference
 model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
 provider = TransformersProvider(model, tokenizer, device)
 strategy = CreativeWritingStrategy(provider)
 creative_sampler = BacktrackSampler(strategy, provider)
-# Helper function to create message array for the chat template
 def create_chat_template_messages(history, prompt):
     messages = [{"role": "user", "content": prompt}]
@@ -33,14 +32,11 @@ def create_chat_template_messages(history, prompt):
 @spaces.GPU
 def generate_responses(prompt, history):
-    # Create messages array for chat history and apply template
     messages = create_chat_template_messages(history, prompt)
     wrapped_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_special_tokens=True, add_generation_prompt=True)
-    #already has special tokens
     inputs = tokenizer.encode(wrapped_prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
-    # Custom sampler task: loop over generator and collect outputs in a list
     async def custom_sampler_task():
         generated_list = []
         generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=1)
@@ -50,51 +46,39 @@ def generate_responses(prompt, history):
     custom_output = asyncio.run(custom_sampler_task())
     standard_output = model.generate(inputs, max_length=2048, temperature=1)
-    # Decode standard output and remove the prompt from the generated response
     standard_response = tokenizer.decode(standard_output[0][len(inputs[0]):], skip_special_tokens=True)
     return standard_response.strip(), custom_output.strip()
-# Create the Gradio interface with the Citrus theme
 with gr.Blocks(theme=gr.themes.Citrus()) as demo:
     gr.Markdown(description)
-    # Chatbot components
     with gr.Row():
         standard_chat = gr.Chatbot(label="Standard Sampler")
         custom_chat = gr.Chatbot(label="Creative Writing Strategy")
-    # Input components
     with gr.Row():
         prompt_input = gr.Textbox(label="Enter your prompt", placeholder="Type your message here...", lines=1)
-    # Example prompts
     examples = [
         "Write me a short story about a talking dog who wants to be a detective.",
         "Tell me a short tale of a dragon who is afraid of heights.",
         "Create a short story where aliens land on Earth, but they just want to throw a party."
     ]
-    # Add example buttons
     gr.Examples(examples=examples, inputs=prompt_input)
-    # Button to submit the prompt
     submit_button = gr.Button("Submit")
-    # Function to handle chat updates
     def update_chat(prompt, standard_history, custom_history):
         standard_response, custom_response = generate_responses(prompt, standard_history)
-        # Append new responses to chat histories
         standard_history = standard_history + [(prompt, standard_response)]
         custom_history = custom_history + [(prompt, custom_response)]
-        # Clear the input field after submission
         return standard_history, custom_history, ""
-    # Bind the submit button to the update function and allow pressing Enter to submit
     prompt_input.submit(fn=update_chat, inputs=[prompt_input, standard_chat, custom_chat], outputs=[standard_chat, custom_chat, prompt_input])
     submit_button.click(fn=update_chat, inputs=[prompt_input, standard_chat, custom_chat], outputs=[standard_chat, custom_chat, prompt_input])
-# Launch the app with queueing and sharing enabled
 demo.queue().launch(debug=True)

 This is a demo of the [Backtrack Sampler](https://github.com/Mihaiii/backtrack_sampler) framework using "Creative Writing Strategy".
 <br />On the left you have the output of the standard sampling and on the write the output privided by Backtrack Sampler.
 """
 model_name = "unsloth/Llama-3.2-1B-Instruct"
+device = torch.device('cuda')
 # Load the tokenizer and the causal LM identified by model_name; the model is moved to CUDA.
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
 # Wrap model, tokenizer and device in a provider, then build the creative-writing
 # strategy and the backtrack sampler on top of that same provider.
 provider = TransformersProvider(model, tokenizer, device)
 strategy = CreativeWritingStrategy(provider)
 creative_sampler = BacktrackSampler(strategy, provider)
 def create_chat_template_messages(history, prompt):
     messages = [{"role": "user", "content": prompt}]
 @spaces.GPU
 def generate_responses(prompt, history):
     messages = create_chat_template_messages(history, prompt)
     wrapped_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_special_tokens=True, add_generation_prompt=True)
     inputs = tokenizer.encode(wrapped_prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
     async def custom_sampler_task():
         generated_list = []
         generator = creative_sampler.generate(wrapped_prompt, max_length=2048, temperature=1)
     custom_output = asyncio.run(custom_sampler_task())
     standard_output = model.generate(inputs, max_length=2048, temperature=1)
     standard_response = tokenizer.decode(standard_output[0][len(inputs[0]):], skip_special_tokens=True)
     return standard_response.strip(), custom_output.strip()
 # Build the UI: description text, two chat panes, a prompt box, example prompts and a submit button.
 with gr.Blocks(theme=gr.themes.Citrus()) as demo:
     gr.Markdown(description)
     # Both chat panes live in one row; the labels say which sampler each pane shows.
     with gr.Row():
         standard_chat = gr.Chatbot(label="Standard Sampler")
         custom_chat = gr.Chatbot(label="Creative Writing Strategy")
     # Single-line textbox for the user's prompt.
     with gr.Row():
         prompt_input = gr.Textbox(label="Enter your prompt", placeholder="Type your message here...", lines=1)
     # Canned prompts offered through gr.Examples, wired to prompt_input.
     examples = [
         "Write me a short story about a talking dog who wants to be a detective.",
         "Tell me a short tale of a dragon who is afraid of heights.",
         "Create a short story where aliens land on Earth, but they just want to throw a party."
     ]
     gr.Examples(examples=examples, inputs=prompt_input)
     submit_button = gr.Button("Submit")
     def update_chat(prompt, standard_history, custom_history):
         """Generate both replies for ``prompt`` and append them to the chat histories.

         Args:
             prompt: Text submitted by the user.
             standard_history: History of the standard-sampler chat as a list of
                 ``(user, reply)`` pairs, or ``None`` when that chat has no value yet.
             custom_history: Same, for the Creative Writing Strategy chat.

         Returns:
             ``(standard_history, custom_history, "")``: the two extended
             histories (new lists; the arguments are not mutated) followed by an
             empty string.
         """
         # A Chatbot without a value can be delivered as None; ``None + [...]``
         # would raise TypeError, so treat it as an empty conversation.
         standard_history = standard_history or []
         custom_history = custom_history or []
         # Only the standard history is forwarded to generate_responses.
         standard_response, custom_response = generate_responses(prompt, standard_history)
         standard_history = standard_history + [(prompt, standard_response)]
         custom_history = custom_history + [(prompt, custom_response)]
         return standard_history, custom_history, ""
     # The textbox's submit event and the button's click event both call update_chat with the
     # same inputs and outputs; its third return value is written back into prompt_input.
     prompt_input.submit(fn=update_chat, inputs=[prompt_input, standard_chat, custom_chat], outputs=[standard_chat, custom_chat, prompt_input])
     submit_button.click(fn=update_chat, inputs=[prompt_input, standard_chat, custom_chat], outputs=[standard_chat, custom_chat, prompt_input])
 # Enable queueing on the app, then launch it with debug=True.
 demo.queue().launch(debug=True)