Safetensors
qwen2

Need Example for inference code

#5
by djuna - opened

Can you add the inference code with the text completion format?
Something like

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the tokenizer and the model in bfloat16, then move the model to the GPU.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Base", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Base", trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
# Prompt with begin/hole/end markers around a quick_sort body.
# NOTE(review): presumably the model is expected to produce the code that
# belongs at the hole marker — confirm against the model card.
input_text = """<|fim▁begin|>def quick_sort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[0]
    left = []
    right = []
<|fim▁hole|>
        if arr[i] < pivot:
            left.append(arr[i])
        else:
            right.append(arr[i])
    return quick_sort(left) + [pivot] + quick_sort(right)<|fim▁end|>"""
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_length=128)
# Strip the prompt by token count rather than by character offset: the decoded
# text is not guaranteed to reproduce the prompt character for character once
# special tokens are skipped, so slicing with len(input_text) can cut the
# completion in the wrong place.
prompt_len = inputs["input_ids"].shape[1]
print(tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True))

This is my suspicion, but I'd love somebody to confirm this


# Prompt template with two positional `{}` placeholders. It is rendered with
# `prompt.format(events, input)`, so the first slot receives the user-edit
# events and the second the excerpt to rewrite. The text after the final
# "### Response:" header is left empty for the model to continue.
alpaca_prompt = """### Instruction:
You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.

### User Edits:

{}

### User Excerpt:

{}

### Response:

"""
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def formatting_prompts_func(examples, prompt):
    """Render one prompt string per example in a batched dataset slice.

    Args:
        examples: Batch mapping holding parallel sequences under the keys
            ``"events"`` and ``"input"``.
        prompt: Template with two positional ``{}`` placeholders; the first
            receives an example's events, the second its input excerpt.

    Returns:
        A dict with a single ``"text"`` key whose value is the list of
        rendered prompts, in batch order.
    """
    # Loop names are kept distinct from the batch columns and from the
    # ``input`` builtin so nothing is rebound or shadowed while iterating.
    texts = [
        prompt.format(event_log, excerpt)
        for event_log, excerpt in zip(examples["events"], examples["input"])
    ]
    return {"text": texts}

def main(prompt: str = alpaca_prompt):
    """Generate a completion for every eval example and print it next to the reference.

    Loads the ``zed-industries/zeta`` model, tokenizer and dataset, renders
    each example through ``formatting_prompts_func`` with ``prompt``, and for
    each row of the ``"eval"`` split prints the model's continuation followed
    by the example's reference output. Execution drops into the debugger
    after every example.

    Args:
        prompt: Template passed to ``formatting_prompts_func``; it must accept
            two positional ``{}`` fields.
    """
    model = AutoModelForCausalLM.from_pretrained("zed-industries/zeta", torch_dtype=torch.bfloat16).to("cuda")
    dataset = load_dataset("zed-industries/zeta")
    tokenizer = AutoTokenizer.from_pretrained("zed-industries/zeta")
    dataset = dataset.map(formatting_prompts_func, batched=True, fn_kwargs={"prompt": prompt})
    for example in dataset["eval"]:
        # Read fields by name: positional unpacking of ``example.values()``
        # silently misassigns (or raises) if a column is added or reordered.
        text = example["text"]
        expected = example["output"]

        inputs = tokenizer(text, return_tensors="pt").to(model.device)
        output_toks = model.generate(**inputs)

        # The returned sequence starts with the prompt tokens. Cut them off by
        # token count; a character offset into the decoded string is only
        # right when decoding reproduces the prompt exactly.
        prompt_len = inputs["input_ids"].shape[1]
        output_text = tokenizer.decode(output_toks[0][prompt_len:], skip_special_tokens=True)

        print("Prediction:")
        print(output_text)
        print("----")
        print("Ground Truth:")
        print(expected)
        # Deliberate pause so each example can be inspected interactively.
        breakpoint()


if __name__ == "__main__":
    main()

EDIT: the related code https://paste.ee/r/6lAkAv3D/0
So, for standard text completion, it looks like this, right?

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the zeta tokenizer and the model in bfloat16 on the GPU.
tokenizer = AutoTokenizer.from_pretrained("zed-industries/zeta")
model = AutoModelForCausalLM.from_pretrained("zed-industries/zeta", torch_dtype=torch.bfloat16).to("cuda")
# A fully filled-in prompt: instruction, one user edit as a diff, and the
# excerpt to rewrite with its editable-region and cursor markers.
# NOTE(review): this example was reconstructed from a markdown-mangled paste.
# The "###" section headers, the closing diff fence, the re-joined `@ivar`
# lines and the indentation were restored by convention — confirm against a
# real dataset row before relying on the exact whitespace.
alpaca_prompt = """### Instruction:
You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.

### User Edits:

User edited "models/customer.rb":

```diff
@@ -2,5 +2,5 @@
 class Customer
   def initialize
-    @name = name
+    @name = name.capitalize
     @email = email
     @phone = phone
```

### User Excerpt:

  def initialize
<|editable_region_start|>
    @name = name.capitalize<|user_cursor_is_here|>
    @email = email
    @phone = phone
    @address = address
  end

  def to_s
    @name
  end

<|editable_region_end|>
  private

  def validate_email
    @email.include?('@')

### Response:

"""

# Tokenize the prompt defined above (the original referenced an undefined
# `input_text` here).
inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(model.device)
# Budget the completion itself: `max_length` also counts the prompt tokens, so
# a prompt of this size would leave little or no room for generated output.
outputs = model.generate(**inputs, max_new_tokens=128)
# Drop the prompt by token count so the printed text is only the continuation,
# regardless of how decoding renders the prompt's marker tokens.
prompt_len = inputs["input_ids"].shape[1]
print(tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True))


(Seems like the markdown couldn't be rendered properly)

@djuna The formatting of your message got a bit out of hand, but in principle, yes, I think this is correct.

You need to confirm your account before you can post a new comment.

Sign up or log in to comment