Need Example for inference code
#5 · opened by djuna
Can you add inference code using the text completion format?
Something like:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Base", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Base", trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
input_text = """<|fim▁begin|>def quick_sort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[0]
    left = []
    right = []
<|fim▁hole|>
        if arr[i] < pivot:
            left.append(arr[i])
        else:
            right.append(arr[i])
    return quick_sort(left) + [pivot] + quick_sort(right)<|fim▁end|>"""
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_length=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True)[len(input_text):])
This is my suspicion, but I'd love somebody to confirm this
alpaca_prompt = """### Instruction:
You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.
### User Edits:
{}
### User Excerpt:
{}
### Response:
"""
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
def formatting_prompts_func(examples, prompt):
    events = examples["events"]
    inputs = examples["input"]
    texts = []
    for events, input in zip(events, inputs):
        text = prompt.format(events, input)
        texts.append(text)
    return {"text": texts}
def main(prompt: str = alpaca_prompt):
    model = AutoModelForCausalLM.from_pretrained("zed-industries/zeta", torch_dtype=torch.bfloat16).to("cuda")
    dataset = load_dataset("zed-industries/zeta")
    tokenizer = AutoTokenizer.from_pretrained("zed-industries/zeta")
    dataset = dataset.map(formatting_prompts_func, batched=True, fn_kwargs={"prompt": prompt})

    for example in dataset["eval"]:
        events, input, output, rejected, assertions, text = example.values()
        output_toks = model.generate(**tokenizer(text, return_tensors="pt").to("cuda"))
        output_text = tokenizer.decode(output_toks[0], skip_special_tokens=True)[len(text):]
        print("Prediction:")
        print(output_text)
        print("----")
        print("Ground Truth:")
        print(output)
        breakpoint()


if __name__ == "__main__":
    main()
EDIT: the related code https://paste.ee/r/6lAkAv3D/0
So, for standard text completion, it looks like this, right?
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
tokenizer = AutoTokenizer.from_pretrained("zed-industries/zeta")
model = AutoModelForCausalLM.from_pretrained("zed-industries/zeta", torch_dtype=torch.bfloat16).to("cuda")
alpaca_prompt = """### Instruction:
You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.
### User Edits:
User edited "models/customer.rb":
```diff
@@ -2,5 +2,5 @@
class Customer
def initialize
-
@name
= name
+
@name
= name.capitalize
@email
= email
@phone
= phone
User Excerpt:
def initialize
<|editable_region_start|>
@name
= name.capitalize<|user_cursor_is_here|>
@email
= email
@phone
= phone
@address
= address
end
def to_s
@name
end
<|editable_region_end|>
private
def validate_email
@email
.include?('@')
Response:
"""
inputs = tokenizer(alpaca_prompt, return_tensors="pt").to(model.device)
# use max_new_tokens rather than max_length, since this prompt alone is longer than 128 tokens
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True)[len(alpaca_prompt):])
(Seems like the markdown couldn't be rendered properly)
@djuna the formatting of your message got a bit out of hand, but in principle, yes, I think this is correct.
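In case it helps anyone else landing here, below is a minimal end-to-end sketch of the format discussed above: it fills the same instruction template with one example from the zed-industries/zeta eval split and generates a completion. The choice of `max_new_tokens=512` and taking the first eval example are my own assumptions, not an official recipe.

```python
# Minimal sketch, assuming the "events"/"input"/"output" fields and the "eval" split used above.
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM

PROMPT = """### Instruction:
You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location.
### User Edits:
{}
### User Excerpt:
{}
### Response:
"""

tokenizer = AutoTokenizer.from_pretrained("zed-industries/zeta")
model = AutoModelForCausalLM.from_pretrained(
    "zed-industries/zeta", torch_dtype=torch.bfloat16
).to("cuda")

example = load_dataset("zed-industries/zeta")["eval"][0]  # first eval example, picked arbitrarily
text = PROMPT.format(example["events"], example["input"])

inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=512)  # assumed budget for the rewritten region
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(text):]

print("Prediction:")
print(prediction)
print("Ground truth:")
print(example["output"])
```

Note that slicing off `len(text)` characters assumes the decoded output reproduces the prompt verbatim; if the tokenizer normalizes anything, splitting on the "### Response:" marker is more robust.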