nikravan committed
Commit ed592a2 · verified · 1 Parent(s): f4c4266

Update app.py

Files changed (1)
  1. app.py +32 -32
app.py CHANGED
@@ -29,41 +29,41 @@ def respond(
 
     from transformers import AutoModelForCausalLM, AutoTokenizer
 
     MODEL_PATH = "THUDM/GLM-4-Z1-32B-0414"
 
     tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
     model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto")
 
     message = [{"role": "user", "content": "Let a, b be positive real numbers such that ab = a + b + 3. Determine the range of possible values for a + b."}]
 
     inputs = tokenizer.apply_chat_template(
         message,
         return_tensors="pt",
         add_generation_prompt=True,
         return_dict=True,
     ).to(model.device)
 
     generate_kwargs = {
         "input_ids": inputs["input_ids"],
         "attention_mask": inputs["attention_mask"],
         "max_new_tokens": 4096,
         "do_sample": False,
     }
     out = model.generate(**generate_kwargs)
-    print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
+    response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
 
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    yield response
+    # for message in client.chat_completion(
+    #     messages,
+    #     max_tokens=max_tokens,
+    #     stream=True,
+    #     temperature=temperature,
+    #     top_p=top_p,
+    # ):
+    #     token = message.choices[0].delta.content
 
-        response += token
-        yield response
+    #     response += token
+    #     yield response
 
 
 """