Tonic committed on
Commit
c84e5ec
·
unverified ·
1 Parent(s): 0825bc6

testing the new one bit

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. README.md +1 -1
  3. app.py +126 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ex1.py
2
+ ex2.py
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Native 1-bit LLM
3
- emoji: 🦀
4
  colorFrom: purple
5
  colorTo: red
6
  sdk: gradio
 
1
  ---
2
  title: Native 1-bit LLM
3
+ emoji: 🫦
4
  colorFrom: purple
5
  colorTo: red
6
  sdk: gradio
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import uuid
5
+
6
# Process-wide cache so the checkpoint is loaded once instead of on every call.
_MODEL_CACHE = {}


def load_model():
    """Return the BitNet model and tokenizer, loading them on first use.

    The pair is created with ``from_pretrained`` the first time this function
    is called and stored in ``_MODEL_CACHE``; subsequent calls return the very
    same objects, so callers that invoke this per request no longer pay the
    load cost each time.

    Returns:
        tuple: ``(model, tokenizer)`` for ``microsoft/bitnet-b1.58-2B-4T``,
        with the model loaded using ``torch.bfloat16``.
    """
    if "pair" not in _MODEL_CACHE:
        model_id = "microsoft/bitnet-b1.58-2B-4T"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
        )
        _MODEL_CACHE["pair"] = (model, tokenizer)
    return _MODEL_CACHE["pair"]
14
+
15
def generate_response(user_input, system_prompt, max_new_tokens, temperature, top_p, top_k, history):
    """Generate one assistant reply and return the extended conversation.

    Args:
        user_input: Text of the new user message.
        system_prompt: System message placed first in the prompt.
        max_new_tokens: Upper bound on newly generated tokens (coerced to int).
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        top_k: Top-k cutoff forwarded to ``model.generate`` (coerced to int).
        history: Earlier turns as a list of ``{"role", "content"}`` dicts, or
            ``None`` for a fresh conversation. The list is not mutated.

    Returns:
        tuple: ``(history, history)`` - the updated message list twice, once
        for the chatbot display and once for the session state. When
        ``user_input`` is empty or whitespace the conversation is returned
        unchanged and no generation is performed.
    """
    previous_turns = list(history or [])

    # Nothing to answer: skip generation rather than sampling from an empty turn.
    if not user_input or not user_input.strip():
        return previous_turns, previous_turns

    model, tokenizer = load_model()

    # Feed earlier turns back to the model so the conversation is actually
    # multi-turn; only role/content are forwarded to the chat template.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in previous_turns
    )
    messages.append({"role": "user", "content": user_input})

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    chat_input = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate response. UI sliders may deliver floats; generate() needs
    # integer token counts and top-k values.
    chat_outputs = model.generate(
        **chat_input,
        max_new_tokens=int(max_new_tokens),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        do_sample=True,
    )

    # Decode only the tokens produced after the prompt.
    prompt_length = chat_input['input_ids'].shape[-1]
    response = tokenizer.decode(chat_outputs[0][prompt_length:], skip_special_tokens=True)

    # Build a new list instead of mutating the caller's history in place.
    updated_history = previous_turns + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": response},
    ]
    return updated_history, updated_history
43
+
44
# Gradio interface
# Builds the page: a title, two informational columns, then a row with the
# input controls on the left and the conversation view on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# BitNet b1.58 2B4T Demo")

    # Row 1: static descriptive text, model on the left, community on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("""
            ## About BitNet b1.58 2B4T
            BitNet b1.58 2B4T is the first open-source, native 1-bit Large Language Model with 2 billion parameters, developed by Microsoft Research. Trained on 4 trillion tokens, it matches the performance of full-precision models while offering significant efficiency gains in memory, energy, and latency. Features include:
            - Transformer-based architecture with BitLinear layers
            - Native 1.58-bit weights and 8-bit activations
            - Maximum context length of 4096 tokens
            - Optimized for efficient inference with bitnet.cpp
            """)

        with gr.Column():
            gr.Markdown("""
            ## About Tonic AI
            Tonic AI is a vibrant community of AI enthusiasts and developers always building cool demos and pushing the boundaries of what's possible with AI. We're passionate about creating innovative, accessible, and engaging AI experiences for everyone. Join us in exploring the future of AI!
            """)

    # Row 2: inputs and sampling controls (left), conversation display (right).
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            system_prompt = gr.Textbox(
                label="System Prompt",
                value="You are a helpful AI assistant.",
                placeholder="Enter system prompt..."
            )

            # Sampling parameters, collapsed by default; each slider value is
            # passed positionally to generate_response via the click handler.
            with gr.Accordion("Advanced Options", open=False):
                max_new_tokens = gr.Slider(
                    minimum=10,
                    maximum=500,
                    value=50,
                    step=10,
                    label="Max New Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top P"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=50,
                    step=1,
                    label="Top K"
                )

            submit_btn = gr.Button("Send")

        with gr.Column():
            # type="messages": the chatbot is fed a list of role/content dicts,
            # which is the shape generate_response returns.
            chatbot = gr.Chatbot(label="Conversation", type="messages")

    # Conversation state, initialised to an empty list and handed to
    # generate_response as its `history` argument.
    chat_history = gr.State([])

    # The inputs list order must match generate_response's parameter order;
    # its two return values update the chatbot display and the stored state.
    submit_btn.click(
        fn=generate_response,
        inputs=[
            user_input,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
            chat_history
        ],
        outputs=[chatbot, chat_history]
    )

# Start the app only when run as a script.
# NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
# rendering - confirm against the Gradio version in use.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)