using docker initial commit
- .gitignore +0 -2
- Dockerfile +45 -0
- app.py +64 -48
- apt.txt +5 -0
- requirements.txt +3 -0
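Taken together, the commit turns the repo into a Docker-based Gradio Space: the Dockerfile compiles bitnet.cpp, app.py downloads the GGUF weights and shells out to the binary, and the server listens on port 7860. Once the Space builds, a quick way to see what the Blocks app actually exposes is the gradio_client package; the Space id below is a placeholder, not something defined in this commit.

# Hypothetical smoke test (not part of this commit): inspect the endpoints the
# running Space exposes, using the gradio_client package.
from gradio_client import Client

client = Client("user-name/bitnet-space")  # hypothetical Space id
client.view_api()  # prints the callable endpoints and their parameters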
.gitignore
DELETED
@@ -1,2 +0,0 @@
-ex1.py
-ex2.py
Dockerfile
ADDED
@@ -0,0 +1,45 @@
+FROM ubuntu:22.04
+
+# Set non-interactive frontend for apt
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    cmake \
+    clang-18 \
+    git \
+    python3.9 \
+    python3-pip \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set Clang as default compiler
+ENV CC=/usr/bin/clang-18
+ENV CXX=/usr/bin/clang++-18
+
+# Create a non-root user (Hugging Face Spaces run as non-root)
+RUN useradd -m -u 1000 user
+USER user
+WORKDIR /home/user/app
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Clone and build bitnet.cpp
+RUN git clone https://github.com/microsoft/BitNet.git bitnet.cpp
+WORKDIR /home/user/app/bitnet.cpp
+RUN mkdir build && cd build && \
+    cmake .. -DCMAKE_C_COMPILER=clang-18 -DCMAKE_CXX_COMPILER=clang++-18 && \
+    make -j$(nproc)
+
+# Copy application code
+WORKDIR /home/user/app
+COPY app.py .
+
+# Expose port 7860 (required for Hugging Face Spaces)
+EXPOSE 7860
+
+# Run Gradio app
+CMD ["python3", "app.py"]
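The Dockerfile builds BitNet from source, and app.py (below) hard-codes the resulting binary path as /home/user/app/bitnet.cpp/build/bin/main. Whether the CMake build actually emits an executable with that name is an assumption of this commit, so a minimal sanity check along the following lines (hypothetical, not part of the commit) could be run inside the image to catch a mismatch before the Gradio app starts:

# Hypothetical sanity check (not part of this commit): verify the bitnet.cpp
# binary that app.py expects exists and is executable inside the image.
import os
import sys

BITNET_BINARY = "/home/user/app/bitnet.cpp/build/bin/main"  # path assumed by app.py

def check_bitnet_binary(path: str = BITNET_BINARY) -> None:
    if not (os.path.isfile(path) and os.access(path, os.X_OK)):
        sys.exit(f"bitnet.cpp binary missing or not executable at {path}; "
                 "check the cmake/make step in the Dockerfile.")

if __name__ == "__main__":
    check_bitnet_binary()
    print("bitnet.cpp binary found")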
app.py
CHANGED
@@ -1,69 +1,87 @@
 import gradio as gr
+import subprocess
+import os
+import json
+from huggingface_hub import hf_hub_download
+import logging
 
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
+# Paths
+BITNET_BINARY = "/home/user/app/bitnet.cpp/build/bin/main"
+MODEL_DIR = "/home/user/app/models"
+MODEL_PATH = os.path.join(MODEL_DIR, "bitnet-b1.58-2B-4T.gguf")
+MODEL_REPO = "microsoft/bitnet-b1.58-2B-4T-gguf"
+MODEL_FILE = "bitnet-b1.58-2B-4T.gguf"
+
+# Download model weights if not present
+def download_model():
+    if not os.path.exists(MODEL_PATH):
+        logger.info("Downloading model weights...")
+        os.makedirs(MODEL_DIR, exist_ok=True)
+        hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename=MODEL_FILE,
+            local_dir=MODEL_DIR,
+            local_dir_use_symlinks=False
         )
+        logger.info("Model weights downloaded successfully.")
+    else:
+        logger.info("Model weights already exist.")
+
+# Run model download on startup
+download_model()
+
+def run_bitnet_inference(prompt, max_tokens=50, temperature=0.7, top_p=0.9, top_k=50):
+    # Prepare the command to call bitnet.cpp binary
+    cmd = [
+        BITNET_BINARY,
+        "-m", MODEL_PATH,
+        "-p", prompt,
+        "--max-tokens", str(max_tokens),
+        "--temperature", str(temperature),
+        "--top-p", str(top_p),
+        "--top-k", str(top_k)
+    ]
+
+    try:
+        # Run the command and capture output
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        output = result.stdout.strip()
+        return output
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Inference error: {e.stderr}")
+        return f"Error during inference: {e.stderr}"
 
 def manage_history(history):
-    # Limit to 3 turns (
-    max_messages = 6
+    # Limit to 3 turns (user + assistant = 2 messages per turn)
+    max_messages = 6
     if len(history) > max_messages:
         history = history[-max_messages:]
 
     # Limit total character count to 300
     total_chars = sum(len(msg["content"]) for msg in history)
     while total_chars > 300 and history:
-        history.pop(0)
-        total_chars = sum(len(msg["content"]) for msg in
+        history.pop(0)
+        total_chars = sum(len(msg["content"]) for msg in history)
 
     return history
 
 def generate_response(user_input, system_prompt, max_new_tokens, temperature, top_p, top_k, history):
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": user_input},
-    ]
-
-    chat_outputs = model.generate(
-        **chat_input,
-        max_new_tokens=max_new_tokens,
+    # Format the prompt for bitnet.cpp
+    full_prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
+
+    # Run inference
+    response = run_bitnet_inference(
+        full_prompt,
+        max_tokens=max_new_tokens,
         temperature=temperature,
         top_p=top_p,
-        top_k=top_k
-        do_sample=True
+        top_k=top_k
     )
 
-    # Decode response
-    response = tokenizer.decode(chat_outputs[0][chat_input['input_ids'].shape[-1]:], skip_special_tokens=True)
-
     # Update history
     history.append({"role": "user", "content": user_input})
     history.append({"role": "assistant", "content": response})
 
@@ -155,6 +173,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     )
 
 if __name__ == "__main__":
-    load_model()
-    demo.launch(ssr_mode=False, share=True)
+    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False, share=True)
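A note on generate_response: run_bitnet_inference returns the binary's raw stdout, and llama.cpp-style CLIs usually echo the prompt ahead of the completion. Assuming this binary behaves the same way (an assumption, not something the commit establishes), a small helper could strip the echoed prompt before the reply is appended to the chat history:

# Hypothetical helper (not part of this commit): strip an echoed prompt from
# the captured stdout so only the assistant's completion reaches the history.
def extract_completion(raw_output: str, prompt: str) -> str:
    text = raw_output.strip()
    if text.startswith(prompt.strip()):
        text = text[len(prompt.strip()):]
    return text.strip()

# Possible usage inside generate_response:
#     response = extract_completion(
#         run_bitnet_inference(full_prompt, max_tokens=max_new_tokens),
#         full_prompt,
#     )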
apt.txt
ADDED
@@ -0,0 +1,5 @@
+build-essential
+cmake
+clang-18
+git
+wget
requirements.txt
CHANGED
@@ -1,3 +1,6 @@
 torch
 git+https://github.com/shumingma/transformers.git
 accelerate
+gradio
+numpy
+huggingface_hub