wolfofbackstreet commited on
Commit
0aa5cd7
·
verified ·
1 Parent(s): 8d469fc

Init files

Browse files
Files changed (5) hide show
  1. .gitignore +4 -0
  2. Dockerfile +34 -0
  3. README.md +22 -14
  4. app.py +127 -0
  5. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .idea/
+ # SECURITY NOTE (review): this file previously contained pasted SSH commands
+ # and a local private-key path that were accidentally committed here.
+ # Removed: .gitignore should contain ignore patterns only, and key paths or
+ # shell commands must never be committed.
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# gptqmodel needs --no-build-isolation so it builds against the torch
# already installed from requirements.txt.
RUN pip install -U gptqmodel --no-build-isolation -v

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user
USER user
# HOME must be set in its own ENV instruction: within a single ENV, Docker
# substitutes $VAR with its value from *before* that instruction, so the
# original combined form left PYTHONPATH pointing at the old $HOME.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Gradio's default port (matches GRADIO_SERVER_NAME=0.0.0.0 above).
EXPOSE 7860

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,14 +1,22 @@
1
- ---
2
- title: Tiny GPTQ V2 LLM On CPU
3
- emoji: 🌖
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.27.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: A Simple app to test out the potentials of small GPTQ LLM mo
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Tiny GPTQ V2 LLM On CPU
3
+ emoji: 🌖
4
+ colorFrom: green
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 5.27.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: A Simple app to test out the potentials of small GPTQ LLM mo
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
+
16
+
17
+ ### Build and Run on Docker
18
+
19
+ ```text
20
+ docker build -t my_gradio_app .
21
+ docker run -d -p 7860:7860 my_gradio_app
22
+ ```
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
import inspect
from typing import get_type_hints, Callable, Any
import gradio as gr

# Hub id of the pre-quantized 4-bit GPTQ SmolLM2-135M checkpoint served by this app.
model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define GPTQ configuration.
# use_exllama/use_cuda_fp16 are disabled so the quantized weights can run
# on CPU (this Space has no GPU).
gptq_config = GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)

# Load pre-quantized model on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=gptq_config,
    device_map="cpu"  # Explicitly enforce CPU execution
)
def parse_docstring(func):
    """Extract a title and description from *func*'s docstring.

    Scans the (dedented) docstring for lines starting with ``Title:`` and
    ``Description:`` markers.

    Args:
        func: Any callable; its docstring is read via ``inspect.getdoc``.

    Returns:
        dict: ``{"title": str, "description": str}``; ``"Untitled"`` / ``""``
        when the markers (or the docstring itself) are absent.
    """
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    # Strip each line before matching so indented "Title:"/"Description:"
    # markers are still recognised (the original startswith() test silently
    # missed them).
    lines = [line.strip() for line in doc.splitlines()]
    # Slice off the marker prefix rather than str.replace(), which would
    # remove *every* occurrence of the marker text inside the line.
    title = next(
        (line[len("Title:"):].strip() for line in lines if line.startswith("Title:")),
        "Untitled",
    )
    description = "\n".join(
        line[len("Description:"):].strip()
        for line in lines
        if line.startswith("Description:")
    ).strip()

    return {"title": title, "description": description}
def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.

    The page heading comes from the decorated function's docstring via
    ``parse_docstring`` (``Title:`` / ``Description:`` markers).

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        The wrapped function with a `.launch()` method to start the app.

    Raises:
        ValueError: if a parameter or return annotation has no Gradio mapping.
    """
    # Extract function signature, type hints and docstring metadata once.
    # (The original computed sig/type_hints twice and placed this docstring
    # after executable code, where it was a dead string expression.)
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)
    metadata = parse_docstring(func)

    def _map_type(t: type) -> gr.Component:
        # Translate a Python type annotation into a Gradio component.
        if t == str:
            return gr.Textbox(label="Input")
        elif t == int:
            return gr.Number(precision=0)
        elif t == float:
            return gr.Number()
        elif t == bool:
            return gr.Checkbox()
        elif hasattr(t, "__origin__") and t.__origin__ == list:  # Handle List[type]
            elem_type = t.__args__[0]
            if elem_type == str:
                return gr.Dropdown(choices=["Option1", "Option2"])
            raise ValueError(f"Unsupported list element type: {elem_type}")
        else:
            raise ValueError(f"Unsupported type: {t}")

    # Map parameters to Gradio inputs
    inputs = []
    for name, param in sig.parameters.items():
        if name == "self":
            continue  # Skip self in class methods
        param_type = type_hints.get(name, Any)
        component = _map_type(param_type)
        component.label = name.replace("_", " ").title()
        inputs.append(component)

    # Map return type to a single Gradio output component
    return_type = type_hints.get("return", Any)
    outputs = _map_type(return_type)

    # Build the page: docstring heading, then the auto-generated form.
    # (The original also created an extra gr.Interface *outside* this Blocks
    # context that was never launched; that dead object has been removed.)
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Expose the Blocks app through the wrapped callable.
    wrapper.launch = lambda: demo.launch()
    return wrapper
98
+
@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GPTQ V2 Model on CPU
    Description: A Simple app to test out the potentials of small GPTQ LLM model.

    Args:
        prompt (str): A simple prompt.

    Returns:
        str: Simplified response.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        # do_sample=True is required for temperature/top_p to take effect;
        # without it transformers generates greedily and ignores both
        # (emitting a warning).
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    # Decoded output includes the prompt tokens as well (decoder-only model).
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
# # Example usage
# prompt = "Explain quantum computing in simple terms."
# response = generate_response(prompt)
# print(response)


# Launch the auto-generated Gradio UI when run as a script: the
# @gradio_app_with_docs decorator attached a `.launch()` method to the
# wrapped function.
if __name__ == "__main__":
    generate_response.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ optimum
2
+ gradio
3
+ torch
4
+ transformers