wolfofbackstreet committed on
Commit
019895a
·
verified ·
1 Parent(s): 0f099c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -126
app.py CHANGED
@@ -1,127 +1,127 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
2
- import inspect
3
- from typing import get_type_hints, Callable, Any
4
- import gradio as gr
5
-
6
- model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"
7
- # Load tokenizer
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
-
10
- # Define GPTQ configuration
11
- gptq_config = GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)
12
-
13
- # Load pre-quantized model on CPU
14
- model = AutoModelForCausalLM.from_pretrained(
15
- model_name,
16
- quantization_config=gptq_config,
17
- device_map="cpu" # Explicitly enforce CPU execution
18
- )
19
-
20
-
21
- def parse_docstring(func):
22
- doc = inspect.getdoc(func)
23
- if not doc:
24
- return {"title": "Untitled", "description": ""}
25
-
26
- lines = doc.splitlines()
27
- title = next((line.replace("Title:", "").strip() for line in lines if line.startswith("Title:")), "Untitled")
28
- description = "\n".join(line.strip() for line in lines if line.startswith("Description:"))
29
- description = description.replace("Description:", "").strip()
30
-
31
- return {"title": title, "description": description}
32
-
33
- def gradio_app_with_docs(func: Callable) -> Callable:
34
- sig = inspect.signature(func)
35
- type_hints = get_type_hints(func)
36
- metadata = parse_docstring(func)
37
-
38
- """
39
- A decorator that automatically builds and launches a Gradio interface
40
- based on function type hints.
41
-
42
- Args:
43
- func: A callable with type-hinted parameters and return type.
44
-
45
- Returns:
46
- The wrapped function with a `.launch()` method to start the app.
47
- """
48
- # Infer Gradio components from type hints
49
- def _map_type(t: type) -> gr.Component:
50
- if t == str:
51
- return gr.Textbox(label="Input")
52
- elif t == int:
53
- return gr.Number(precision=0)
54
- elif t == float:
55
- return gr.Number()
56
- elif t == bool:
57
- return gr.Checkbox()
58
- elif hasattr(t, "__origin__") and t.__origin__ == list: # Handle List[type]
59
- elem_type = t.__args__[0]
60
- if elem_type == str:
61
- return gr.Dropdown(choices=["Option1", "Option2"])
62
- else:
63
- raise ValueError(f"Unsupported list element type: {elem_type}")
64
- else:
65
- raise ValueError(f"Unsupported type: {t}")
66
-
67
- # Extract function signature and type hints
68
- sig = inspect.signature(func)
69
- type_hints = get_type_hints(func)
70
-
71
- # Map parameters to Gradio inputs
72
- inputs = []
73
- for name, param in sig.parameters.items():
74
- if name == "self":
75
- continue # Skip self in class methods
76
- param_type = type_hints.get(name, Any)
77
- component = _map_type(param_type)
78
- component.label = name.replace("_", " ").title()
79
- inputs.append(component)
80
-
81
- # Map return type to Gradio output
82
- return_type = type_hints.get("return", Any)
83
- outputs = _map_type(return_type)
84
-
85
- # Wrap function with Gradio interface
86
- interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
87
-
88
- with gr.Blocks() as demo:
89
- gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
90
- interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
91
-
92
- def wrapper(*args, **kwargs):
93
- return func(*args, **kwargs)
94
-
95
- wrapper.launch = lambda: demo.launch()
96
- return wrapper
97
-
98
-
99
- @gradio_app_with_docs
100
- def generate_response(prompt: str) -> str:
101
- """
102
- Title: Super Tiny GPTQ V2 Model on CPU
103
- Description: A Simple app to test out the potentials of small GPTQ LLM model.
104
-
105
- Args:
106
- prompt (str): A simple prompt.
107
-
108
- Returns:
109
- str: Simplified response.
110
- """
111
- inputs = tokenizer(prompt, return_tensors="pt").to("cpu") # Move inputs to CPU
112
- outputs = model.generate(
113
- **inputs,
114
- max_new_tokens=50,
115
- temperature=0.7,
116
- top_p=0.9
117
- )
118
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
119
-
120
- # # Example usage
121
- # prompt = "Explain quantum computing in simple terms."
122
- # response = generate_response(prompt)
123
- # print(response)
124
-
125
-
126
- if __name__ == "__main__":
127
  generate_response.launch()
 
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
import inspect
from typing import get_type_hints, Callable, Any
import gradio as gr

# Hugging Face Hub id of the pre-quantized SmolLM2 checkpoint.
model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"

# Tokenizer matching the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Explicit GPTQ configuration, disabled in this revision.
# NOTE(review): presumably the checkpoint ships its own quantization
# config, so the model is loaded as-is — confirm before re-enabling.
# gptq_config = GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)

# Load the pre-quantized model, explicitly pinned to CPU execution.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu"  # Explicitly enforce CPU execution
    # quantization_config=gptq_config,
)
def parse_docstring(func):
    """Pull ``Title:`` and ``Description:`` lines out of *func*'s docstring.

    Returns a dict with keys ``"title"`` and ``"description"``; falls back
    to ``"Untitled"`` / ``""`` when the docstring is missing or the markers
    are absent.
    """
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    title = None
    description_parts = []
    for line in doc.splitlines():
        # First "Title:" line wins; every "Description:" line is collected.
        if title is None and line.startswith("Title:"):
            title = line.replace("Title:", "").strip()
        if line.startswith("Description:"):
            description_parts.append(line.strip())

    description = "\n".join(description_parts).replace("Description:", "").strip()
    return {
        "title": title if title is not None else "Untitled",
        "description": description,
    }

def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        The wrapped function with a `.launch()` method to start the app.
    """
    import functools  # local import: not in the file-level import block

    metadata = parse_docstring(func)

    # Infer Gradio components from type hints
    def _map_type(t: type) -> gr.Component:
        # Map one Python type hint to the Gradio component used for it.
        if t == str:
            return gr.Textbox(label="Input")
        elif t == int:
            return gr.Number(precision=0)
        elif t == float:
            return gr.Number()
        elif t == bool:
            return gr.Checkbox()
        elif hasattr(t, "__origin__") and t.__origin__ == list:  # Handle List[type]
            elem_type = t.__args__[0]
            if elem_type == str:
                return gr.Dropdown(choices=["Option1", "Option2"])
            raise ValueError(f"Unsupported list element type: {elem_type}")
        else:
            raise ValueError(f"Unsupported type: {t}")

    # Extract function signature and type hints (once — the original
    # computed both twice).
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)

    # Map parameters to Gradio inputs
    inputs = []
    for name, param in sig.parameters.items():
        if name == "self":
            continue  # Skip self in class methods
        param_type = type_hints.get(name, Any)
        component = _map_type(param_type)
        component.label = name.replace("_", " ").title()
        inputs.append(component)

    # Map return type to Gradio output
    return_type = type_hints.get("return", Any)
    outputs = _map_type(return_type)

    # Build the UI once, inside the Blocks context, so the markdown header
    # and the interface live in the same app. (The original also created a
    # throwaway gr.Interface outside the context.)
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    @functools.wraps(func)  # preserve __name__/__doc__ of the wrapped func
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Expose a .launch() hook so callers start the app via the function.
    wrapper.launch = lambda: demo.launch()
    return wrapper

@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GPTQ V2 Model on CPU
    Description: A Simple app to test out the potentials of small GPTQ LLM model.

    Args:
        prompt (str): A simple prompt.

    Returns:
        str: Simplified response.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        # do_sample=True is required for temperature/top_p to take effect;
        # without it generate() uses greedy decoding and ignores them.
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage:
#   response = generate_response("Explain quantum computing in simple terms.")
#   print(response)


if __name__ == "__main__":
    # .launch() is attached by the @gradio_app_with_docs decorator.
    generate_response.launch()