Spaces:

wolfofbackstreet
/

tiny-gguf-on-cpu

Sleeping

App Files Community

wolfofbackstreet committed on Apr 29

Commit

019895a

verified ·

1 Parent(s): 0f099c1

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -126

app.py CHANGED Viewed

@@ -1,127 +1,127 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
-import inspect
-from typing import get_type_hints, Callable, Any
-import gradio as gr
-model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Define GPTQ configuration
-gptq_config = GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)
-# Load pre-quantized model on CPU
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    quantization_config=gptq_config,
-    device_map="cpu"  # Explicitly enforce CPU execution
-)
-def parse_docstring(func):
-    doc = inspect.getdoc(func)
-    if not doc:
-        return {"title": "Untitled", "description": ""}
-    lines = doc.splitlines()
-    title = next((line.replace("Title:", "").strip() for line in lines if line.startswith("Title:")), "Untitled")
-    description = "\n".join(line.strip() for line in lines if line.startswith("Description:"))
-    description = description.replace("Description:", "").strip()
-    return {"title": title, "description": description}
-def gradio_app_with_docs(func: Callable) -> Callable:
-    sig = inspect.signature(func)
-    type_hints = get_type_hints(func)
-    metadata = parse_docstring(func)
-    """
-    A decorator that automatically builds and launches a Gradio interface
-    based on function type hints.
-    Args:
-        func: A callable with type-hinted parameters and return type.
-    Returns:
-        The wrapped function with a `.launch()` method to start the app.
-    """
-    # Infer Gradio components from type hints
-    def _map_type(t: type) -> gr.Component:
-        if t == str:
-            return gr.Textbox(label="Input")
-        elif t == int:
-            return gr.Number(precision=0)
-        elif t == float:
-            return gr.Number()
-        elif t == bool:
-            return gr.Checkbox()
-        elif hasattr(t, "__origin__") and t.__origin__ == list:  # Handle List[type]
-            elem_type = t.__args__[0]
-            if elem_type == str:
-                return gr.Dropdown(choices=["Option1", "Option2"])
-            else:
-                raise ValueError(f"Unsupported list element type: {elem_type}")
-        else:
-            raise ValueError(f"Unsupported type: {t}")
-    # Extract function signature and type hints
-    sig = inspect.signature(func)
-    type_hints = get_type_hints(func)
-    # Map parameters to Gradio inputs
-    inputs = []
-    for name, param in sig.parameters.items():
-        if name == "self":
-            continue  # Skip self in class methods
-        param_type = type_hints.get(name, Any)
-        component = _map_type(param_type)
-        component.label = name.replace("_", " ").title()
-        inputs.append(component)
-    # Map return type to Gradio output
-    return_type = type_hints.get("return", Any)
-    outputs = _map_type(return_type)
-    # Wrap function with Gradio interface
-    interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
-    with gr.Blocks() as demo:
-        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
-        interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
-    def wrapper(*args, **kwargs):
-        return func(*args, **kwargs)
-    wrapper.launch = lambda: demo.launch()
-    return wrapper
-@gradio_app_with_docs
-def generate_response(prompt: str) -> str:
-    """
-    Title: Super Tiny GPTQ V2 Model on CPU
-    Description: A Simple app to test out the potentials of small GPTQ LLM model.
-    Args:
-        prompt (str): A simple prompt.
-    Returns:
-        str: Simplified response.
-    """
-    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=50,
-        temperature=0.7,
-        top_p=0.9
-    )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# # Example usage
-# prompt = "Explain quantum computing in simple terms."
-# response = generate_response(prompt)
-# print(response)
-if __name__ == "__main__":
     generate_response.launch()

+from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
+import inspect
+from typing import get_type_hints, Callable, Any
+import gradio as gr
+model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Define GPTQ configuration
+#gptq_config = GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)
+# Load pre-quantized model on CPU
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="cpu"  # Explicitly enforce CPU execution
+    # quantization_config=gptq_config,
+)
+def parse_docstring(func):
+    doc = inspect.getdoc(func)
+    if not doc:
+        return {"title": "Untitled", "description": ""}
+    lines = doc.splitlines()
+    title = next((line.replace("Title:", "").strip() for line in lines if line.startswith("Title:")), "Untitled")
+    description = "\n".join(line.strip() for line in lines if line.startswith("Description:"))
+    description = description.replace("Description:", "").strip()
+    return {"title": title, "description": description}
+def gradio_app_with_docs(func: Callable) -> Callable:
+    sig = inspect.signature(func)
+    type_hints = get_type_hints(func)
+    metadata = parse_docstring(func)
+    """
+    A decorator that automatically builds and launches a Gradio interface
+    based on function type hints.
+    Args:
+        func: A callable with type-hinted parameters and return type.
+    Returns:
+        The wrapped function with a `.launch()` method to start the app.
+    """
+    # Infer Gradio components from type hints
+    def _map_type(t: type) -> gr.Component:
+        if t == str:
+            return gr.Textbox(label="Input")
+        elif t == int:
+            return gr.Number(precision=0)
+        elif t == float:
+            return gr.Number()
+        elif t == bool:
+            return gr.Checkbox()
+        elif hasattr(t, "__origin__") and t.__origin__ == list:  # Handle List[type]
+            elem_type = t.__args__[0]
+            if elem_type == str:
+                return gr.Dropdown(choices=["Option1", "Option2"])
+            else:
+                raise ValueError(f"Unsupported list element type: {elem_type}")
+        else:
+            raise ValueError(f"Unsupported type: {t}")
+    # Extract function signature and type hints
+    sig = inspect.signature(func)
+    type_hints = get_type_hints(func)
+    # Map parameters to Gradio inputs
+    inputs = []
+    for name, param in sig.parameters.items():
+        if name == "self":
+            continue  # Skip self in class methods
+        param_type = type_hints.get(name, Any)
+        component = _map_type(param_type)
+        component.label = name.replace("_", " ").title()
+        inputs.append(component)
+    # Map return type to Gradio output
+    return_type = type_hints.get("return", Any)
+    outputs = _map_type(return_type)
+    # Wrap function with Gradio interface
+    interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
+    with gr.Blocks() as demo:
+        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
+        interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
+    def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+    wrapper.launch = lambda: demo.launch()
+    return wrapper
+@gradio_app_with_docs
+def generate_response(prompt: str) -> str:
+    """
+    Title: Super Tiny GPTQ V2 Model on CPU
+    Description: A Simple app to test out the potentials of small GPTQ LLM model.
+    Args:
+        prompt (str): A simple prompt.
+    Returns:
+        str: Simplified response.
+    """
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=50,
+        temperature=0.7,
+        top_p=0.9
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# # Example usage
+# prompt = "Explain quantum computing in simple terms."
+# response = generate_response(prompt)
+# print(response)
+if __name__ == "__main__":
     generate_response.launch()