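# Page header captured together with this file (not part of the program):
#   uploaded by: wolfofbackstreet
#   commit message: "Update app.py"
#   commit: 019895a (verified)
#   file size: 4.06 kB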
import functools
import inspect
from typing import get_type_hints, Callable, Any

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
# Identifier of the pre-quantized checkpoint that the tokenizer and the model
# are both loaded from.
model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The checkpoint is loaded as-is (pre-quantized) and pinned to the CPU.
# An explicit quantization config is currently disabled; the previously used
# settings were:
#   GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False)
# passed as `quantization_config=...`.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
def parse_docstring(func):
    """
    Extract display metadata from an object's docstring.

    The docstring is read with ``inspect.getdoc`` (which normalizes
    indentation) and scanned line by line for two markers that must appear
    at the start of a line:

    * ``Title:`` - the first such line provides the title.
    * ``Description:`` - every such line contributes one line of description.

    Args:
        func: Object whose docstring is inspected (typically a function).

    Returns:
        A dict with the keys ``"title"`` and ``"description"``. The title is
        ``"Untitled"`` when there is no docstring or no ``Title:`` line; the
        description is ``""`` when there is no ``Description:`` line.
    """
    title_marker = "Title:"
    description_marker = "Description:"

    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    title = None
    description_lines = []
    for line in doc.splitlines():
        if line.startswith(title_marker):
            # Only the first title line counts; later ones are ignored.
            if title is None:
                # Slice off just the leading marker so a title that mentions
                # "Title:" again is not mangled.
                title = line[len(title_marker):].strip()
        elif line.startswith(description_marker):
            # Same for the description: strip the prefix only, per line.
            description_lines.append(line[len(description_marker):].strip())

    return {
        "title": "Untitled" if title is None else title,
        "description": "\n".join(description_lines).strip(),
    }
def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds a Gradio interface based on
    function type hints.

    The title and description shown above the interface are taken from the
    ``Title:`` / ``Description:`` lines of ``func``'s docstring (see
    ``parse_docstring``).

    Args:
        func: A callable with type-hinted parameters and return type.
            Supported hints are ``str``, ``int``, ``float``, ``bool`` and
            ``List[str]``.

    Returns:
        The wrapped function with a ``.launch()`` method to start the app.
        Arguments given to ``.launch()`` are forwarded to the underlying
        ``gr.Blocks.launch``.

    Raises:
        ValueError: If a parameter or the return value has a missing or
            unsupported type hint.
    """
    metadata = parse_docstring(func)
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)

    # Infer a Gradio component from a type hint.
    def _map_type(t: type, label: str) -> gr.Component:
        if t == str:
            return gr.Textbox(label=label)
        if t == int:
            return gr.Number(label=label, precision=0)
        if t == float:
            return gr.Number(label=label)
        if t == bool:
            return gr.Checkbox(label=label)
        if getattr(t, "__origin__", None) == list:  # Handle List[type]
            # A bare ``List`` may carry no type arguments; treat that as an
            # unsupported element type instead of failing on the lookup.
            elem_args = getattr(t, "__args__", ())
            elem_type = elem_args[0] if elem_args else None
            if elem_type == str:
                return gr.Dropdown(choices=["Option1", "Option2"], label=label)
            raise ValueError(f"Unsupported list element type: {elem_type}")
        raise ValueError(f"Unsupported type: {t}")

    # Map parameters to Gradio inputs, labelled after the parameter name
    # ("max_tokens" -> "Max Tokens"). ``self`` is skipped for methods.
    # Parameters without a hint fall back to ``Any``, which is rejected by
    # ``_map_type``.
    inputs = [
        _map_type(type_hints.get(name, Any), name.replace("_", " ").title())
        for name in sig.parameters
        if name != "self"
    ]

    # Map return type to the Gradio output component.
    outputs = _map_type(type_hints.get("return", Any), "Output")

    # Build the page once: a Markdown header followed by the interface.
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    # Keep the wrapped function's name and docstring on the returned callable.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    def launch(*args, **kwargs):
        return demo.launch(*args, **kwargs)

    wrapper.launch = launch
    return wrapper
@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GPTQ V2 Model on CPU
    Description: A Simple app to test out the potentials of small GPTQ LLM model.

    Args:
        prompt (str): Text passed to the tokenizer and then to the model.

    Returns:
        str: The first sequence returned by ``model.generate``, decoded with
        special tokens removed.
    """
    # NOTE: the "Title:" and "Description:" docstring lines above are read at
    # runtime by the decorator and shown in the UI; edit them with care.
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        # Sampling must be enabled explicitly; otherwise `temperature` and
        # `top_p` are not used by the generation call.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# # Example usage
# prompt = "Explain quantum computing in simple terms."
# response = generate_response(prompt)
# print(response)
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server.
if __name__ == "__main__":
    generate_response.launch()