Spaces:
Sleeping
Sleeping
import functools
import inspect
from typing import get_type_hints, Callable, Any

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
# Hugging Face Hub repository id of the checkpoint served by this app.
model_name = "wolfofbackstreet/SmolLM2-135M-int4-qptq-v2"

# Tokenizer published alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with every weight placed on the CPU.
# No explicit quantization_config is passed (a
# GPTQConfig(bits=4, use_exllama=False, use_cuda_fp16=False) was previously
# sketched here but left disabled); presumably the quantization settings are
# read from the checkpoint's own config -- TODO confirm.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
def parse_docstring(func):
    """
    Extract the ``Title:`` and ``Description:`` fields from a docstring.

    The docstring is read with ``inspect.getdoc`` (so common indentation is
    already removed). A line contributes to a field only when it *starts*
    with the field marker.

    Args:
        func: Any object whose docstring should be inspected.

    Returns:
        dict: ``{"title": str, "description": str}``. ``title`` is the text
        of the first ``Title:`` line, or ``"Untitled"`` when there is none.
        ``description`` is the text of all ``Description:`` lines joined
        with newlines, or ``""`` when there is none (or no docstring).
    """
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    lines = doc.splitlines()

    def _field_values(prefix):
        # Slice off only the leading marker. Using str.replace here would
        # also delete the marker text when it appears later in the line
        # (e.g. "Title: Ratio Title: explained").
        return [
            line[len(prefix):].strip()
            for line in lines
            if line.startswith(prefix)
        ]

    titles = _field_values("Title:")
    descriptions = _field_values("Description:")

    return {
        "title": titles[0] if titles else "Untitled",
        "description": "\n".join(descriptions).strip(),
    }
def gradio_app_with_docs(func: Callable) -> Callable:
    """
    Build a Gradio app around ``func`` from its type hints and docstring.

    Usable as a decorator or called directly on a function. Each parameter
    of ``func`` (except one named ``self``) becomes an input component
    chosen from its type hint, the return annotation becomes the output
    component, and the ``Title:`` / ``Description:`` fields returned by
    ``parse_docstring`` are rendered as a Markdown header above the
    interface.

    Supported hints: ``str``, ``int``, ``float``, ``bool`` and ``List[str]``.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        A wrapper that forwards calls to ``func`` and carries a ``.launch()``
        attribute which starts the app.

    Raises:
        ValueError: If a parameter or the return value has a missing or
            unsupported type hint.
    """
    metadata = parse_docstring(func)
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)

    def _map_type(t: type, label: str) -> gr.Component:
        # Translate one type hint into a labelled Gradio component.
        if t is str:
            return gr.Textbox(label=label)
        if t is int:
            return gr.Number(precision=0, label=label)
        if t is float:
            return gr.Number(label=label)
        if t is bool:
            return gr.Checkbox(label=label)
        if getattr(t, "__origin__", None) is list:  # Handle List[type]
            elem_args = getattr(t, "__args__", ())
            elem_type = elem_args[0] if elem_args else Any
            if elem_type is str:
                return gr.Dropdown(choices=["Option1", "Option2"], label=label)
            raise ValueError(f"Unsupported list element type: {elem_type}")
        raise ValueError(f"Unsupported type: {t}")

    # One input component per parameter, labelled after the parameter name.
    inputs = [
        _map_type(type_hints.get(name, Any), name.replace("_", " ").title())
        for name in sig.parameters
        if name != "self"  # Skip self in class methods
    ]

    # The output gets its own label; previously a ``str`` return type reused
    # the Textbox labelled "Input".
    outputs = _map_type(type_hints.get("return", Any), "Output")

    # Build the Interface exactly once, inside the Blocks context, so the
    # Markdown header and the interface end up in the same app.
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    @functools.wraps(func)  # keep func's name, docstring and signature
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Expose the app's launch method; calling it with no arguments behaves
    # as before, and launch options can now be forwarded as well.
    wrapper.launch = demo.launch
    return wrapper
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GPTQ V2 Model on CPU
    Description: A Simple app to test out the potentials of small GPTQ LLM model.
    Args:
        prompt (str): A simple prompt.
    Returns:
        str: Simplified response.
    """
    # NOTE: the "Title:" and "Description:" lines above are read at runtime
    # by parse_docstring and shown in the UI, so they are kept verbatim.

    # Tokenize the prompt into PyTorch tensors and keep them on the CPU,
    # matching the device the model was loaded on.
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        # temperature/top_p only apply when sampling is enabled; without
        # do_sample=True they are not used by the default decoding strategy.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Decode the first (only) returned sequence.
    # NOTE(review): for causal LMs the returned sequence presumably starts
    # with the prompt tokens, so the text likely echoes the prompt -- confirm.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# # Example usage | |
# prompt = "Explain quantum computing in simple terms." | |
# response = generate_response(prompt) | |
# print(response) | |
if __name__ == "__main__":
    # A plain function has no ``.launch`` attribute, so calling
    # ``generate_response.launch()`` directly raises AttributeError unless
    # the function was wrapped by gradio_app_with_docs. Wrap it here when
    # needed, then start the app.
    app = generate_response
    if not hasattr(app, "launch"):
        app = gradio_app_with_docs(app)
    app.launch()