|
import inspect |
|
from typing import get_type_hints, Callable, Any |
|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
|
|
# Hugging Face repo id and the specific quantized GGUF weight file to load.
model_id = "unsloth/SmolLM2-135M-Instruct-GGUF"

filename = "SmolLM2-135M-Instruct-Q8_0.gguf"


# Load tokenizer and model straight from the GGUF file (CPU-friendly 135M model).
# NOTE(review): this downloads weights at import time — runs on module import.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)

model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)




# System message prepended to every chat request sent to the model.
SYSTEM_PROMPT = """You are a helpful AI assistant. Your job is to provide clear and concise responses based on the user's input.

Keep your answers straightforward and avoid unnecessary information."""
|
|
|
def parse_docstring(func):
    """Extract ``Title:`` and ``Description:`` fields from *func*'s docstring.

    Returns:
        dict: ``{"title": ..., "description": ...}`` where the title defaults
        to ``"Untitled"`` and the description to ``""`` when the corresponding
        prefixed line is absent (or there is no docstring at all).
    """
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    doc_lines = doc.splitlines()

    # First line prefixed with "Title:" wins; fall back to "Untitled".
    title = "Untitled"
    for doc_line in doc_lines:
        if doc_line.startswith("Title:"):
            title = doc_line.replace("Title:", "").strip()
            break

    # Collect every "Description:"-prefixed line, then drop the prefixes.
    desc_lines = [doc_line.strip() for doc_line in doc_lines
                  if doc_line.startswith("Description:")]
    description = "\n".join(desc_lines).replace("Description:", "").strip()

    return {"title": title, "description": description}
|
|
|
|
|
def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        The wrapped function with a `.launch()` method to start the app.

    Raises:
        ValueError: If a parameter or return annotation has no Gradio mapping.
    """
    # NOTE: this docstring was previously placed *after* the first statements,
    # which made it a dead string expression invisible to inspect.getdoc/help().
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)
    metadata = parse_docstring(func)

    def _map_type(t: type) -> gr.Component:
        # Map a Python type annotation to a Gradio component.
        if t is str:
            return gr.Textbox(label="Input")
        elif t is int:
            return gr.Number(precision=0)  # precision=0 -> integer-only field
        elif t is float:
            return gr.Number()
        elif t is bool:
            return gr.Checkbox()
        elif hasattr(t, "__origin__") and t.__origin__ is list:
            elem_type = t.__args__[0]
            if elem_type is str:
                # Placeholder choices; list[str] params render as a dropdown.
                return gr.Dropdown(choices=["Option1", "Option2"])
            raise ValueError(f"Unsupported list element type: {elem_type}")
        else:
            raise ValueError(f"Unsupported type: {t}")

    # One input component per parameter (skipping `self` for bound methods).
    inputs = []
    for name, param in sig.parameters.items():
        if name == "self":
            continue
        param_type = type_hints.get(name, Any)
        component = _map_type(param_type)
        # Human-friendly label derived from the parameter name.
        component.label = name.replace("_", " ").title()
        inputs.append(component)

    return_type = type_hints.get("return", Any)
    outputs = _map_type(return_type)

    # Markdown header (title + description from the docstring) above the form.
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Expose the Blocks app via `.launch()` on the wrapped callable.
    wrapper.launch = lambda: demo.launch()
    return wrapper
|
|
|
|
|
@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GGUF Model on CPU

    Description: A Simple app to test out the potentials of small GGUF LLM model.

    Args:
        prompt (str): A simple prompt.

    Returns:
        str: Simplified response.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    # Render the chat template as plain text; add_generation_prompt cues the
    # model to produce the assistant turn.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=True,
    )

    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
    )

    # model.generate returns prompt + completion; slice off the prompt tokens
    # so only the newly generated assistant text is returned. (Previously the
    # full sequence was decoded, echoing the whole chat template back.)
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
|
|
|
|
|
if __name__ == "__main__":
    # `generate_response` was wrapped by @gradio_app_with_docs, which attached
    # a `.launch()` method that starts the Gradio app.
    generate_response.launch()