import functools
import inspect
from typing import Any, Callable, get_args, get_origin, get_type_hints
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Load Model and Tokenizer ---
model_id = "unsloth/SmolLM2-135M-Instruct-GGUF"
filename = "SmolLM2-135M-Instruct-Q8_0.gguf"

tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
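# Note: transformers dequantizes GGUF weights to full precision at load time,
# so this runs as a plain PyTorch model on CPU (no llama.cpp runtime needed).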

# --- System Prompt Template ---
SYSTEM_PROMPT = """You are a helpful AI assistant. Your job is to provide clear and concise responses based on the user's input.
Keep your answers straightforward and avoid unnecessary information."""

def parse_docstring(func):
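    """Extract the 'Title:' and 'Description:' lines from a function's docstring."""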
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}

    lines = doc.splitlines()
    title = next((line.replace("Title:", "").strip() for line in lines if line.startswith("Title:")), "Untitled")
    description = "\n".join(line.strip() for line in lines if line.startswith("Description:"))
    description = description.replace("Description:", "").strip()

    return {"title": title, "description": description}


def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        The wrapped function with a `.launch()` method to start the app.
    """
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)
    metadata = parse_docstring(func)

    def _map_type(t: type) -> gr.Component:
        """Map a Python type annotation to a Gradio component."""
        if t is str:
            return gr.Textbox()
        elif t is int:
            return gr.Number(precision=0)
        elif t is float:
            return gr.Number()
        elif t is bool:
            return gr.Checkbox()
        elif get_origin(t) is list:
            elem_type = get_args(t)[0]
            if elem_type is str:
                return gr.Dropdown(choices=["Option1", "Option2"])
            raise ValueError(f"Unsupported list element type: {elem_type}")
        raise ValueError(f"Unsupported type: {t}")

    # Build input components from the parameter annotations
    inputs = []
    for name in sig.parameters:
        if name == "self":
            continue
        param_type = type_hints.get(name, Any)
        component = _map_type(param_type)
        component.label = name.replace("_", " ").title()
        inputs.append(component)

    # Build the output component from the return annotation
    return_type = type_hints.get("return", Any)
    outputs = _map_type(return_type)
    outputs.label = "Output"

    # Build the page: a title/description header plus the generated interface
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    wrapper.launch = demo.launch
    return wrapper
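# Note: the interface is built at decoration time, so a decorated function
# must carry complete type hints when the module is imported.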


@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GGUF Model on CPU
    Description: A Simple app to test out the potentials of small GGUF LLM model.
    Args:
        prompt (str): A simple prompt.
    Returns:
        str: Simplified response.
    """
    # Build the chat messages: system prompt plus the user's input
    
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt}
    ]
    
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True  # append the assistant header so the model starts its reply
    )
    
    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        # temperature=0.7,
        # top_p=0.9
    )
    # Decode only the newly generated tokens; decoding outputs[0] directly
    # would echo the prompt back into the response.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


if __name__ == "__main__":
    generate_response.launch()