File size: 2,285 Bytes
eca00c9
34e9d48
 
 
9ff10ae
eca00c9
 
 
 
 
 
 
 
 
 
 
 
 
 
9ff10ae
eca00c9
 
 
 
 
 
 
d63c268
 
 
27ff1f5
c9d3334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27ff1f5
 
 
 
 
 
 
 
 
eca00c9
 
0f04f5f
27ff1f5
0f04f5f
27ff1f5
eca00c9
 
 
 
 
 
 
 
 
 
c9d3334
d63c268
 
 
 
eca00c9
 
 
c9d3334
eca00c9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# ------------------- LIBRARIES -------------------- #
import os, logging, torch, streamlit as st
from transformers import (
    AutoTokenizer, AutoModelForCausalLM)

# --------------------- HELPER --------------------- #
def C(text, color="yellow"):
    """Wrap *text* in ANSI escape sequences so terminals print it in *color*.

    Args:
        text: Value to colorize. It is interpolated with an f-string, so any
            object with a string representation is accepted.
        color: One of ``"red"``, ``"green"``, ``"yellow"``, ``"blue"``,
            ``"magenta"`` or ``"cyan"``. ``None`` or an unrecognized name
            falls back to the reset sequence, i.e. no color is applied.

    Returns:
        str: The color sequence, then the text, then the reset sequence.
    """
    reset = "\033[0m"
    color_dict = {
        "red": "\033[01;31m",
        "green": "\033[01;32m",
        "yellow": "\033[01;33m",
        "blue": "\033[01;34m",
        "magenta": "\033[01;35m",
        "cyan": "\033[01;36m",
        None: reset,
    }
    # Default to the reset code rather than None: a None default would be
    # formatted as the literal text "None" in front of the message.
    prefix = color_dict.get(color, reset)
    return f"{prefix}{text}{reset}"

# ------------------ ENVIRONMENT ------------------- #
# Point the Hugging Face client libraries at the official hub endpoint.
os.environ["HF_ENDPOINT"] = "https://huggingface.co"

# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
logging.info(C(f"[INFO] device = {device}"))

# ------------------- INITIALIZE ------------------- #
@st.cache(
        suppress_st_warning=True
)
def model_init():
    """Load the BLOOM tokenizer and causal language model used by the app.

    The function is wrapped in ``st.cache`` so the loaded objects can be
    reused instead of being rebuilt each time the script body runs.
    ``suppress_st_warning=True`` is passed because the function itself calls
    a Streamlit command (``st.balloons``).

    Returns:
        tuple: ``(tokenizer, model)`` where ``model`` has been switched to
        eval mode and moved to the module-level ``device``.
    """
    checkpoint = "ckip-joint/bloom-1b1-zh"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # NOTE: `torch_dtype="auto"` / `device_map="auto"` (ref.: Eric) and
    # half precision (ref.: Chan-Jan) were considered but are not enabled;
    # the model is loaded with library defaults and moved to `device` below.
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    model = model.eval().to(device)

    st.balloons()
    logging.info(C("[INFO] Model init success!"))
    return tokenizer, model

# Obtain the tokenizer / model pair from the cached initializer.
tokenizer, model = model_init()

try:
    # ===================== INPUT ====================== #
    prompt = st.text_input("Prompt: ")

    # =================== INFERENCE ==================== #
    # An empty prompt is falsy, so nothing is generated until text is entered.
    if prompt:
        st.balloons()
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            # Unpacking into a one-element list requires that exactly one
            # sequence comes back from `generate`.
            [texts_out] = model.generate(**inputs, max_new_tokens=200)
        output_text = tokenizer.decode(texts_out)
        st.balloons()
        st.markdown(output_text)

except Exception as err:
    # Top-level boundary of the script: show the error message on the page
    # instead of letting the exception propagate.
    st.write(str(err))
    st.snow()