svjack committed on
Commit
cba9821
·
1 Parent(s): 7c49807

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import chatglm_cpp
import gradio as gr
from pathlib import Path

# Quantized ChatGLM2 weights in GGML q4_0 format, expected next to this script.
model_file_path = "chatglm2-ggml-q4_0.bin"
# Load the model once at import time; the single pipeline instance is shared
# by every request handled below.
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))

# Example prompts surfaced in the UI (mixed Chinese / English).
examples = [
    "如何弘扬中华传统文化?",
    "How to promote Chinese traditional culture ?",
    "如何学好历史?",
    "写一段孔子与马克思的对话录。",
    "如何进行经济建设?",
]
15
+
16
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Stream a ChatGLM2 completion for *instruction*.

    Yields the accumulated response text after every new token so Gradio can
    render partial output progressively.

    NOTE(review): *seed* is accepted (the UI wires it in) but is never
    forwarded to the model — chatglm_cpp's generate() is called without it.
    Confirm whether the binding supports seeding; until then generation is
    not reproducible.
    """
    token_stream = chatglm_llm.generate(
        prompt=instruction,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_length=max_new_tokens,
        stream=True,
    )
    accumulated = ""
    for piece in token_stream:
        accumulated += piece
        yield accumulated
26
+
27
+
28
# --- UI layout --------------------------------------------------------------
# Declarative Gradio interface: a prompt textbox, an "Advanced Options"
# accordion of sampling controls, a streaming Markdown output, and example
# prompts. Both the Submit button and pressing Enter in the textbox invoke
# process_stream.
# NOTE(review): indentation below was reconstructed from a diff view that
# stripped leading whitespace — confirm widget nesting against the running app.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    gr.Markdown(
        """<h1><center> ChatGLM2 on CPU in CPP 🐒</center></h1>

This demo uses the [chatglm.cpp](https://github.com/li-plus/chatglm.cpp) library on 2 CPU cores.
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Free-form user prompt; also the trigger for Enter-to-submit.
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            # Passed straight through to chatglm generate().
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            # Mapped to generate()'s max_length in process_stream.
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )

                    with gr.Column():
                        with gr.Row():
                            # NOTE(review): this value is sent to process_stream,
                            # which currently ignores it — see that function.
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0
                            )
            with gr.Row():
                submit = gr.Button("Submit")
            with gr.Row():
                with gr.Box():
                    gr.Markdown("**ChatGLM2-6b**")
                    # Streaming target: each yield from process_stream
                    # replaces this Markdown's content.
                    output_7b = gr.Markdown()

    with gr.Row():
        # cache_examples=False: clicking an example only fills the textbox;
        # fn is not pre-run.
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )

    # Button click and textbox Enter both drive the same streaming generator.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens,seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens,seed],
        outputs=output_7b,
    )

# Single worker with a short queue — the model runs on only 2 CPU cores.
# (concurrency_count is the pre-4.x Gradio queue API.)
demo.queue(max_size=4, concurrency_count=1).launch(debug=True)