nikravan committed on
Commit
466bfc4
·
verified ·
1 Parent(s): e210cea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -25
app.py CHANGED
@@ -2,7 +2,7 @@ import torch
2
  from PIL import Image
3
  import gradio as gr
4
  import spaces
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
  import os
7
  from threading import Thread
8
 
@@ -34,15 +34,19 @@ h1 {
34
  }
35
  """
36
 
 
 
 
 
 
 
 
37
 
38
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
39
 
40
-
41
-
42
def extract_text(path):
    """Return the full contents of the plain-text file at *path*.

    Fixes a resource leak in the original: `open(path, 'r').read()` never
    closed the file handle. A `with` block guarantees the handle is closed
    even if `read()` raises.
    """
    with open(path, 'r') as f:
        return f.read()
44
 
45
-
46
  def extract_pdf(path):
47
  doc = pymupdf.open(path)
48
  text = ""
@@ -50,7 +54,6 @@ def extract_pdf(path):
50
  text += page.get_text()
51
  return text
52
 
53
-
54
  def extract_docx(path):
55
  doc = docx.Document(path)
56
  data = []
@@ -59,7 +62,6 @@ def extract_docx(path):
59
  content = '\n\n'.join(data)
60
  return content
61
 
62
-
63
  def extract_pptx(path):
64
  prs = Presentation(path)
65
  text = ""
@@ -69,7 +71,6 @@ def extract_pptx(path):
69
  text += shape.text + "\n"
70
  return text
71
 
72
-
73
  def mode_load(path):
74
  choice = ""
75
  file_type = path.split(".")[-1]
@@ -87,7 +88,6 @@ def mode_load(path):
87
  print(content[:100])
88
  return choice, content[:5000]
89
 
90
-
91
  elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
92
  content = Image.open(path).convert('RGB')
93
  choice = "image"
@@ -96,7 +96,6 @@ def mode_load(path):
96
  else:
97
  raise gr.Error("Oops, unsupported files.")
98
 
99
-
100
  @spaces.GPU()
101
  def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
102
 
@@ -104,7 +103,9 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
104
  MODEL_ID,
105
  torch_dtype=torch.bfloat16,
106
  low_cpu_mem_usage=True,
107
- trust_remote_code=True
 
 
108
  )
109
 
110
  print(f'message is - {message}')
@@ -120,11 +121,9 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
120
  conversation.append({"role": "user", "content": format_msg})
121
  else:
122
  if len(history) == 0:
123
- # raise gr.Error("Please upload an image first.")
124
  contents = None
125
  conversation.append({"role": "user", "content": message['text']})
126
  else:
127
- # image = Image.open(history[0][0][0])
128
  for prompt, answer in history:
129
  if answer is None:
130
  prompt_files.append(prompt[0])
@@ -137,7 +136,6 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
137
  choice = ""
138
  conversation.append({"role": "user", "image": "", "content": message['text']})
139
 
140
-
141
  if choice == "image":
142
  conversation.append({"role": "user", "image": contents, "content": message['text']})
143
  elif choice == "doc":
@@ -169,19 +167,13 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
169
  buffer += new_text
170
  yield buffer
171
 
172
-
173
- chatbot = gr.Chatbot(
174
- #rtl=True,
175
- )
176
  chat_input = gr.MultimodalTextbox(
177
  interactive=True,
178
  placeholder="Enter message or upload a file ...",
179
  show_label=False,
180
- #rtl=True,
181
-
182
-
183
-
184
  )
 
185
  EXAMPLES = [
186
  [{"text": "Write a poem about spring season in French Language", }],
187
  [{"text": "what does this chart mean?", "files": ["sales.png"]}],
@@ -195,8 +187,6 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
195
  gr.ChatInterface(
196
  fn=stream_chat,
197
  multimodal=True,
198
-
199
-
200
  textbox=chat_input,
201
  chatbot=chatbot,
202
  fill_height=True,
@@ -247,5 +237,4 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
247
  gr.Examples(EXAMPLES, [chat_input])
248
 
249
  if __name__ == "__main__":
250
-
251
- demo.queue(api_open=False).launch(show_api=False, share=False, )#server_name="0.0.0.0", )
 
2
  from PIL import Image
3
  import gradio as gr
4
  import spaces
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
6
  import os
7
  from threading import Thread
8
 
 
34
  }
35
  """
36
 
37
+ # Configure BitsAndBytes for 4-bit quantization
38
+ quantization_config = BitsAndBytesConfig(
39
+ load_in_4bit=True,
40
+ bnb_4bit_compute_dtype=torch.bfloat16,
41
+ bnb_4bit_quant_type="nf4",
42
+ bnb_4bit_use_double_quant=True,
43
+ )
44
 
45
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
46
 
 
 
47
def extract_text(path):
    """Return the full contents of the plain-text file at *path*.

    Uses a context manager instead of the original bare
    `open(path, 'r').read()`, which leaked the file handle.
    """
    with open(path, 'r') as f:
        return f.read()
49
 
 
50
  def extract_pdf(path):
51
  doc = pymupdf.open(path)
52
  text = ""
 
54
  text += page.get_text()
55
  return text
56
 
 
57
  def extract_docx(path):
58
  doc = docx.Document(path)
59
  data = []
 
62
  content = '\n\n'.join(data)
63
  return content
64
 
 
65
  def extract_pptx(path):
66
  prs = Presentation(path)
67
  text = ""
 
71
  text += shape.text + "\n"
72
  return text
73
 
 
74
  def mode_load(path):
75
  choice = ""
76
  file_type = path.split(".")[-1]
 
88
  print(content[:100])
89
  return choice, content[:5000]
90
 
 
91
  elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
92
  content = Image.open(path).convert('RGB')
93
  choice = "image"
 
96
  else:
97
  raise gr.Error("Oops, unsupported files.")
98
 
 
99
  @spaces.GPU()
100
  def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
101
 
 
103
  MODEL_ID,
104
  torch_dtype=torch.bfloat16,
105
  low_cpu_mem_usage=True,
106
+ trust_remote_code=True,
107
+ quantization_config=quantization_config,
108
+ device_map="auto"
109
  )
110
 
111
  print(f'message is - {message}')
 
121
  conversation.append({"role": "user", "content": format_msg})
122
  else:
123
  if len(history) == 0:
 
124
  contents = None
125
  conversation.append({"role": "user", "content": message['text']})
126
  else:
 
127
  for prompt, answer in history:
128
  if answer is None:
129
  prompt_files.append(prompt[0])
 
136
  choice = ""
137
  conversation.append({"role": "user", "image": "", "content": message['text']})
138
 
 
139
  if choice == "image":
140
  conversation.append({"role": "user", "image": contents, "content": message['text']})
141
  elif choice == "doc":
 
167
  buffer += new_text
168
  yield buffer
169
 
170
+ chatbot = gr.Chatbot()
 
 
 
171
  chat_input = gr.MultimodalTextbox(
172
  interactive=True,
173
  placeholder="Enter message or upload a file ...",
174
  show_label=False,
 
 
 
 
175
  )
176
+
177
  EXAMPLES = [
178
  [{"text": "Write a poem about spring season in French Language", }],
179
  [{"text": "what does this chart mean?", "files": ["sales.png"]}],
 
187
  gr.ChatInterface(
188
  fn=stream_chat,
189
  multimodal=True,
 
 
190
  textbox=chat_input,
191
  chatbot=chatbot,
192
  fill_height=True,
 
237
  gr.Examples(EXAMPLES, [chat_input])
238
 
239
  if __name__ == "__main__":
240
+ demo.queue(api_open=False).launch(show_api=False, share=False)