Daemontatox committed
Commit 45c547e · verified · 1 Parent(s): ecc54fe

Update app.py

Files changed (1):
  1. app.py +52 -42
app.py CHANGED
@@ -7,47 +7,46 @@ import gradio as gr
 from gradio import FileData
 import time
 import spaces
-ckpt ="Daemontatox/DocumentCogito"
+
+ckpt = "Daemontatox/DocumentCogito"
 model = MllamaForConditionalGeneration.from_pretrained(ckpt,
     torch_dtype=torch.bfloat16).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
 
+SYSTEM_PROMPT = """You are a helpful AI assistant specialized in analyzing documents, images, and visual content.
+Your responses should be clear, accurate, and focused on the specific details present in the provided materials.
+When analyzing documents, pay attention to key information, formatting, and context.
+For images, consider both obvious and subtle details that might be relevant to the user's query."""
 
 @spaces.GPU()
-def bot_streaming(message, history, max_new_tokens=2048):
-
+def bot_streaming(message, history, max_new_tokens=2048, temperature=0.7):
     txt = message["text"]
     ext_buffer = f"{txt}"
 
-    messages= []
+    messages = [{"role": "system", "content": [{"type": "text", "text": SYSTEM_PROMPT}]}]
     images = []
 
-
-    for i, msg in enumerate(history):
+    for i, msg in enumerate(history):
         if isinstance(msg[0], tuple):
             messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
             messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
             images.append(Image.open(msg[0][0]).convert("RGB"))
         elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
-            # messages are already handled
            pass
-        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str): # text only turn
+        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):
             messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
             messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
 
-    # add current message
     if len(message["files"]) == 1:
-
-        if isinstance(message["files"][0], str): # examples
+        if isinstance(message["files"][0], str):
             image = Image.open(message["files"][0]).convert("RGB")
-        else: # regular input
+        else:
             image = Image.open(message["files"][0]["path"]).convert("RGB")
         images.append(image)
         messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
     else:
         messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
 
-
     texts = processor.apply_chat_template(messages, add_generation_prompt=True)
 
     if images == []:
@@ -56,8 +55,13 @@ def bot_streaming(message, history, max_new_tokens=2048):
     inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
 
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
-    generated_text = ""
+    generation_kwargs = dict(
+        inputs,
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,  # Add temperature parameter
+        do_sample=True,  # Enable sampling for temperature to take effect
+    )
 
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
@@ -69,32 +73,38 @@ def bot_streaming(message, history, max_new_tokens=2048):
         time.sleep(0.01)
         yield buffer
 
-
-demo = gr.ChatInterface(fn=bot_streaming, title="Document Analyzer", examples=[
-    [{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]},
-     200],
-    [{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]},
-     250],
-    [{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]},
-     250],
-    [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]},
-     250],
-    [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]},
-     250],
+demo = gr.ChatInterface(
+    fn=bot_streaming,
+    title="Document Analyzer",
+    examples=[
+        [{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]}, 200, 0.7],
+        [{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]}, 250, 0.7],
+        [{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]}, 250, 0.7],
+        [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]}, 250, 0.7],
+        [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]}, 250, 0.7],
     ],
-    textbox=gr.MultimodalTextbox(),
-    additional_inputs = [gr.Slider(
-        minimum=10,
-        maximum=500,
-        value=2048,
-        step=10,
-        label="Maximum number of new tokens to generate",
-    )
-    ],
-    cache_examples=False,
-    description="MllM ",
-    stop_btn="Stop Generation",
-    fill_height=True,
-    multimodal=True)
-
+    textbox=gr.MultimodalTextbox(),
+    additional_inputs=[
+        gr.Slider(
+            minimum=10,
+            maximum=500,
+            value=2048,
+            step=10,
+            label="Maximum number of new tokens to generate",
+        ),
+        gr.Slider(  # Add temperature slider
+            minimum=0.1,
+            maximum=2.0,
+            value=0.2,
+            step=0.1,
+            label="Temperature (0.1 = focused, 2.0 = creative)",
+        )
+    ],
+    cache_examples=False,
+    description="MllM with Temperature Control",
+    stop_btn="Stop Generation",
+    fill_height=True,
+    multimodal=True
+)
+
 demo.launch(debug=True)
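A note on how the new controls reach the model function: `gr.ChatInterface` passes the current value of each component in `additional_inputs` as an extra positional argument after `(message, history)`. That is why `bot_streaming` gains the `max_new_tokens` and `temperature` parameters, and why every row in `examples` now carries two extra values, one per slider. A minimal, self-contained sketch of that wiring, with an illustrative echo function standing in for the real model:

```python
import gradio as gr

# Sketch only: ChatInterface forwards each additional_inputs component's
# value as an extra positional argument after (message, history).
def echo(message, history, max_new_tokens=2048, temperature=0.7):
    text = message["text"] if isinstance(message, dict) else message
    yield f"[max_new_tokens={max_new_tokens}, temperature={temperature}] {text}"

demo = gr.ChatInterface(
    fn=echo,
    textbox=gr.MultimodalTextbox(),
    multimodal=True,
    additional_inputs=[
        gr.Slider(minimum=10, maximum=500, step=10, value=250,
                  label="Maximum number of new tokens to generate"),
        gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.7,
                  label="Temperature"),
    ],
    # Each example row supplies the message plus one value per additional
    # input, in order: [message, max_new_tokens, temperature].
    examples=[[{"text": "Hello!", "files": []}, 250, 0.7]],
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()
```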
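On the generation side, the rewritten `generation_kwargs` matters because `generate()` only honors `temperature` when sampling is enabled; under the default greedy decoding the value is ignored, hence the added `do_sample=True`. Below is a stripped-down sketch of the same thread-plus-streamer pattern using a small text-only model (`gpt2` and the prompt are illustrative; the app itself loads Daemontatox/DocumentCogito through its processor):

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_special_tokens=True, skip_prompt=True)

generation_kwargs = dict(
    inputs,           # input_ids / attention_mask
    streamer=streamer,
    max_new_tokens=32,
    temperature=0.7,
    do_sample=True,   # without this, temperature has no effect
)

# generate() blocks until finished, so it runs on a worker thread while the
# main thread consumes decoded chunks from the streamer as they arrive.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

buffer = ""
for chunk in streamer:
    buffer += chunk   # same incremental buffer the app yields to Gradio
    print(chunk, end="", flush=True)
thread.join()
```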