Spaces: Running on Zero
Update app.py
app.py CHANGED

@@ -5,7 +5,6 @@ from transformers import AutoConfig, AutoModelForCausalLM
 from janus.models import MultiModalityCausalLM, VLChatProcessor
 from janus.utils.io import load_pil_images
 from PIL import Image
-from diffusers import FluxPipeline
 
 import numpy as np
 import os
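With image generation moved onto the Janus model itself (see the rewritten generate_image below), the diffusers FluxPipeline dependency is no longer needed. For context, a minimal sketch of how the remaining Janus imports are typically wired up, following the Janus examples; the checkpoint name is an assumption, the Space may pin a different one:

```python
import torch
from transformers import AutoModelForCausalLM
from janus.models import MultiModalityCausalLM, VLChatProcessor

model_path = "deepseek-ai/Janus-Pro-7B"  # assumed checkpoint, not confirmed by this diff
vl_chat_processor = VLChatProcessor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

# trust_remote_code lets transformers resolve the MultiModalityCausalLM class
vl_gpt = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
```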
@@ -162,18 +161,10 @@ def unified_chat(image, message, chat_history, seed, top_p, temperature, cfg_wei
 
     inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
 
-    outputs = vl_gpt.language_model.generate(
-        inputs_embeds=inputs_embeds,
-        attention_mask=prepare_inputs.attention_mask,
-        pad_token_id=tokenizer.eos_token_id,
-        bos_token_id=tokenizer.bos_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-        max_new_tokens=512,
-        do_sample=False if temperature == 0 else True,
-        use_cache=True,
-        temperature=temperature,
-        top_p=top_p,
-    )
+    outputs = vl_gpt.language_model.generate(inputs_embeds=inputs_embeds, attention_mask=prepare_inputs.attention_mask,
+                                             pad_token_id=tokenizer.eos_token_id, bos_token_id=tokenizer.bos_token_id,
+                                             eos_token_id=tokenizer.eos_token_id, max_new_tokens=512, temperature=temperature, top_p=top_p,
+                                             do_sample=False if temperature == 0 else True, use_cache=True,)
 
     answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
 
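The rewritten generate() call keeps the same decoding arguments but packs them into one call, passing inputs_embeds and attention_mask explicitly and setting pad_token_id to the EOS token, which avoids transformers' missing-pad-token warning; at temperature 0 sampling is disabled, so decoding is greedy. A minimal sketch of the chat path around this call, assuming the conversation and image-placeholder format from the Janus examples:

```python
# Hedged sketch: names mirror the diff (vl_chat_processor, vl_gpt, tokenizer);
# the conversation format follows the Janus examples.
conversation = [
    {"role": "<|User|>", "content": "<image_placeholder>\nDescribe this image.", "images": [image]},
    {"role": "<|Assistant|>", "content": ""},
]
pil_images = load_pil_images(conversation)
prepare_inputs = vl_chat_processor(
    conversations=conversation, images=pil_images, force_batchify=True
).to(vl_gpt.device)

# Fuse text and image embeddings, then decode greedily (the temperature == 0 path).
inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
outputs = vl_gpt.language_model.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=prepare_inputs.attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=512,
    do_sample=False,
    use_cache=True,
)
answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
```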
@@ -234,42 +225,35 @@ def unpack(dec, width, height, parallel_size=5):
 @torch.inference_mode()
 @spaces.GPU(duration=120)  # Specify a duration to avoid timeout
 def generate_image(prompt, seed=None, guidance=5, t2i_temperature=1.0, progress=gr.Progress(track_tqdm=True)):
-    #
-
-    #
-
-
-
-
-
-
-
+    # Clear CUDA cache and avoid tracking gradients
+    torch.cuda.empty_cache()
+    # Set the seed for reproducible results
+    if seed is not None:
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed(seed)
+        np.random.seed(seed)
+    width = 384
+    height = 384
+    parallel_size = 1
 
-
-
-
-
-
-
-
+    with torch.no_grad():
+        messages = [{'role': '<|User|>', 'content': prompt},
+                    {'role': '<|Assistant|>', 'content': ''}]
+        text = vl_chat_processor.apply_sft_template_for_multi_turn_prompts(conversations=messages,
+                                                                           sft_format=vl_chat_processor.sft_format,
+                                                                           system_prompt='')
+        text = text + vl_chat_processor.image_start_tag
 
-
-
-
-
-
-
-
-
-
-
-    # Depending on the variant being used, the pipeline call will slightly vary.
-    # Refer to the pipeline documentation for more details.
-    pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
-    pipe.to("cuda")
+        input_ids = torch.LongTensor(tokenizer.encode(text))
+        output, patches = generate(input_ids, width // 16 * 16, height // 16 * 16, cfg_weight=guidance,
+                                   parallel_size=parallel_size, temperature=t2i_temperature)
+        images = unpack(patches, width // 16 * 16, height // 16 * 16, parallel_size=parallel_size)
+
+        stime = time.time()
+        ret_images = [image_upsample(Image.fromarray(images[i])) for i in range(parallel_size)]
+        print(f'upsample time: {time.time() - stime}')
+        return ret_images
 
-    image = pipe(prompt=prompt, guidance_scale=guidance, height=768, width=768, num_inference_steps=16,).images[0]
-    return image
 
 
 @spaces.GPU(duration=60)
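The new body seeds the CPU and CUDA RNGs plus numpy whenever a seed is given, so results are reproducible, and parallel_size = 1 yields a single 384x384 image that is then upsampled. A hypothetical example of wiring generate_image into a Gradio interface; the actual Space builds its own UI, so the widgets and defaults here are illustrative only:

```python
import gradio as gr

# Hypothetical wiring, not the Space's real layout; inputs map positionally to
# generate_image(prompt, seed, guidance, t2i_temperature).
demo = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Number(label="Seed", value=12345, precision=0),
        gr.Slider(minimum=1, maximum=10, value=5, label="CFG weight"),
        gr.Slider(minimum=0.1, maximum=2.0, value=1.0, label="T2I temperature"),
    ],
    outputs=gr.Gallery(label="Generated images"),  # the function returns a list of PIL images
)
demo.launch()
```

Returning a list pairs naturally with gr.Gallery even though parallel_size is fixed at 1.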
|