Rishi Desai committed on
Commit
a446ad0
·
1 Parent(s): c9dac35

using together ai now

Browse files
Files changed (1) hide show
  1. caption.py +34 -13
caption.py CHANGED
@@ -1,7 +1,7 @@
1
  import base64
2
  import io
3
  import os
4
- from openai import OpenAI
5
  from PIL import Image
6
  from dotenv import load_dotenv
7
 
@@ -41,7 +41,7 @@ Captioning Principles:
41
  - Avoid mentioning real or fictional identities.
42
  - Always prefix with the trigger word "tr1gger."
43
 
44
- Updated Examples:
45
  - tr1gger photorealistic, tactical vest and gloves, standing in profile, neutral expression, overcast lighting, side profile
46
  - tr1gger 3D-rendered, hooded cloak with digital pattern, seated cross-legged, calm expression, low ambient lighting, front view
47
  - tr1gger anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, soft daylight, three-quarter view
@@ -57,7 +57,13 @@ def caption_images(images):
57
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
58
  image_strings.append(img_str)
59
 
60
- client = OpenAI()
 
 
 
 
 
 
61
  captions = []
62
 
63
  # Start a separate chat session for each image
@@ -65,31 +71,40 @@ def caption_images(images):
65
  messages = [
66
  {"role": "system", "content": get_prompt()},
67
  {
68
- "role": "user",
69
  "content": [
70
- {"type": "text", "text": "Caption this image according to the guidelines."},
71
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}}
72
  ]
73
  }
74
  ]
75
 
76
- # Request caption for the image in a single chat
77
  response = client.chat.completions.create(
78
- model="gpt-4o",
79
- messages=messages,
80
- max_tokens=512 # Adjust max_tokens as needed
81
  )
82
 
83
  # Extract caption from the response
84
- caption = response.choices[0].message.content.strip()
 
 
85
  captions.append(caption)
86
 
87
  return captions
88
 
 
 
 
 
 
 
 
 
89
  # Example usage
90
  if __name__ == "__main__":
91
- if not os.environ.get("OPENAI_API_KEY"):
92
- print("Please update the .env file with your OpenAI API key.")
93
  exit(1)
94
 
95
  # Load images
@@ -100,3 +115,9 @@ if __name__ == "__main__":
100
  captions = caption_images(images)
101
  for i, caption in enumerate(captions):
102
  print(f"Generated Caption for Image {i+1}: {caption}")
 
 
 
 
 
 
 
1
  import base64
2
  import io
3
  import os
4
+ from together import Together
5
  from PIL import Image
6
  from dotenv import load_dotenv
7
 
 
41
  - Avoid mentioning real or fictional identities.
42
  - Always prefix with the trigger word "tr1gger."
43
 
44
+ Examples:
45
  - tr1gger photorealistic, tactical vest and gloves, standing in profile, neutral expression, overcast lighting, side profile
46
  - tr1gger 3D-rendered, hooded cloak with digital pattern, seated cross-legged, calm expression, low ambient lighting, front view
47
  - tr1gger anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, soft daylight, three-quarter view
 
57
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
58
  image_strings.append(img_str)
59
 
60
+ # Retrieve the API key from the environment
61
+ api_key = os.environ.get("TOGETHER_API_KEY")
62
+ if not api_key:
63
+ raise ValueError("TOGETHER_API_KEY is not set in the environment.")
64
+
65
+ # Pass the API key to the Together client
66
+ client = Together(api_key=api_key)
67
  captions = []
68
 
69
  # Start a separate chat session for each image
 
71
  messages = [
72
  {"role": "system", "content": get_prompt()},
73
  {
74
+ "role": "user",
75
  "content": [
76
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
77
+ {"type": "text", "text": "Describe this image."}
78
  ]
79
  }
80
  ]
81
 
82
+ # Request caption for the image using Llama 4 Maverick
83
  response = client.chat.completions.create(
84
+ model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
85
+ messages=messages
 
86
  )
87
 
88
  # Extract caption from the response
89
+ full_response = response.choices[0].message.content.strip()
90
+ # Post-process to extract only the caption part
91
+ caption = next((line for line in full_response.splitlines() if line.startswith("tr1gger")), "")
92
  captions.append(caption)
93
 
94
  return captions
95
 
96
def extract_captions(file_path):
    """Return the caption lines found in a text file.

    A caption line is one that begins with the trigger word ``tr1gger``
    at the very start of the line; every other line (including lines
    where the trigger word is preceded by whitespace) is ignored.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A list of the matching lines, in file order, with leading and
        trailing whitespace (including the newline) stripped. The list
        is empty when no line matches.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(file_path, "r", encoding="utf-8") as file:
        return [line.strip() for line in file if line.startswith("tr1gger")]
103
+
104
  # Example usage
105
  if __name__ == "__main__":
106
+ if not os.environ.get("TOGETHER_API_KEY"):
107
+ print("Please update the environment with your Together AI API key.")
108
  exit(1)
109
 
110
  # Load images
 
115
  captions = caption_images(images)
116
  for i, caption in enumerate(captions):
117
  print(f"Generated Caption for Image {i+1}: {caption}")
118
+
119
+ # Extract captions from a file
120
+ file_path = 'post_girl/multiview_0.txt'
121
+ extracted_captions = extract_captions(file_path)
122
+ for caption in extracted_captions:
123
+ print(caption)