Spaces:
Running
Running
Rishi Desai
committed on
Commit
·
a446ad0
1
Parent(s):
c9dac35
using together ai now
Browse files
- caption.py +34 -13
caption.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import base64
|
2 |
import io
|
3 |
import os
|
4 |
-
from
|
5 |
from PIL import Image
|
6 |
from dotenv import load_dotenv
|
7 |
|
@@ -41,7 +41,7 @@ Captioning Principles:
|
|
41 |
- Avoid mentioning real or fictional identities.
|
42 |
- Always prefix with the trigger word "tr1gger."
|
43 |
|
44 |
-
|
45 |
- tr1gger photorealistic, tactical vest and gloves, standing in profile, neutral expression, overcast lighting, side profile
|
46 |
- tr1gger 3D-rendered, hooded cloak with digital pattern, seated cross-legged, calm expression, low ambient lighting, front view
|
47 |
- tr1gger anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, soft daylight, three-quarter view
|
@@ -57,7 +57,13 @@ def caption_images(images):
|
|
57 |
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
58 |
image_strings.append(img_str)
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
captions = []
|
62 |
|
63 |
# Start a separate chat session for each image
|
@@ -65,31 +71,40 @@ def caption_images(images):
|
|
65 |
messages = [
|
66 |
{"role": "system", "content": get_prompt()},
|
67 |
{
|
68 |
-
"role": "user",
|
69 |
"content": [
|
70 |
-
{"type": "
|
71 |
-
{"type": "
|
72 |
]
|
73 |
}
|
74 |
]
|
75 |
|
76 |
-
# Request caption for the image
|
77 |
response = client.chat.completions.create(
|
78 |
-
model="
|
79 |
-
messages=messages
|
80 |
-
max_tokens=512 # Adjust max_tokens as needed
|
81 |
)
|
82 |
|
83 |
# Extract caption from the response
|
84 |
-
|
|
|
|
|
85 |
captions.append(caption)
|
86 |
|
87 |
return captions
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# Example usage
|
90 |
if __name__ == "__main__":
|
91 |
-
if not os.environ.get("
|
92 |
-
print("Please update the
|
93 |
exit(1)
|
94 |
|
95 |
# Load images
|
@@ -100,3 +115,9 @@ if __name__ == "__main__":
|
|
100 |
captions = caption_images(images)
|
101 |
for i, caption in enumerate(captions):
|
102 |
print(f"Generated Caption for Image {i+1}: {caption}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import base64
|
2 |
import io
|
3 |
import os
|
4 |
+
from together import Together
|
5 |
from PIL import Image
|
6 |
from dotenv import load_dotenv
|
7 |
|
|
|
41 |
- Avoid mentioning real or fictional identities.
|
42 |
- Always prefix with the trigger word "tr1gger."
|
43 |
|
44 |
+
Examples:
|
45 |
- tr1gger photorealistic, tactical vest and gloves, standing in profile, neutral expression, overcast lighting, side profile
|
46 |
- tr1gger 3D-rendered, hooded cloak with digital pattern, seated cross-legged, calm expression, low ambient lighting, front view
|
47 |
- tr1gger anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, soft daylight, three-quarter view
|
|
|
57 |
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
58 |
image_strings.append(img_str)
|
59 |
|
60 |
+
# Retrieve the API key from the environment
|
61 |
+
api_key = os.environ.get("TOGETHER_API_KEY")
|
62 |
+
if not api_key:
|
63 |
+
raise ValueError("TOGETHER_API_KEY is not set in the environment.")
|
64 |
+
|
65 |
+
# Pass the API key to the Together client
|
66 |
+
client = Together(api_key=api_key)
|
67 |
captions = []
|
68 |
|
69 |
# Start a separate chat session for each image
|
|
|
71 |
messages = [
|
72 |
{"role": "system", "content": get_prompt()},
|
73 |
{
|
74 |
+
"role": "user",
|
75 |
"content": [
|
76 |
+
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
|
77 |
+
{"type": "text", "text": "Describe this image."}
|
78 |
]
|
79 |
}
|
80 |
]
|
81 |
|
82 |
+
# Request caption for the image using Llama 4 Maverick
|
83 |
response = client.chat.completions.create(
|
84 |
+
model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
85 |
+
messages=messages
|
|
|
86 |
)
|
87 |
|
88 |
# Extract caption from the response
|
89 |
+
full_response = response.choices[0].message.content.strip()
|
90 |
+
# Post-process to extract only the caption part
|
91 |
+
caption = next((line for line in full_response.splitlines() if line.startswith("tr1gger")), "")
|
92 |
captions.append(caption)
|
93 |
|
94 |
return captions
|
95 |
|
96 |
+
def extract_captions(file_path):
    """Return the caption lines stored in a text file.

    A line counts as a caption when it begins with the literal trigger
    word ``tr1gger`` at column 0; lines with leading whitespace or any
    other prefix are ignored. Each matching line is returned with
    surrounding whitespace (including the trailing newline) stripped.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A list of caption strings in file order; empty if no line matches.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so results do not depend on the platform's
    # locale encoding (captions may contain non-ASCII characters).
    with open(file_path, "r", encoding="utf-8") as file:
        return [line.strip() for line in file if line.startswith("tr1gger")]
|
103 |
+
|
104 |
# Example usage
|
105 |
if __name__ == "__main__":
|
106 |
+
if not os.environ.get("TOGETHER_API_KEY"):
|
107 |
+
print("Please update the environment with your Together AI API key.")
|
108 |
exit(1)
|
109 |
|
110 |
# Load images
|
|
|
115 |
captions = caption_images(images)
|
116 |
for i, caption in enumerate(captions):
|
117 |
print(f"Generated Caption for Image {i+1}: {caption}")
|
118 |
+
|
119 |
+
# Extract captions from a file
|
120 |
+
file_path = 'post_girl/multiview_0.txt'
|
121 |
+
extracted_captions = extract_captions(file_path)
|
122 |
+
for caption in extracted_captions:
|
123 |
+
print(caption)
|