Upload sd_token_similarity_calculator.ipynb
Browse files
sd_token_similarity_calculator.ipynb
CHANGED
@@ -163,6 +163,9 @@
|
|
163 |
"source": [
|
164 |
"# @title Load/initialize values (new version - ignore this cell)\n",
|
165 |
"#Imports\n",
|
|
|
|
|
|
|
166 |
"import json , os , shelve , torch\n",
|
167 |
"import pandas as pd\n",
|
168 |
"#----#\n",
|
@@ -211,7 +214,7 @@
|
|
211 |
" if _index <= 1 :\n",
|
212 |
" _file_name = f'{value}'\n",
|
213 |
" %cd {path_enc}\n",
|
214 |
-
" _text_encodings =
|
215 |
" #Store text_encodings for the header items\n",
|
216 |
" text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
|
217 |
" text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
|
@@ -229,8 +232,8 @@
|
|
229 |
" _text_encodings.close() #close the text_encodings file\n",
|
230 |
" file_index = file_index + 1\n",
|
231 |
" #----------#\n",
|
232 |
-
"
|
233 |
-
" return prompts , text_encodings ,
|
234 |
" #--------#\n",
|
235 |
"\n",
|
236 |
"#for key in prompts:\n",
|
@@ -240,10 +243,9 @@
|
|
240 |
"#------#\n"
|
241 |
],
|
242 |
"metadata": {
|
243 |
-
"cellView": "form",
|
244 |
"id": "rUXQ73IbonHY"
|
245 |
},
|
246 |
-
"execution_count":
|
247 |
"outputs": []
|
248 |
},
|
249 |
{
|
@@ -254,10 +256,9 @@
|
|
254 |
"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
|
255 |
"#------#\n",
|
256 |
"path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
|
257 |
-
"prompts , text_encodings,
|
258 |
],
|
259 |
"metadata": {
|
260 |
-
"cellView": "form",
|
261 |
"id": "ZMG4CThUAmwW"
|
262 |
},
|
263 |
"execution_count": null,
|
@@ -1180,13 +1181,30 @@
|
|
1180 |
{
|
1181 |
"cell_type": "code",
|
1182 |
"source": [
|
1183 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1184 |
"\n",
|
1185 |
"import json\n",
|
1186 |
"import pandas as pd\n",
|
1187 |
"import os\n",
|
1188 |
"import shelve\n",
|
1189 |
"import torch\n",
|
|
|
|
|
|
|
|
|
|
|
1190 |
"\n",
|
1191 |
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
1192 |
"from transformers import AutoTokenizer\n",
|
@@ -1200,7 +1218,9 @@
|
|
1200 |
"my_mkdirs('/content/text_encodings/')\n",
|
1201 |
"filename = ''\n",
|
1202 |
"\n",
|
1203 |
-
"
|
|
|
|
|
1204 |
" if file_index <1: continue\n",
|
1205 |
" filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
|
1206 |
" #🦜 fusion-t2i-prompt-features-1.json\n",
|
@@ -1219,16 +1239,19 @@
|
|
1219 |
" # Calculate text_encoding for .json file contents and results as .db file\n",
|
1220 |
"\n",
|
1221 |
" %cd /content/text_encodings/\n",
|
1222 |
-
"
|
1223 |
-
" d = shelve.open(filename)\n",
|
1224 |
" for index in range(NUM_ITEMS + 1):\n",
|
1225 |
" inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
|
1226 |
" text_features = model.get_text_features(**inputs).to(device)\n",
|
1227 |
" text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
|
1228 |
-
"
|
|
|
1229 |
" #----#\n",
|
1230 |
-
"
|
1231 |
-
"\n"
|
|
|
|
|
|
|
1232 |
],
|
1233 |
"metadata": {
|
1234 |
"id": "9ZiTsF9jV0TV"
|
|
|
163 |
"source": [
|
164 |
"# @title Load/initialize values (new version - ignore this cell)\n",
|
165 |
"#Imports\n",
|
166 |
+
"!pip install safetensors\n",
|
167 |
+
"from safetensors.torch import load_file\n",
|
168 |
+
"\n",
|
169 |
"import json , os , shelve , torch\n",
|
170 |
"import pandas as pd\n",
|
171 |
"#----#\n",
|
|
|
214 |
" if _index <= 1 :\n",
|
215 |
" _file_name = f'{value}'\n",
|
216 |
" %cd {path_enc}\n",
|
217 |
+
" _text_encodings = load_file(f'{_file_name}.safetensors')\n",
|
218 |
" #Store text_encodings for the header items\n",
|
219 |
" text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
|
220 |
" text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
|
|
|
232 |
" _text_encodings.close() #close the text_encodings file\n",
|
233 |
" file_index = file_index + 1\n",
|
234 |
" #----------#\n",
|
235 |
+
" NUM_ITEMS = index\n",
|
236 |
+
" return prompts , text_encodings , NUM_ITEMS\n",
|
237 |
" #--------#\n",
|
238 |
"\n",
|
239 |
"#for key in prompts:\n",
|
|
|
243 |
"#------#\n"
|
244 |
],
|
245 |
"metadata": {
|
|
|
246 |
"id": "rUXQ73IbonHY"
|
247 |
},
|
248 |
+
"execution_count": 3,
|
249 |
"outputs": []
|
250 |
},
|
251 |
{
|
|
|
256 |
"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
|
257 |
"#------#\n",
|
258 |
"path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
|
259 |
+
"prompts , text_encodings, NUM_ITEMS = getPrompts(path)"
|
260 |
],
|
261 |
"metadata": {
|
|
|
262 |
"id": "ZMG4CThUAmwW"
|
263 |
},
|
264 |
"execution_count": null,
|
|
|
1181 |
{
|
1182 |
"cell_type": "code",
|
1183 |
"source": [
|
1184 |
+
"%cd /content/\n",
|
1185 |
+
"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts"
|
1186 |
+
],
|
1187 |
+
"metadata": {
|
1188 |
+
"id": "GPAUFxZgPfrY"
|
1189 |
+
},
|
1190 |
+
"execution_count": null,
|
1191 |
+
"outputs": []
|
1192 |
+
},
|
1193 |
+
{
|
1194 |
+
"cell_type": "code",
|
1195 |
+
"source": [
|
1196 |
+
"# @title Make your own text_encodings .safetensors file for later use (using GPU is recommended to speed things up)\n",
|
1197 |
"\n",
|
1198 |
"import json\n",
|
1199 |
"import pandas as pd\n",
|
1200 |
"import os\n",
|
1201 |
"import shelve\n",
|
1202 |
"import torch\n",
|
1203 |
+
"from safetensors.torch import save_file\n",
|
1204 |
+
"\n",
|
1205 |
+
"def my_mkdirs(folder):\n",
|
1206 |
+
" if os.path.exists(folder)==False:\n",
|
1207 |
+
" os.makedirs(folder)\n",
|
1208 |
"\n",
|
1209 |
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
1210 |
"from transformers import AutoTokenizer\n",
|
|
|
1218 |
"my_mkdirs('/content/text_encodings/')\n",
|
1219 |
"filename = ''\n",
|
1220 |
"\n",
|
1221 |
+
"NUM_FILES = 34\n",
|
1222 |
+
"\n",
|
1223 |
+
"for file_index in range(NUM_FILES + 1):\n",
|
1224 |
" if file_index <1: continue\n",
|
1225 |
" filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
|
1226 |
" #🦜 fusion-t2i-prompt-features-1.json\n",
|
|
|
1239 |
" # Calculate text_encoding for .json file contents and save the results as a .safetensors file\n",
|
1240 |
"\n",
|
1241 |
" %cd /content/text_encodings/\n",
|
1242 |
+
" text_encoding_dict = {}\n",
|
|
|
1243 |
" for index in range(NUM_ITEMS + 1):\n",
|
1244 |
" inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
|
1245 |
" text_features = model.get_text_features(**inputs).to(device)\n",
|
1246 |
" text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
|
1247 |
+
" text_encoding_dict[f'{index}'] = text_features.to('cpu')\n",
|
1248 |
+
" save_file(text_encoding_dict, f'{filename}.safetensors')\n",
|
1249 |
" #----#\n",
|
1250 |
+
"\n",
|
1251 |
+
"#from safetensors.torch import load_file\n",
|
1252 |
+
"#%cd /content/text_encodings\n",
|
1253 |
+
"#loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')\n",
|
1254 |
+
"#print(loaded[\"325\"])"
|
1255 |
],
|
1256 |
"metadata": {
|
1257 |
"id": "9ZiTsF9jV0TV"
|