codeShare committed on
Commit
fa20c7e
·
verified ·
1 Parent(s): fd7fc65

Upload sd_token_similarity_calculator.ipynb

Browse files
Files changed (1) hide show
  1. sd_token_similarity_calculator.ipynb +37 -14
sd_token_similarity_calculator.ipynb CHANGED
@@ -163,6 +163,9 @@
163
  "source": [
164
  "# @title Load/initialize values (new version - ignore this cell)\n",
165
  "#Imports\n",
 
 
 
166
  "import json , os , shelve , torch\n",
167
  "import pandas as pd\n",
168
  "#----#\n",
@@ -211,7 +214,7 @@
211
  " if _index <= 1 :\n",
212
  " _file_name = f'{value}'\n",
213
  " %cd {path_enc}\n",
214
- " _text_encodings = shelve.open(_file_name)\n",
215
  " #Store text_encodings for the header items\n",
216
  " text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
217
  " text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
@@ -229,8 +232,8 @@
229
  " _text_encodings.close() #close the text_encodings file\n",
230
  " file_index = file_index + 1\n",
231
  " #----------#\n",
232
- " RANGE = index\n",
233
- " return prompts , text_encodings , NUM_TOKENS\n",
234
  " #--------#\n",
235
  "\n",
236
  "#for key in prompts:\n",
@@ -240,10 +243,9 @@
240
  "#------#\n"
241
  ],
242
  "metadata": {
243
- "cellView": "form",
244
  "id": "rUXQ73IbonHY"
245
  },
246
- "execution_count": null,
247
  "outputs": []
248
  },
249
  {
@@ -254,10 +256,9 @@
254
  "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
255
  "#------#\n",
256
  "path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
257
- "prompts , text_encodings, RANGE = getPrompts(path)"
258
  ],
259
  "metadata": {
260
- "cellView": "form",
261
  "id": "ZMG4CThUAmwW"
262
  },
263
  "execution_count": null,
@@ -1180,13 +1181,30 @@
1180
  {
1181
  "cell_type": "code",
1182
  "source": [
1183
- "# @title Make your own text_encodings .db file for later use (using GPU is recommended)\n",
 
 
 
 
 
 
 
 
 
 
 
 
1184
  "\n",
1185
  "import json\n",
1186
  "import pandas as pd\n",
1187
  "import os\n",
1188
  "import shelve\n",
1189
  "import torch\n",
 
 
 
 
 
1190
  "\n",
1191
  "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
1192
  "from transformers import AutoTokenizer\n",
@@ -1200,7 +1218,9 @@
1200
  "my_mkdirs('/content/text_encodings/')\n",
1201
  "filename = ''\n",
1202
  "\n",
1203
- "for file_index in range(34 + 1):\n",
 
 
1204
  " if file_index <1: continue\n",
1205
  " filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
1206
  " #🦜 fusion-t2i-prompt-features-1.json\n",
@@ -1219,16 +1239,19 @@
1219
  " # Calculate text_encoding for .json file contents and results as .db file\n",
1220
  "\n",
1221
  " %cd /content/text_encodings/\n",
1222
- " import shelve\n",
1223
- " d = shelve.open(filename)\n",
1224
  " for index in range(NUM_ITEMS + 1):\n",
1225
  " inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
1226
  " text_features = model.get_text_features(**inputs).to(device)\n",
1227
  " text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
1228
- " d[f'{index}'] = text_features.to('cpu')\n",
 
1229
  " #----#\n",
1230
- " d.close() #close the file\n",
1231
- "\n"
 
 
 
1232
  ],
1233
  "metadata": {
1234
  "id": "9ZiTsF9jV0TV"
 
163
  "source": [
164
  "# @title Load/initialize values (new version - ignore this cell)\n",
165
  "#Imports\n",
166
+ "!pip install safetensors\n",
167
+ "from safetensors.torch import load_file\n",
168
+ "\n",
169
  "import json , os , shelve , torch\n",
170
  "import pandas as pd\n",
171
  "#----#\n",
 
214
  " if _index <= 1 :\n",
215
  " _file_name = f'{value}'\n",
216
  " %cd {path_enc}\n",
217
+ " _text_encodings = load_file(f'{_file_name}.safetensors')\n",
218
  " #Store text_encodings for the header items\n",
219
  " text_encodings[f'{index-1}'] = _text_encodings[f'{_index-1}']\n",
220
  " text_encodings[f'{index}'] = _text_encodings[f'{_index}']\n",
 
232
  " _text_encodings.close() #close the text_encodings file\n",
233
  " file_index = file_index + 1\n",
234
  " #----------#\n",
235
+ " NUM_ITEMS = index\n",
236
+ " return prompts , text_encodings , NUM_ITEMS\n",
237
  " #--------#\n",
238
  "\n",
239
  "#for key in prompts:\n",
 
243
  "#------#\n"
244
  ],
245
  "metadata": {
 
246
  "id": "rUXQ73IbonHY"
247
  },
248
+ "execution_count": 3,
249
  "outputs": []
250
  },
251
  {
 
256
  "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
257
  "#------#\n",
258
  "path = '/content/text-to-image-prompts/civitai-prompts/green'\n",
259
+ "prompts , text_encodings, NUM_ITEMS = getPrompts(path)"
260
  ],
261
  "metadata": {
 
262
  "id": "ZMG4CThUAmwW"
263
  },
264
  "execution_count": null,
 
1181
  {
1182
  "cell_type": "code",
1183
  "source": [
1184
+ "%cd /content/\n",
1185
+ "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts"
1186
+ ],
1187
+ "metadata": {
1188
+ "id": "GPAUFxZgPfrY"
1189
+ },
1190
+ "execution_count": null,
1191
+ "outputs": []
1192
+ },
1193
+ {
1194
+ "cell_type": "code",
1195
+ "source": [
1196
+ "# @title Make your own text_encodings .pt file for later use (using GPU is recommended to speed things up)\n",
1197
  "\n",
1198
  "import json\n",
1199
  "import pandas as pd\n",
1200
  "import os\n",
1201
  "import shelve\n",
1202
  "import torch\n",
1203
+ "from safetensors.torch import save_file\n",
1204
+ "\n",
1205
+ "def my_mkdirs(folder):\n",
1206
+ " if os.path.exists(folder)==False:\n",
1207
+ " os.makedirs(folder)\n",
1208
  "\n",
1209
  "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
1210
  "from transformers import AutoTokenizer\n",
 
1218
  "my_mkdirs('/content/text_encodings/')\n",
1219
  "filename = ''\n",
1220
  "\n",
1221
+ "NUM_FILES = 34\n",
1222
+ "\n",
1223
+ "for file_index in range(NUM_FILES + 1):\n",
1224
  " if file_index <1: continue\n",
1225
  " filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
1226
  " #🦜 fusion-t2i-prompt-features-1.json\n",
 
1239
  " # Calculate text_encoding for .json file contents and results as .db file\n",
1240
  "\n",
1241
  " %cd /content/text_encodings/\n",
1242
+ " text_encoding_dict = {}\n",
 
1243
  " for index in range(NUM_ITEMS + 1):\n",
1244
  " inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
1245
  " text_features = model.get_text_features(**inputs).to(device)\n",
1246
  " text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
1247
+ " text_encoding_dict[f'{index}'] = text_features.to('cpu')\n",
1248
+ " save_file(text_encoding_dict, f'{filename}.safetensors')\n",
1249
  " #----#\n",
1250
+ "\n",
1251
+ "#from safetensors.torch import load_file\n",
1252
+ "#%cd /content/text_encodings\n",
1253
+ "#loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')\n",
1254
+ "#print(loaded[\"325\"])"
1255
  ],
1256
  "metadata": {
1257
  "id": "9ZiTsF9jV0TV"