Upload sd_token_similarity_calculator.ipynb

sd_token_similarity_calculator.ipynb  (+92 -109)  CHANGED
@@ -3,7 +3,8 @@
 "nbformat_minor": 0,
 "metadata": {
 "colab": {
-"provenance": []
+"provenance": [],
+"gpuType": "T4"
 },
 "kernelspec": {
 "name": "python3",
@@ -11,7 +12,8 @@
 },
 "language_info": {
 "name": "python"
-}
+},
+"accelerator": "GPU"
 },
 "cells": [
 {
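The two metadata hunks above pin the notebook to a Colab GPU runtime ("gpuType": "T4", "accelerator": "GPU"). A minimal sketch of the runtime check the later cells depend on, assuming PyTorch is available in the Colab image:

```python
import torch

# Pick the GPU when the Colab runtime provides one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Running on: {device}')
if device.type == 'cuda':
    # On the runtime requested by the metadata above this reports something like "Tesla T4".
    print(torch.cuda.get_device_name(0))
```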
@@ -47,15 +49,14 @@
 "NUM_SUFFIX = 32901\n",
 "\n",
 "PREFIX_ENC_VOCAB = ['encoded_prefix_to_girl',]\n",
-"SUFFIX_ENC_VOCAB = [\n",
-" 'from_-encoded_suffix',\n",
-" '
-" '
-" 'encoded_suffix-_like']\n",
+"SUFFIX_ENC_VOCAB = ['a_-_encoded_suffix' ,]\n",
+" #'from_-encoded_suffix',\n",
+" #'by_-encoded_suffix' ,\n",
+" #'encoded_suffix-_like']\n",
 "\n",
 "# Make sure these match above results\n",
-"NUM_PREFIX_LISTS =
-"NUM_SUFFIX_LISTS =
+"NUM_PREFIX_LISTS = len(PREFIX_ENC_VOCAB)\n",
+"NUM_SUFFIX_LISTS = len(SUFFIX_ENC_VOCAB)\n",
 "#-----#\n",
 "\n",
 "\n",
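This hunk swaps hard-coded list counts for len() calls, so commenting vocab entries in or out can no longer leave NUM_PREFIX_LISTS / NUM_SUFFIX_LISTS stale. A small sketch of the resulting pattern (the list entries are taken from the hunk; the assert is only illustrative):

```python
# Basenames of the pre-encoded vocab databases, as listed in the hunk above.
PREFIX_ENC_VOCAB = ['encoded_prefix_to_girl']
SUFFIX_ENC_VOCAB = ['a_-_encoded_suffix']

# Derive the counts from the lists instead of hard-coding them, so adding or
# commenting out an entry updates the counts automatically.
NUM_PREFIX_LISTS = len(PREFIX_ENC_VOCAB)
NUM_SUFFIX_LISTS = len(SUFFIX_ENC_VOCAB)
assert NUM_PREFIX_LISTS == 1 and NUM_SUFFIX_LISTS == 1
```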
@@ -155,10 +156,29 @@
 ],
 "metadata": {
 "id": "Ch9puvwKH1s3",
-"collapsed": true
+"collapsed": true,
+"colab": {
+"base_uri": "https://localhost:8080/"
+},
+"outputId": "42e8d455-ca0a-4c78-dba7-a32d9dee9b41"
 },
-"execution_count":
-"outputs": [
+"execution_count": 1,
+"outputs": [
+{
+"output_type": "stream",
+"name": "stdout",
+"text": [
+"Cloning into 'sd_tokens'...\n",
+"remote: Enumerating objects: 99, done.\u001b[K\n",
+"remote: Counting objects: 100% (96/96), done.\u001b[K\n",
+"remote: Compressing objects: 100% (96/96), done.\u001b[K\n",
+"remote: Total 99 (delta 34), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
+"Unpacking objects: 100% (99/99), 1.35 MiB | 1.60 MiB/s, done.\n",
+"Filtering content: 100% (22/22), 2.47 GiB | 36.54 MiB/s, done.\n",
+"/content/sd_tokens\n"
+]
+}
+]
 },
 {
 "cell_type": "code",
@@ -308,7 +328,8 @@
 "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
 ],
 "metadata": {
-"id": "iWeFnT1gAx6A"
+"id": "iWeFnT1gAx6A",
+"cellView": "form"
 },
 "execution_count": null,
 "outputs": []
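The cell in this hunk links to the shelve documentation and is now rendered as a form (cellView). A minimal sketch of the shelve round trip the notebook relies on, with a toy tensor and a hypothetical file name standing in for the real encodings:

```python
import shelve
import torch

# Write: store picklable values (here CPU tensors) under string keys,
# the same pattern the notebook's .db cells use.
with shelve.open('demo_encodings') as d:
    d['0'] = torch.randn(1, 768)  # toy stand-in for a CLIP text encoding

# Read: reopen the file later and look entries up by the same keys.
with shelve.open('demo_encodings') as d:
    print(d['0'].shape)  # torch.Size([1, 768])
```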
@@ -1076,48 +1097,19 @@
 {
 "cell_type": "code",
 "source": [
-"# @title Make your own text_encodings .db file for later use\n",
 "\n",
-"
+"# @title Import text-to-image-prompts .json files\n",
+"%cd /content/\n",
+"!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
 "\n",
-"
-"
-"
-"
-"
-"\n",
-"# Save results as .db file\n",
-"import shelve\n",
-"d = shelve.open('green_-encoded_suffix')\n",
-"for index in range(NUM_SUFFIX):\n",
-" inputs = tokenizer(text = 'green '+get_suffix(index), padding=True, return_tensors=\"pt\").to(device)\n",
-" text_features = model.get_text_features(**inputs).to(device)\n",
-" d[f'{index}'] = text_features.to('cpu')\n",
-"#----#\n",
-"d.close() #close the file\n",
-"\n"
-],
-"metadata": {
-"id": "9ZiTsF9jV0TV"
-},
-"execution_count": null,
-"outputs": []
-},
-{
-"cell_type": "code",
-"source": [
-"# Save results as .db file\n",
-"import shelve\n",
-"d = shelve.open('encoded_suffix-_knee')\n",
-"for index in range(NUM_SUFFIX):\n",
-" inputs = tokenizer(text = get_suffix(index)+' knee', padding=True, return_tensors=\"pt\").to(device)\n",
-" text_features = model.get_text_features(**inputs).to(device)\n",
-" d[f'{index}'] = text_features.to('cpu')\n",
-"#----#\n",
-"d.close() #close the file"
+"#Initialize\n",
+"import os\n",
+"def my_mkdirs(folder):\n",
+" if os.path.exists(folder)==False:\n",
+"  os.makedirs(folder)"
 ],
 "metadata": {
-"id": "
+"id": "Qy51FFu8aVNA"
 },
 "execution_count": null,
 "outputs": []
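The replacement cell above clones the codeShare/text-to-image-prompts dataset and defines a my_mkdirs helper guarded by os.path.exists. As a design note only, an equivalent one-liner uses exist_ok so repeated runs are a no-op:

```python
import os

# Same effect as the notebook's my_mkdirs helper: create the folder only if it
# is missing; exist_ok=True turns a rerun into a no-op instead of an error.
os.makedirs('/content/text_encodings/', exist_ok=True)
```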
@@ -1125,56 +1117,53 @@
 {
 "cell_type": "code",
 "source": [
-… (removed cell source truncated in this view)
-" d[f'{index}'] = text_features.to('cpu')\n",
-"#----#\n",
-"d.close() #close the file"
+"# @title Make your own text_encodings .db file for later use\n",
+"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+"from transformers import AutoTokenizer\n",
+"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+"from transformers import CLIPProcessor, CLIPModel\n",
+"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+"model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\").to(device)\n",
+"\n",
+"#Import the vocab.json\n",
+"import json\n",
+"import pandas as pd\n",
+"\n",
+"my_mkdirs('/content/text_encodings/')\n",
+"filename = ''\n",
+"\n",
+"for file_index in range(34):\n",
+" if file_index <1: continue\n",
+" filename = f'🦜 fusion-t2i-prompt-features-{file_index}'\n",
+" #🦜 fusion-t2i-prompt-features-1.json\n",
+"\n",
+" # Read suffix.json\n",
+" %cd /content/text-to-image-prompts/civitai-prompts/green/\n",
+" with open(filename + '.json', 'r') as f:\n",
+" data = json.load(f)\n",
+" _df = pd.DataFrame({'count': data})['count']\n",
+" prompts = {\n",
+" key : value for key, value in _df.items()\n",
+" }\n",
+" NUM_ITEMS = int(prompts[\"0\"])\n",
+" #------#\n",
+"\n",
+" # Calculate text_encoding for .json file contents and results as .db file\n",
+"\n",
+" %cd /content/text_encodings/\n",
+" import shelve\n",
+" d = shelve.open(filename)\n",
+" for index in range(NUM_ITEMS):\n",
+" inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
+" text_features = model.get_text_features(**inputs).to(device)\n",
+" text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
+" d[f'{index}'] = text_features.to('cpu')\n",
+" #----#\n",
+" d.close() #close the file\n",
+"\n"
 ],
 "metadata": {
-"id": "
+"id": "9ZiTsF9jV0TV"
 },
 "execution_count": null,
 "outputs": []
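Because the new cell divides each text encoding by its L2 norm before storing it, a later similarity lookup reduces to a dot product. A minimal sketch of reading one generated .db back and ranking its entries against a query, assuming the tokenizer, model and device defined in the cell above are in scope; the query string and the database basename are illustrative placeholders:

```python
import shelve
import torch

# Encode and L2-normalize a query the same way the stored prompts were encoded.
with torch.no_grad():
    inputs = tokenizer(text='a watercolor portrait', padding=True, return_tensors='pt').to(device)
    query = model.get_text_features(**inputs)
    query = query / query.norm(p=2, dim=-1, keepdim=True)

# Score every stored encoding; with unit-length vectors the dot product is the cosine similarity.
scores = {}
with shelve.open('🦜 fusion-t2i-prompt-features-1') as d:
    for key, feat in d.items():
        scores[key] = torch.dot(query[0], feat.to(device)[0]).item()

# Print the five closest stored prompt indices.
print(sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:5])
```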
@@ -1182,18 +1171,12 @@
 {
 "cell_type": "code",
 "source": [
-"#
-"
-"
-"for index in range(NUM_SUFFIX):\n",
-" inputs = tokenizer(text = 'in a '+get_suffix(index), padding=True, return_tensors=\"pt\").to(device)\n",
-" text_features = model.get_text_features(**inputs).to(device)\n",
-" d[f'{index}'] = text_features.to('cpu')\n",
-"#----#\n",
-"d.close() #close the file"
+"# @title Download the created text_encodings as .zip file\n",
+"%cd /content/\n",
+"!zip -r /content/text-encodings.zip /content/text_encodings"
 ],
 "metadata": {
-"id": "
+"id": "gX-sHZPWj4Lt"
 },
 "execution_count": null,
 "outputs": []
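The new cell only writes the archive inside the Colab VM; a short sketch of pulling it down to the local machine afterwards, assuming the notebook runs in Colab (the google.colab module is unavailable elsewhere):

```python
# Trigger a browser download of the archive produced by the zip cell above.
from google.colab import files

files.download('/content/text-encodings.zip')
```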