EleutherAI
/

skip-transcoder-SmolLM2-135M-128x

Model card | Files and versions | Community

MrGonao committed 23 days ago

Commit

651f514

verified ·

1 Parent(s): 0bc2fcc

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

single_128x/config.json +1 -0
single_128x/model.layers.0.mlp/cfg.json +1 -0
single_128x/model.layers.0.mlp/sae.safetensors +3 -0
single_128x/model.layers.1.mlp/cfg.json +1 -0
single_128x/model.layers.1.mlp/sae.safetensors +3 -0
single_128x/model.layers.10.mlp/cfg.json +1 -0
single_128x/model.layers.10.mlp/sae.safetensors +3 -0
single_128x/model.layers.11.mlp/cfg.json +1 -0
single_128x/model.layers.11.mlp/sae.safetensors +3 -0
single_128x/model.layers.12.mlp/cfg.json +1 -0
single_128x/model.layers.12.mlp/sae.safetensors +3 -0
single_128x/model.layers.13.mlp/cfg.json +1 -0
single_128x/model.layers.13.mlp/sae.safetensors +3 -0
single_128x/model.layers.14.mlp/cfg.json +1 -0
single_128x/model.layers.14.mlp/sae.safetensors +3 -0
single_128x/model.layers.15.mlp/cfg.json +1 -0
single_128x/model.layers.15.mlp/sae.safetensors +3 -0
single_128x/model.layers.16.mlp/cfg.json +1 -0
single_128x/model.layers.16.mlp/sae.safetensors +3 -0
single_128x/model.layers.17.mlp/cfg.json +1 -0
single_128x/model.layers.17.mlp/sae.safetensors +3 -0
single_128x/model.layers.18.mlp/cfg.json +1 -0
single_128x/model.layers.18.mlp/sae.safetensors +3 -0
single_128x/model.layers.19.mlp/cfg.json +1 -0
single_128x/model.layers.19.mlp/sae.safetensors +3 -0
single_128x/model.layers.2.mlp/cfg.json +1 -0
single_128x/model.layers.2.mlp/sae.safetensors +3 -0
single_128x/model.layers.20.mlp/cfg.json +1 -0
single_128x/model.layers.20.mlp/sae.safetensors +3 -0
single_128x/model.layers.20.mlp_old/cfg.json +1 -0
single_128x/model.layers.20.mlp_old/sae.safetensors +3 -0
single_128x/model.layers.21.mlp/cfg.json +1 -0
single_128x/model.layers.21.mlp/sae.safetensors +3 -0
single_128x/model.layers.21.mlp_old/cfg.json +1 -0
single_128x/model.layers.21.mlp_old/sae.safetensors +3 -0
single_128x/model.layers.22.mlp/cfg.json +1 -0
single_128x/model.layers.22.mlp/sae.safetensors +3 -0
single_128x/model.layers.22.mlp_old/cfg.json +1 -0
single_128x/model.layers.22.mlp_old/sae.safetensors +3 -0
single_128x/model.layers.23.mlp/cfg.json +1 -0
single_128x/model.layers.23.mlp/sae.safetensors +3 -0
single_128x/model.layers.24.mlp/cfg.json +1 -0
single_128x/model.layers.24.mlp/sae.safetensors +3 -0
single_128x/model.layers.25.mlp/cfg.json +1 -0
single_128x/model.layers.25.mlp/sae.safetensors +3 -0
single_128x/model.layers.26.mlp/cfg.json +1 -0
single_128x/model.layers.26.mlp/sae.safetensors +3 -0
single_128x/model.layers.27.mlp/cfg.json +1 -0
single_128x/model.layers.27.mlp/sae.safetensors +3 -0
single_128x/model.layers.28.mlp/cfg.json +1 -0

single_128x/config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"sae": {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false}, "batch_size": 8, "grad_acc_steps": 4, "micro_acc_steps": 1, "optimizer": "signum", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["model.layers.17.mlp", "model.layers.18.mlp", "model.layers.19.mlp", "model.layers.20.mlp", "model.layers.21.mlp", "model.layers.22.mlp", "model.layers.24.mlp", "model.layers.29.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "single_128x", "wandb_log_frequency": 1, "model": "HuggingFaceTB/SmolLM2-135M", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 500000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}

single_128x/model.layers.0.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.0.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92299d41037bb97d4819c4bb29d917db2ae7a8420bae7803994fd42a23235795
+size 341363360

single_128x/model.layers.1.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.1.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c75b340c8f2e77ee66073cef10e67e12e80f1d156ed165b7b1b4ecd7e0522242
+size 341363360

single_128x/model.layers.10.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.10.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4cb18c5d7da8cadcaeaa4e54ab89be53a6eacb6f73999df2b6726c17b2d440b
+size 341363360

single_128x/model.layers.11.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.11.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa865413536b0df777441d0c41a7d72d57ede77a3d24c838a9fcdb713064671e
+size 341363360

single_128x/model.layers.12.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.12.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:088dcf999f7c73f87b86fb48fb72aa74ac4937768fc0ef28f53a6b7c1c5d11b6
+size 341363360

single_128x/model.layers.13.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.13.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76d9722130bc757ac0c047bc44f4d86cbe25f48e8387d62451fd4dd2423cf012
+size 341363360

single_128x/model.layers.14.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.14.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbe02cf1183e889d8d6f61e5f9c65d25283e9183e8df8dcdabbc8f4f0e3d9948
+size 341363360

single_128x/model.layers.15.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.15.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5a5790d95e1e2f846143f9bf395a479e542b6aaaf6c98b100dadf439e040551
+size 341363360

single_128x/model.layers.16.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "d_in": 576}

single_128x/model.layers.16.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4207e91baeacb6094a314b995b29886fcbf82fbf90dfbe5effc90fe5bf3cfb
+size 341363360

single_128x/model.layers.17.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.17.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22389bfbea332b1d241ba12334a82e2b700d7df9cf466a83ec15f31be471fd5c
+size 341363360

single_128x/model.layers.18.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.18.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c28b7dbb9a081701cee94bf8c584c25f9032e998b39e0eaa8611b018da114615
+size 341363360

single_128x/model.layers.19.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.19.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:63548cd5e1697979e13355c107127a5548c89033fc087ffd903bad7e9eb9b033
+size 341363360

single_128x/model.layers.2.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.2.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afa2ed5574d04b4b2ecfb53bcdc25650198d1117f9cbeaea089d244a094055b1
+size 341363360

single_128x/model.layers.20.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.20.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e51ecad8c9c1145f85f5887643d102ba807c7bb65c63a00f34b0e0b7519a8476
+size 341363360

single_128x/model.layers.20.mlp_old/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "d_in": 576}

single_128x/model.layers.20.mlp_old/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bbf6f1dc5377a17525fb999c4b10897c86ced205579c37121c8d6dc735b06ac
+size 341363360

single_128x/model.layers.21.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.21.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43fe17a75aeb346df081a2bd5d49ab16178623c04f02a99855d0b4aa84b7dd08
+size 341363360

single_128x/model.layers.21.mlp_old/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "d_in": 576}

single_128x/model.layers.21.mlp_old/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a89aa886e34339841f1d18aca73a4d07800d3628680f894ad5c322d924be86b0
+size 341363360

single_128x/model.layers.22.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.22.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f73dd558709203804e1b2526fc5cb67ab83ca0bbffaf29ad8322daba8e70def0
+size 341363360

single_128x/model.layers.22.mlp_old/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "d_in": 576}

single_128x/model.layers.22.mlp_old/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d176fb7a5a2688cad50a9c22cfcebb8cd1e0972639dc79f9e77a2fa49aed2750
+size 341363360

single_128x/model.layers.23.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.23.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1218d33e4b8bba5f4b067206a24693164208f6191f3885d1a86fd8ecbc0a0623
+size 341363360

single_128x/model.layers.24.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.24.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9703d9323baafaeab74d7357e6e2f3385b94c742bd873bcbeae009a8a7bb736d
+size 341363360

single_128x/model.layers.25.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "multi_layer": false, "lens": false, "d_in": 576}

single_128x/model.layers.25.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e56d01b7c5cd5e6ca031d5b3c43ecf34c3d184f8b9a7cdeb3d2fb86d7a2f745e
+size 341363360

single_128x/model.layers.26.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true,"d_in": 576}

single_128x/model.layers.26.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:430a72cf5fa68b97b887b93b289ee007353db785d04971fe64059edc02b703ab
+size 341363360

single_128x/model.layers.27.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "d_in": 576}

single_128x/model.layers.27.mlp/sae.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8317d1b216779c8667832b55664fabfe08fab6c3444856ce096c980d1b3bddb1
+size 341363360

single_128x/model.layers.28.mlp/cfg.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"activation": "topk", "expansion_factor": 128, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": true, "transcode": true, "d_in": 576}