pookiefoof committed on
Commit
2c1c516
·
0 Parent(s):

Initial release

Browse files
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ pipeline_tag: image-to-3d
4
+ tags:
5
+ - triposg
6
+ - 3d-generation
7
+ - rectified-flow
8
+ ---
9
+ # TripoSG - High-Fidelity 3D Shape Synthesis using Large-Scale Rectified Flow Models
10
+
11
+ TripoSG is a state-of-the-art image-to-3D generation foundation model that leverages large-scale rectified flow transformers to produce high-fidelity 3D shapes from single images.
12
+
13
+ ## Model Description
14
+
15
+ ### Model Architecture
16
+
17
+ TripoSG utilizes a novel architecture combining:
18
+ - Rectified Flow (RF) based Transformer for stable, linear trajectory modeling
19
+ - Advanced VAE with SDF-based representation and hybrid geometric supervision
20
+ - Cross-attention mechanism for image feature conditioning
21
+ - 1.5B parameters operating on 2048 latent tokens
22
+
23
+ ## Intended Uses
24
+
25
+ This model is designed for:
26
+ - Converting single images to high-quality 3D meshes
27
+ - Creative and design applications
28
+ - Gaming and VFX asset creation
29
+ - Prototyping and visualization
30
+
31
+ ## Requirements
32
+
33
+ - CUDA-capable GPU (>8GB VRAM)
34
+
35
+ ## Usage
36
+
37
+ For detailed usage instructions, please visit our [GitHub repository](https://github.com/VAST-AI-Research/TripoSG).
38
+
39
+ ## About
40
+
41
+ TripoSG is developed by [Tripo](https://www.tripo3d.ai) and [VAST AI Research](https://github.com/orgs/VAST-AI-Research), pushing the boundaries of 3D Generative AI.
42
+ For more information:
43
+ - [GitHub Repository](https://github.com/VAST-AI-Research/TripoSG)
44
+ - [Paper](https://arxiv.org/abs/2502.06608)
45
+ - [Gradio Demo](https://huggingface.co/spaces/VAST-AI/TripoSG)
feature_extractor_dinov2/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.485,
13
+ 0.456,
14
+ 0.406
15
+ ],
16
+ "image_processor_type": "BitImageProcessor",
17
+ "image_std": [
18
+ 0.229,
19
+ 0.224,
20
+ 0.225
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 256
26
+ }
27
+ }
image_encoder_dinov2/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/dinov2-large",
3
+ "apply_layernorm": true,
4
+ "architectures": [
5
+ "Dinov2Model"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "drop_path_rate": 0.0,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 1024,
12
+ "image_size": 518,
13
+ "initializer_range": 0.02,
14
+ "layer_norm_eps": 1e-06,
15
+ "layerscale_value": 1.0,
16
+ "mlp_ratio": 4,
17
+ "model_type": "dinov2",
18
+ "num_attention_heads": 16,
19
+ "num_channels": 3,
20
+ "num_hidden_layers": 24,
21
+ "out_features": [
22
+ "stage24"
23
+ ],
24
+ "out_indices": [
25
+ 24
26
+ ],
27
+ "patch_size": 14,
28
+ "qkv_bias": true,
29
+ "reshape_hidden_states": true,
30
+ "stage_names": [
31
+ "stem",
32
+ "stage1",
33
+ "stage2",
34
+ "stage3",
35
+ "stage4",
36
+ "stage5",
37
+ "stage6",
38
+ "stage7",
39
+ "stage8",
40
+ "stage9",
41
+ "stage10",
42
+ "stage11",
43
+ "stage12",
44
+ "stage13",
45
+ "stage14",
46
+ "stage15",
47
+ "stage16",
48
+ "stage17",
49
+ "stage18",
50
+ "stage19",
51
+ "stage20",
52
+ "stage21",
53
+ "stage22",
54
+ "stage23",
55
+ "stage24"
56
+ ],
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.45.2",
59
+ "use_swiglu_ffn": false
60
+ }
image_encoder_dinov2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399fba97a95f22c36834418bc69373364a99af3a1153da1c0fb31db567c92e23
3
+ size 1217522888
model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "TripoSGPipeline",
3
+ "_diffusers_version": "0.30.3",
4
+ "feature_extractor_dinov2": [
5
+ "transformers",
6
+ "BitImageProcessor"
7
+ ],
8
+ "image_encoder_dinov2": [
9
+ "transformers",
10
+ "Dinov2Model"
11
+ ],
12
+ "scheduler": [
13
+ "triposg.schedulers.scheduling_rectified_flow",
14
+ "RectifiedFlowScheduler"
15
+ ],
16
+ "transformer": [
17
+ "triposg.models.transformers.triposg_transformer",
18
+ "TripoSGDiTModel"
19
+ ],
20
+ "vae": [
21
+ "triposg.models.autoencoders.autoencoder_kl_triposg",
22
+ "TripoSGVAEModel"
23
+ ]
24
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "RectifiedFlowScheduler",
3
+ "_diffusers_version": "0.30.3",
4
+ "num_train_timesteps": 1000,
5
+ "shift": 1,
6
+ "use_dynamic_shifting": false
7
+ }
transformer/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "TripoSGDiTModel",
3
+ "_diffusers_version": "0.30.3",
4
+ "cross_attention_dim": 1024,
5
+ "in_channels": 64,
6
+ "num_attention_heads": 16,
7
+ "num_layers": 21,
8
+ "width": 2048
9
+ }
transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9192b5923f7b605b394192809aa2ceb73bf0f4009674d8e3b999b45bb97d4bf2
3
+ size 5758280104
vae/config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "TripoSGVAEModel",
3
+ "_diffusers_version": "0.30.3",
4
+ "embed_frequency": 8,
5
+ "embed_include_pi": false,
6
+ "embedding_type": "frequency",
7
+ "in_channels": 3,
8
+ "latent_channels": 64,
9
+ "num_attention_heads": 8,
10
+ "num_layers_decoder": 16,
11
+ "num_layers_encoder": 8,
12
+ "width_decoder": 1024,
13
+ "width_encoder": 512
14
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2e667c24927a5a35e5f19fcb4c75890e9399aa966b6db8131d7df733a750c8b
3
+ size 970685468