lucian-li committed
Commit feae91a · verified · 1 Parent(s): 58520dd

Add new SentenceTransformer model
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 768,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
README.md ADDED
@@ -0,0 +1,1192 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:583058
+ - loss:MultipleNegativesRankingLoss
+ base_model: Alibaba-NLP/gte-multilingual-base
+ widget:
+ - source_sentence: 'Pre-Emphasis (PE)
+
+     A pre-emphasis filter is applied to the framed offset-free input signal:
+
+     s_pe(n) = s_of(n) - 0.97 * s_of(n - 1)'
+   sentences:
+   - 'Windowing (W)
+
+     A Hamming window of length N is applied to the output of the pre-emphasis block:
+
+     s_w(n) = {0.54 - 0.46 * cos(2*pi*(n - 1)/(N - 1))} * s_pe(n), 1 <= n <= N'
+   - 'Group or broadcast call, called mobile stations (GSM only)
+
+     Within each set of voice group call or voice broadcast call attributes stored
+     in the GCR as defined in 3GPP TS 43.068
+
+     and 3GPP TS 43.069, respectively, a priority level is included if eMLPP is applied.
+     The priority level will be provided
+
+     by the GCR to the MSC together with the call attributes.
+
+     The priority level shall be indicated together with the related notification messages
+     and treated in the mobile station as
+
+     defined in 3GPP TS 43.0'
+   - 'Description of the access technology indicator mechanism
+
+     This clause describes the mechanisms that can be employed to indicate access technology
+     specific dependencies in a
+
+     multi-access technology environment.
+
+     There are cases where toolkit applications need to know which access technology
+     the terminal is currently in so that it
+
+     can issue access technology dependent commands as well as determine that the response
+     to a particular command is
+
+     technology dependent. Setting up the event, ACCESS TECHNOL'
+ - source_sentence: 'Distribution of DL delay between NG-RAN and UE
+
+     a) This measurement provides the distribution of DL packet delay between NG-RAN
+     and UE, which is the delay
+
+     incurred in NG-RAN (including the delay at gNB-CU-UP, on F1-U and on gNB-DU) and
+     the delay over Uu
+
+     interface. This measurement is split into subcounters per 5QI and subcounters
+     per S-NSSAI.
+
+     b) DER (n=1).
+
+
+     ETSI
+
+     ETSI TS 128 552 V16.18.0 (2024-08)'
+   sentences:
+   - 'Distribution of UL delay between NG-RAN and UE
+
+     a) This measurement provides the distribution of UL packet delay between NG-RAN
+     and UE, which is the delay
+
+     incurred in NG-RAN (including the delay at gNB-CU-UP, on F1-U and on gNB-DU) and
+     the delay over Uu
+
+     interface. This measurement is split into subcounters per 5QI and subcounters
+     per S-NSSAI.
+
+     b) DER (n=1).
+
+     c) The measurement is obtained by the following method:
+
+
+     The gNB performs the GTP PDU packet delay measurement for QoS monitoring per the
+     GTP '
+   - 'Subscriber data
+
+     Subscription to MExE services shall be logically separate to subscription of network
+     services. A subscriber may have a
+
+     MExE subscription to multiple MExE service providers. It may also be possible
+     for the subscriber to interrogate such
+
+     subscription registration (with a suitable means of authorisation), depending
+     on PLMN support.'
+   - 'MSC for LMU Control
+
+     When a control message has to be routed to an LMU from an SMLC, the SMLC addresses
+     the serving MSC for the
+
+     LMU using an E.164 address.
+
+
+     ETSI
+
+     ETSI TS 129 002 V10.6.0 (2012-04)'
+ - source_sentence: 'Enter SMS Block Mode Protocol +CESP
+
+     Table 3.2.4-1: +CESP Action Command Syntax
+
+     Command
+
+     Possible response(s)
+
+     +CESP
+
+
+     +CESP=?
+
+
+
+     Description
+
+     Execution command sets the TA in SMS block protocol mode. The TA shall return
+     OK (or 0) to confirm acceptance of
+
+     the command prior to entering the block mode (see clause 2.1.1). The final result
+     code OK (or 0) shall be returned when
+
+     the block mode is exited.
+
+     NOTE:
+
+     Commands following +CESP in the AT command line must not be processed by the TA.
+
+     Implementation
+
+     Ma'
+   sentences:
+   - 'SGSN
+
+     To support NBIFOM, the SGSN needs to be capable to:
+
+
+     ETSI
+
+     ETSI TS 123 161 V14.0.0 (2017-05)'
+   - 'Message Service Failure Result Code +CMS ERROR
+
+     Final result code +CMS ERROR: <err> indicates an error related to mobile equipment
+     or network. The operation is
+
+     similar to ERROR final result code. None of the following commands in the same
+     command line is executed. Neither
+
+     ERROR nor OK final result code shall be returned. ERROR is returned normally when
+     error is related to syntax or invalid
+
+     parameters.
+
+     Defined Values
+
+     <err> values used by common messaging commands:'
+   - 'C C - - P Service Priority Level'
+ - source_sentence: 'Definition
+
+     Cell synchronization accuracy is defined as the maximum deviation in frame start
+     times between any pair of cells on the
+
+     same frequency that have overlapping coverage areas.'
+   sentences:
+   - 'Minimum requirements
+
+     The cell synchronization accuracy shall be better than or equal to 3μs.'
+   - "Subsequent Inter-MSC Handover to third MSC\nWhen a Mobile Station is being handed
+     over to a third MSC, the procedure (described in GSM 03.09)\ndoes require one
+     specific interworking case in MSC-A (figure 20) between E-Interface from MSC-B
+     and E-Interface from MSC-B' other than the combination of the ones described
+     in the chapter 4.5.1 and 4.5.2.\n[figure 20, a character-graphics signalling
+     diagram between MSC-A, MSC-B and MSC-B', survives only as mojibake in the source
+     and is omitted]"
+   - 'DL Total PRB Usage
+
+     a) This measurement provides the total usage (in percentage) of physical resource
+     blocks (PRBs) on the downlink
+
+     for any purpose.
+
+     b) SI
+
+     c) This measurement is obtained as: (number of PRBs used on the downlink) /
+     (total number of PRBs available on the downlink) × 100'
+ - source_sentence: Carrier aggregation measurement accuracy
+   sentences:
+   - 'PUCCH / PUSCH / SRS time mask
+
+     The PUCCH/PUSCH/SRS time mask defines the observation period between sounding
+     reference symbol (SRS) and an
+
+     adjacent PUSCH/PUCCH symbol and subsequent sub-frame.
+
+     There are no additional requirements on UE transmit power beyond that which is
+     required in subclause 6.2.2 and
+
+     subclause 6.6.2.3
+
+
+     ETSI
+
+     ETSI TS 136 101 V9.16.0 (2013-07)'
+   - 'Reference Signal Time Difference (RSTD) Measurement Accuracy
+
+     Requirements for Carrier Aggregation
+
+     A.8
+
+     UE Measurements Procedures
+
+     A.9
+
+     Measurement Performance Requirements
+
+     NOTE:
+
+     Only requirements and test cases in this table defined for inter-band carrier
+     aggregation shall apply.
+
+
+     ETSI
+
+     ETSI TS 136 307 V10.17.0 (2016-01)'
+   - 'Operator control
+
+     Three general architectures are candidates to offer energy savings functionalities:
+
+     Distributed, NM-Centralized, EM-Centralized as defined in TS 32.500 [6].
+
+     Energy savings in cells can be initiated in several different ways. Some of the
+     mechanisms are:
+
+     For NM-centralized architecture
+
+     -
+
+     IRPManager instructs the cells to move to energySaving state (e.g. according to
+     a schedule determined by
+
+     network statistics) , configures trigger points (e.g. load threshold crossing)
+     when it want'
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ ---
+
+ # SentenceTransformer based on Alibaba-NLP/gte-multilingual-base
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) <!-- at revision 9fdd4ee8bba0e2808a34e0e739576f6740d2b225 -->
+ - **Maximum Sequence Length:** 8192 tokens
+ - **Output Dimensionality:** 768 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+   (2): Normalize()
+ )
+ ```
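+
+ The module list above is a three-step pipeline: tokenize, run the transformer, take the `[CLS]` token vector (CLS pooling, per `1_Pooling/config.json`), then L2-normalize. As a rough sketch, the same embeddings can be computed with plain `transformers`; this assumes the `lucian-li/my_new_model` repository id from the usage example below and that loading the custom `NewModel` code with `trust_remote_code=True` is acceptable:
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+ from transformers import AutoModel, AutoTokenizer
+
+ repo = "lucian-li/my_new_model"  # assumed repo id, as in the usage example below
+ tokenizer = AutoTokenizer.from_pretrained(repo)
+ model = AutoModel.from_pretrained(repo, trust_remote_code=True)  # custom "new" architecture
+
+ batch = tokenizer(["Carrier aggregation measurement accuracy"],
+                   padding=True, truncation=True, max_length=8192, return_tensors="pt")
+ with torch.no_grad():
+     hidden = model(**batch).last_hidden_state  # (batch, seq_len, 768)
+ cls = hidden[:, 0]                             # CLS pooling
+ embeddings = F.normalize(cls, p=2, dim=1)      # Normalize() module
+ print(embeddings.shape)                        # torch.Size([1, 768])
+ ```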
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("lucian-li/my_new_model")
+ # Run inference
+ sentences = [
+     'Carrier aggregation measurement accuracy',
+     'Reference Signal Time Difference (RSTD) Measurement Accuracy\nRequirements for Carrier Aggregation\nA.8\nUE Measurements Procedures\nA.9\nMeasurement Performance Requirements\nNOTE:\nOnly requirements and test cases in this table defined for inter-band carrier aggregation shall apply.\n\n\nETSI\nETSI TS 136 307 V10.17.0 (2016-01)',
+     'Operator control\nThree general architectures are candidates to offer energy savings functionalities:\nDistributed, NM-Centralized, EM-Centralized as defined in TS 32.500 [6].\nEnergy savings in cells can be initiated in several different ways. Some of the mechanisms are:\nFor NM-centralized architecture\n-\nIRPManager instructs the cells to move to energySaving state (e.g. according to a schedule determined by\nnetwork statistics) , configures trigger points (e.g. load threshold crossing) when it want',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 768]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
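+
+ Because the final `Normalize()` module makes every embedding unit-length, the cosine similarity computed by `model.similarity` coincides with a plain dot product. Building on that, here is a small retrieval sketch; the corpus strings are headings taken from the widget examples above, while the query string is invented for illustration:
+
+ ```python
+ from sentence_transformers import SentenceTransformer, util
+
+ model = SentenceTransformer("lucian-li/my_new_model")
+
+ corpus = [
+     "Distribution of DL delay between NG-RAN and UE",
+     "Enter SMS Block Mode Protocol +CESP",
+     "Cell synchronization accuracy",
+ ]
+ corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
+
+ query_embedding = model.encode("How accurately must neighbouring cells be synchronized?",
+                                convert_to_tensor=True)
+ # For each query, util.semantic_search returns the top_k corpus hits with cosine scores
+ hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)[0]
+ for hit in hits:
+     print(corpus[hit["corpus_id"]], round(hit["score"], 4))
+ ```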
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 583,058 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor                                                                              | positive                                                                            |
+   |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
+   | type    | string                                                                              | string                                                                              |
+   | details | <ul><li>min: 7 tokens</li><li>mean: 85.73 tokens</li><li>max: 229 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 85.86 tokens</li><li>max: 229 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>Triggering Optimization Function (TG_F)<br>This functional bloc supports the following functions: [SO2], [SO3].</code> | <code>Optimization Fallback Function (O_FB_F)<br>This functional bloc supports the following functions: [SO7], [SO9], [SO10].</code> |
+   | <code>Optimization Fallback Function (O_FB_F)<br>This functional bloc supports the following functions: [SO7], [SO9], [SO10].</code> | <code>Self-Optimization Progress Update Function (SO_PGS_UF)<br>This function updates the self-optimization progress and important events to the operator: [SO11]</code> |
+   | <code>Self-Optimization Progress Update Function (SO_PGS_UF)<br>This function updates the self-optimization progress and important events to the operator: [SO11]</code> | <code>NRM IRP Update Function (NRM_UF)<br>This function updates the E-UTRAN and EPC NRM IRP with the optimization modification if needed.</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim"
+   }
+   ```
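+
+ For reference, a run like this one could be reproduced with the sentence-transformers v3 trainer roughly as sketched below, combining the loss settings above with the non-default hyperparameters listed in the next section; the (anchor, positive) pair is an illustrative stand-in for the real 583,058-row dataset:
+
+ ```python
+ from datasets import Dataset
+ from sentence_transformers import (
+     SentenceTransformer,
+     SentenceTransformerTrainer,
+     SentenceTransformerTrainingArguments,
+ )
+ from sentence_transformers.losses import MultipleNegativesRankingLoss
+
+ model = SentenceTransformer("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
+
+ # Illustrative (anchor, positive) pair; the actual training set has 583,058 rows.
+ train_dataset = Dataset.from_dict({
+     "anchor": ["Distribution of DL delay between NG-RAN and UE ..."],
+     "positive": ["Distribution of UL delay between NG-RAN and UE ..."],
+ })
+
+ # In-batch negatives: every other positive in a batch acts as a negative for each anchor.
+ loss = MultipleNegativesRankingLoss(model, scale=20.0)
+
+ args = SentenceTransformerTrainingArguments(
+     output_dir="my_new_model",
+     per_device_train_batch_size=11,  # non-default values from this card
+     num_train_epochs=1,
+     warmup_ratio=0.1,
+ )
+
+ trainer = SentenceTransformerTrainer(
+     model=model, args=args, train_dataset=train_dataset, loss=loss,
+ )
+ trainer.train()
+ ```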
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `per_device_train_batch_size`: 11
+ - `num_train_epochs`: 1
+ - `warmup_ratio`: 0.1
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: no
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 11
+ - `per_device_eval_batch_size`: 8
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 5e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 1
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `tp_size`: 0
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`: 
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
+
+ ### Training Logs
+ <details><summary>Click to expand</summary>
+
+ | Epoch  | Step  | Training Loss |
+ |:------:|:-----:|:-------------:|
+ | 0.0019 | 100   | 0.8198        |
+ | 0.0038 | 200   | 0.7651        |
+ | 0.0057 | 300   | 0.6659        |
+ | 0.0075 | 400   | 0.6404        |
+ | 0.0094 | 500   | 0.5638        |
+ | 0.0113 | 600   | 0.5184        |
+ | 0.0132 | 700   | 0.448         |
+ | 0.0151 | 800   | 0.4464        |
+ | 0.0170 | 900   | 0.3461        |
+ | 0.0189 | 1000  | 0.3731        |
+ | 0.0208 | 1100  | 0.343         |
+ | 0.0226 | 1200  | 0.3557        |
+ | 0.0245 | 1300  | 0.3623        |
+ | 0.0264 | 1400  | 0.2941        |
+ | 0.0283 | 1500  | 0.3153        |
+ | 0.0302 | 1600  | 0.2724        |
+ | 0.0321 | 1700  | 0.2702        |
+ | 0.0340 | 1800  | 0.2934        |
+ | 0.0358 | 1900  | 0.2255        |
+ | 0.0377 | 2000  | 0.2519        |
+ | 0.0396 | 2100  | 0.2424        |
+ | 0.0415 | 2200  | 0.1883        |
+ | 0.0434 | 2300  | 0.2428        |
+ | 0.0453 | 2400  | 0.2212        |
+ | 0.0472 | 2500  | 0.1862        |
+ | 0.0491 | 2600  | 0.2451        |
+ | 0.0509 | 2700  | 0.2336        |
+ | 0.0528 | 2800  | 0.225         |
+ | 0.0547 | 2900  | 0.2154        |
+ | 0.0566 | 3000  | 0.1907        |
+ | 0.0585 | 3100  | 0.2514        |
+ | 0.0604 | 3200  | 0.2082        |
+ | 0.0623 | 3300  | 0.2076        |
+ | 0.0641 | 3400  | 0.1818        |
+ | 0.0660 | 3500  | 0.1688        |
+ | 0.0679 | 3600  | 0.2261        |
+ | 0.0698 | 3700  | 0.2108        |
+ | 0.0717 | 3800  | 0.1732        |
+ | 0.0736 | 3900  | 0.1764        |
+ | 0.0755 | 4000  | 0.1481        |
+ | 0.0773 | 4100  | 0.1687        |
+ | 0.0792 | 4200  | 0.1897        |
+ | 0.0811 | 4300  | 0.1685        |
+ | 0.0830 | 4400  | 0.1915        |
+ | 0.0849 | 4500  | 0.2013        |
+ | 0.0868 | 4600  | 0.1701        |
+ | 0.0887 | 4700  | 0.2006        |
+ | 0.0906 | 4800  | 0.2006        |
+ | 0.0924 | 4900  | 0.1617        |
+ | 0.0943 | 5000  | 0.1406        |
+ | 0.0962 | 5100  | 0.1456        |
+ | 0.0981 | 5200  | 0.1703        |
+ | 0.1000 | 5300  | 0.1464        |
+ | 0.1019 | 5400  | 0.1803        |
+ | 0.1038 | 5500  | 0.1346        |
+ | 0.1056 | 5600  | 0.134         |
+ | 0.1075 | 5700  | 0.1567        |
+ | 0.1094 | 5800  | 0.163         |
+ | 0.1113 | 5900  | 0.1544        |
+ | 0.1132 | 6000  | 0.1648        |
+ | 0.1151 | 6100  | 0.1505        |
+ | 0.1170 | 6200  | 0.1231        |
+ | 0.1189 | 6300  | 0.1591        |
+ | 0.1207 | 6400  | 0.1533        |
+ | 0.1226 | 6500  | 0.1376        |
+ | 0.1245 | 6600  | 0.1473        |
+ | 0.1264 | 6700  | 0.1405        |
+ | 0.1283 | 6800  | 0.141         |
+ | 0.1302 | 6900  | 0.1105        |
+ | 0.1321 | 7000  | 0.1712        |
+ | 0.1339 | 7100  | 0.1534        |
+ | 0.1358 | 7200  | 0.1578        |
+ | 0.1377 | 7300  | 0.1101        |
+ | 0.1396 | 7400  | 0.128         |
+ | 0.1415 | 7500  | 0.1679        |
+ | 0.1434 | 7600  | 0.1592        |
+ | 0.1453 | 7700  | 0.1383        |
+ | 0.1472 | 7800  | 0.1274        |
+ | 0.1490 | 7900  | 0.1616        |
+ | 0.1509 | 8000  | 0.1617        |
+ | 0.1528 | 8100  | 0.1361        |
+ | 0.1547 | 8200  | 0.1268        |
+ | 0.1566 | 8300  | 0.1286        |
+ | 0.1585 | 8400  | 0.1253        |
+ | 0.1604 | 8500  | 0.1157        |
+ | 0.1622 | 8600  | 0.1499        |
+ | 0.1641 | 8700  | 0.1398        |
+ | 0.1660 | 8800  | 0.1188        |
+ | 0.1679 | 8900  | 0.1103        |
+ | 0.1698 | 9000  | 0.1217        |
+ | 0.1717 | 9100  | 0.1144        |
+ | 0.1736 | 9200  | 0.1203        |
+ | 0.1755 | 9300  | 0.1074        |
+ | 0.1773 | 9400  | 0.1145        |
+ | 0.1792 | 9500  | 0.1035        |
+ | 0.1811 | 9600  | 0.1406        |
+ | 0.1830 | 9700  | 0.1465        |
+ | 0.1849 | 9800  | 0.1169        |
+ | 0.1868 | 9900  | 0.1115        |
+ | 0.1887 | 10000 | 0.1207        |
+ | 0.1905 | 10100 | 0.1191        |
+ | 0.1924 | 10200 | 0.1099        |
+ | 0.1943 | 10300 | 0.1309        |
+ | 0.1962 | 10400 | 0.1092        |
+ | 0.1981 | 10500 | 0.1075        |
+ | 0.2000 | 10600 | 0.1174        |
+ | 0.2019 | 10700 | 0.1103        |
+ | 0.2038 | 10800 | 0.1077        |
+ | 0.2056 | 10900 | 0.0844        |
+ | 0.2075 | 11000 | 0.1093        |
+ | 0.2094 | 11100 | 0.1428        |
+ | 0.2113 | 11200 | 0.0928        |
+ | 0.2132 | 11300 | 0.1039        |
+ | 0.2151 | 11400 | 0.1436        |
+ | 0.2170 | 11500 | 0.1197        |
+ | 0.2188 | 11600 | 0.1249        |
+ | 0.2207 | 11700 | 0.0856        |
+ | 0.2226 | 11800 | 0.1126        |
+ | 0.2245 | 11900 | 0.1028        |
+ | 0.2264 | 12000 | 0.0988        |
+ | 0.2283 | 12100 | 0.1031        |
+ | 0.2302 | 12200 | 0.101         |
+ | 0.2320 | 12300 | 0.1188        |
+ | 0.2339 | 12400 | 0.0908        |
+ | 0.2358 | 12500 | 0.069         |
+ | 0.2377 | 12600 | 0.1099        |
+ | 0.2396 | 12700 | 0.1227        |
+ | 0.2415 | 12800 | 0.0794        |
+ | 0.2434 | 12900 | 0.0969        |
+ | 0.2453 | 13000 | 0.0864        |
+ | 0.2471 | 13100 | 0.1193        |
+ | 0.2490 | 13200 | 0.0824        |
+ | 0.2509 | 13300 | 0.12          |
+ | 0.2528 | 13400 | 0.0928        |
+ | 0.2547 | 13500 | 0.1126        |
+ | 0.2566 | 13600 | 0.0912        |
+ | 0.2585 | 13700 | 0.1126        |
+ | 0.2603 | 13800 | 0.078         |
+ | 0.2622 | 13900 | 0.0715        |
+ | 0.2641 | 14000 | 0.1095        |
+ | 0.2660 | 14100 | 0.089         |
+ | 0.2679 | 14200 | 0.0926        |
+ | 0.2698 | 14300 | 0.086         |
+ | 0.2717 | 14400 | 0.1115        |
+ | 0.2736 | 14500 | 0.0996        |
+ | 0.2754 | 14600 | 0.1014        |
+ | 0.2773 | 14700 | 0.1033        |
+ | 0.2792 | 14800 | 0.0732        |
+ | 0.2811 | 14900 | 0.0994        |
+ | 0.2830 | 15000 | 0.0872        |
+ | 0.2849 | 15100 | 0.0923        |
+ | 0.2868 | 15200 | 0.111         |
+ | 0.2886 | 15300 | 0.0891        |
+ | 0.2905 | 15400 | 0.0868        |
+ | 0.2924 | 15500 | 0.0773        |
+ | 0.2943 | 15600 | 0.0918        |
+ | 0.2962 | 15700 | 0.0726        |
+ | 0.2981 | 15800 | 0.0951        |
+ | 0.3000 | 15900 | 0.0835        |
+ | 0.3019 | 16000 | 0.083         |
+ | 0.3037 | 16100 | 0.095         |
+ | 0.3056 | 16200 | 0.0722        |
+ | 0.3075 | 16300 | 0.1061        |
+ | 0.3094 | 16400 | 0.0902        |
+ | 0.3113 | 16500 | 0.0978        |
+ | 0.3132 | 16600 | 0.0983        |
+ | 0.3151 | 16700 | 0.0808        |
+ | 0.3169 | 16800 | 0.0758        |
+ | 0.3188 | 16900 | 0.071         |
+ | 0.3207 | 17000 | 0.0918        |
+ | 0.3226 | 17100 | 0.1011        |
+ | 0.3245 | 17200 | 0.079         |
+ | 0.3264 | 17300 | 0.0992        |
+ | 0.3283 | 17400 | 0.1089        |
+ | 0.3302 | 17500 | 0.0904        |
+ | 0.3320 | 17600 | 0.0956        |
+ | 0.3339 | 17700 | 0.0747        |
+ | 0.3358 | 17800 | 0.0961        |
+ | 0.3377 | 17900 | 0.0923        |
+ | 0.3396 | 18000 | 0.1114        |
+ | 0.3415 | 18100 | 0.0689        |
+ | 0.3434 | 18200 | 0.1308        |
+ | 0.3452 | 18300 | 0.0923        |
+ | 0.3471 | 18400 | 0.0756        |
+ | 0.3490 | 18500 | 0.0842        |
+ | 0.3509 | 18600 | 0.0859        |
+ | 0.3528 | 18700 | 0.0903        |
+ | 0.3547 | 18800 | 0.084         |
+ | 0.3566 | 18900 | 0.0923        |
+ | 0.3584 | 19000 | 0.0848        |
+ | 0.3603 | 19100 | 0.0812        |
+ | 0.3622 | 19200 | 0.0872        |
+ | 0.3641 | 19300 | 0.083         |
+ | 0.3660 | 19400 | 0.0826        |
+ | 0.3679 | 19500 | 0.101         |
+ | 0.3698 | 19600 | 0.0804        |
+ | 0.3717 | 19700 | 0.0676        |
+ | 0.3735 | 19800 | 0.0836        |
+ | 0.3754 | 19900 | 0.0711        |
+ | 0.3773 | 20000 | 0.0825        |
+ | 0.3792 | 20100 | 0.0835        |
+ | 0.3811 | 20200 | 0.0816        |
+ | 0.3830 | 20300 | 0.0812        |
+ | 0.3849 | 20400 | 0.0689        |
+ | 0.3867 | 20500 | 0.0627        |
+ | 0.3886 | 20600 | 0.0965        |
+ | 0.3905 | 20700 | 0.0632        |
+ | 0.3924 | 20800 | 0.0945        |
+ | 0.3943 | 20900 | 0.0923        |
+ | 0.3962 | 21000 | 0.0833        |
+ | 0.3981 | 21100 | 0.0537        |
+ | 0.4000 | 21200 | 0.0822        |
+ | 0.4018 | 21300 | 0.0684        |
+ | 0.4037 | 21400 | 0.0807        |
+ | 0.4056 | 21500 | 0.0945        |
+ | 0.4075 | 21600 | 0.0981        |
+ | 0.4094 | 21700 | 0.0748        |
+ | 0.4113 | 21800 | 0.0943        |
+ | 0.4132 | 21900 | 0.0709        |
+ | 0.4150 | 22000 | 0.0551        |
+ | 0.4169 | 22100 | 0.0679        |
+ | 0.4188 | 22200 | 0.0666        |
+ | 0.4207 | 22300 | 0.0976        |
+ | 0.4226 | 22400 | 0.0666        |
+ | 0.4245 | 22500 | 0.0651        |
+ | 0.4264 | 22600 | 0.0803        |
+ | 0.4283 | 22700 | 0.068         |
+ | 0.4301 | 22800 | 0.0541        |
+ | 0.4320 | 22900 | 0.0487        |
+ | 0.4339 | 23000 | 0.091         |
+ | 0.4358 | 23100 | 0.074         |
+ | 0.4377 | 23200 | 0.0733        |
+ | 0.4396 | 23300 | 0.0845        |
+ | 0.4415 | 23400 | 0.0823        |
+ | 0.4433 | 23500 | 0.0561        |
+ | 0.4452 | 23600 | 0.0508        |
+ | 0.4471 | 23700 | 0.074         |
+ | 0.4490 | 23800 | 0.0683        |
+ | 0.4509 | 23900 | 0.0797        |
+ | 0.4528 | 24000 | 0.0561        |
+ | 0.4547 | 24100 | 0.0744        |
+ | 0.4566 | 24200 | 0.0638        |
+ | 0.4584 | 24300 | 0.0633        |
+ | 0.4603 | 24400 | 0.062         |
+ | 0.4622 | 24500 | 0.0887        |
+ | 0.4641 | 24600 | 0.0908        |
+ | 0.4660 | 24700 | 0.0654        |
+ | 0.4679 | 24800 | 0.0522        |
+ | 0.4698 | 24900 | 0.0851        |
+ | 0.4716 | 25000 | 0.0763        |
+ | 0.4735 | 25100 | 0.0623        |
+ | 0.4754 | 25200 | 0.0712        |
+ | 0.4773 | 25300 | 0.0866        |
+ | 0.4792 | 25400 | 0.0812        |
+ | 0.4811 | 25500 | 0.0706        |
+ | 0.4830 | 25600 | 0.0734        |
+ | 0.4849 | 25700 | 0.068         |
+ | 0.4867 | 25800 | 0.111         |
+ | 0.4886 | 25900 | 0.0627        |
+ | 0.4905 | 26000 | 0.0459        |
+ | 0.4924 | 26100 | 0.0794        |
+ | 0.4943 | 26200 | 0.0547        |
+ | 0.4962 | 26300 | 0.0779        |
+ | 0.4981 | 26400 | 0.0609        |
+ | 0.4999 | 26500 | 0.0785        |
+ | 0.5018 | 26600 | 0.0722        |
+ | 0.5037 | 26700 | 0.0585        |
+ | 0.5056 | 26800 | 0.0572        |
+ | 0.5075 | 26900 | 0.0636        |
+ | 0.5094 | 27000 | 0.0642        |
+ | 0.5113 | 27100 | 0.0606        |
+ | 0.5131 | 27200 | 0.0725        |
+ | 0.5150 | 27300 | 0.0664        |
+ | 0.5169 | 27400 | 0.0933        |
+ | 0.5188 | 27500 | 0.0486        |
+ | 0.5207 | 27600 | 0.0514        |
+ | 0.5226 | 27700 | 0.0779        |
+ | 0.5245 | 27800 | 0.0614        |
+ | 0.5264 | 27900 | 0.0646        |
+ | 0.5282 | 28000 | 0.0606        |
+ | 0.5301 | 28100 | 0.0453        |
+ | 0.5320 | 28200 | 0.0749        |
+ | 0.5339 | 28300 | 0.0695        |
+ | 0.5358 | 28400 | 0.0897        |
+ | 0.5377 | 28500 | 0.0612        |
+ | 0.5396 | 28600 | 0.0542        |
+ | 0.5414 | 28700 | 0.0504        |
+ | 0.5433 | 28800 | 0.0539        |
+ | 0.5452 | 28900 | 0.0584        |
+ | 0.5471 | 29000 | 0.0552        |
+ | 0.5490 | 29100 | 0.076         |
+ | 0.5509 | 29200 | 0.0861        |
+ | 0.5528 | 29300 | 0.067         |
+ | 0.5547 | 29400 | 0.0887        |
+ | 0.5565 | 29500 | 0.059         |
+ | 0.5584 | 29600 | 0.0484        |
+ | 0.5603 | 29700 | 0.0703        |
+ | 0.5622 | 29800 | 0.0802        |
+ | 0.5641 | 29900 | 0.0805        |
+ | 0.5660 | 30000 | 0.0737        |
+ | 0.5679 | 30100 | 0.0518        |
+ | 0.5697 | 30200 | 0.0517        |
+ | 0.5716 | 30300 | 0.0806        |
+ | 0.5735 | 30400 | 0.0586        |
+ | 0.5754 | 30500 | 0.0491        |
+ | 0.5773 | 30600 | 0.0591        |
+ | 0.5792 | 30700 | 0.066         |
+ | 0.5811 | 30800 | 0.0419        |
+ | 0.5830 | 30900 | 0.0517        |
+ | 0.5848 | 31000 | 0.0539        |
+ | 0.5867 | 31100 | 0.0845        |
+ | 0.5886 | 31200 | 0.044         |
+ | 0.5905 | 31300 | 0.0597        |
+ | 0.5924 | 31400 | 0.0556        |
+ | 0.5943 | 31500 | 0.0724        |
+ | 0.5962 | 31600 | 0.0465        |
+ | 0.5980 | 31700 | 0.0585        |
+ | 0.5999 | 31800 | 0.0978        |
+ | 0.6018 | 31900 | 0.0657        |
+ | 0.6037 | 32000 | 0.0438        |
+ | 0.6056 | 32100 | 0.0429        |
+ | 0.6075 | 32200 | 0.0629        |
+ | 0.6094 | 32300 | 0.0591        |
+ | 0.6113 | 32400 | 0.0543        |
+ | 0.6131 | 32500 | 0.0502        |
+ | 0.6150 | 32600 | 0.0733        |
+ | 0.6169 | 32700 | 0.0426        |
+ | 0.6188 | 32800 | 0.0626        |
+ | 0.6207 | 32900 | 0.0406        |
+ | 0.6226 | 33000 | 0.0524        |
+ | 0.6245 | 33100 | 0.0619        |
+ | 0.6263 | 33200 | 0.0633        |
+ | 0.6282 | 33300 | 0.0582        |
+ | 0.6301 | 33400 | 0.0852        |
+ | 0.6320 | 33500 | 0.0482        |
+ | 0.6339 | 33600 | 0.0509        |
+ | 0.6358 | 33700 | 0.0626        |
+ | 0.6377 | 33800 | 0.0609        |
+ | 0.6396 | 33900 | 0.0508        |
+ | 0.6414 | 34000 | 0.0486        |
+ | 0.6433 | 34100 | 0.0508        |
+ | 0.6452 | 34200 | 0.0581        |
+ | 0.6471 | 34300 | 0.0409        |
+ | 0.6490 | 34400 | 0.0703        |
+ | 0.6509 | 34500 | 0.0606        |
+ | 0.6528 | 34600 | 0.0517        |
+ | 0.6546 | 34700 | 0.0493        |
+ | 0.6565 | 34800 | 0.0271        |
+ | 0.6584 | 34900 | 0.0337        |
+ | 0.6603 | 35000 | 0.0369        |
+ | 0.6622 | 35100 | 0.0474        |
+ | 0.6641 | 35200 | 0.0562        |
+ | 0.6660 | 35300 | 0.0663        |
+ | 0.6678 | 35400 | 0.0419        |
+ | 0.6697 | 35500 | 0.0766        |
+ | 0.6716 | 35600 | 0.0439        |
+ | 0.6735 | 35700 | 0.0538        |
+ | 0.6754 | 35800 | 0.0512        |
+ | 0.6773 | 35900 | 0.0388        |
+ | 0.6792 | 36000 | 0.0528        |
+ | 0.6811 | 36100 | 0.0489        |
+ | 0.6829 | 36200 | 0.0454        |
+ | 0.6848 | 36300 | 0.0449        |
+ | 0.6867 | 36400 | 0.055         |
+ | 0.6886 | 36500 | 0.0344        |
+ | 0.6905 | 36600 | 0.0485        |
+ | 0.6924 | 36700 | 0.0496        |
+ | 0.6943 | 36800 | 0.0705        |
+ | 0.6961 | 36900 | 0.0617        |
+ | 0.6980 | 37000 | 0.054         |
+ | 0.6999 | 37100 | 0.0613        |
+ | 0.7018 | 37200 | 0.0549        |
+ | 0.7037 | 37300 | 0.0378        |
+ | 0.7056 | 37400 | 0.0508        |
+ | 0.7075 | 37500 | 0.0613        |
+ | 0.7094 | 37600 | 0.0602        |
+ | 0.7112 | 37700 | 0.0592        |
+ | 0.7131 | 37800 | 0.0441        |
+ | 0.7150 | 37900 | 0.0445        |
+ | 0.7169 | 38000 | 0.0464        |
+ | 0.7188 | 38100 | 0.0537        |
+ | 0.7207 | 38200 | 0.0521        |
+ | 0.7226 | 38300 | 0.0447        |
+ | 0.7244 | 38400 | 0.044         |
+ | 0.7263 | 38500 | 0.0506        |
+ | 0.7282 | 38600 | 0.043         |
+ | 0.7301 | 38700 | 0.0441        |
+ | 0.7320 | 38800 | 0.0444        |
+ | 0.7339 | 38900 | 0.0416        |
+ | 0.7358 | 39000 | 0.0556        |
+ | 0.7377 | 39100 | 0.0829        |
+ | 0.7395 | 39200 | 0.043         |
+ | 0.7414 | 39300 | 0.0366        |
+ | 0.7433 | 39400 | 0.0457        |
+ | 0.7452 | 39500 | 0.0622        |
+ | 0.7471 | 39600 | 0.0353        |
+ | 0.7490 | 39700 | 0.0597        |
+ | 0.7509 | 39800 | 0.0468        |
+ | 0.7527 | 39900 | 0.0418        |
+ | 0.7546 | 40000 | 0.0606        |
+ | 0.7565 | 40100 | 0.0613        |
+ | 0.7584 | 40200 | 0.0654        |
+ | 0.7603 | 40300 | 0.046         |
+ | 0.7622 | 40400 | 0.034         |
+ | 0.7641 | 40500 | 0.0378        |
+ | 0.7660 | 40600 | 0.0461        |
+ | 0.7678 | 40700 | 0.0404        |
+ | 0.7697 | 40800 | 0.0583        |
+ | 0.7716 | 40900 | 0.0636        |
+ | 0.7735 | 41000 | 0.0537        |
+ | 0.7754 | 41100 | 0.0336        |
+ | 0.7773 | 41200 | 0.0315        |
+ | 0.7792 | 41300 | 0.0536        |
+ | 0.7810 | 41400 | 0.0532        |
+ | 0.7829 | 41500 | 0.0553        |
+ | 0.7848 | 41600 | 0.0458        |
+ | 0.7867 | 41700 | 0.0372        |
+ | 0.7886 | 41800 | 0.0346        |
+ | 0.7905 | 41900 | 0.0419        |
+ | 0.7924 | 42000 | 0.0461        |
+ | 0.7942 | 42100 | 0.0517        |
+ | 0.7961 | 42200 | 0.0574        |
+ | 0.7980 | 42300 | 0.0411        |
+ | 0.7999 | 42400 | 0.0389        |
+ | 0.8018 | 42500 | 0.0578        |
+ | 0.8037 | 42600 | 0.0637        |
+ | 0.8056 | 42700 | 0.0434        |
+ | 0.8075 | 42800 | 0.0776        |
+ | 0.8093 | 42900 | 0.0644        |
+ | 0.8112 | 43000 | 0.0537        |
+ | 0.8131 | 43100 | 0.0519        |
+ | 0.8150 | 43200 | 0.0241        |
+ | 0.8169 | 43300 | 0.0295        |
+ | 0.8188 | 43400 | 0.0618        |
+ | 0.8207 | 43500 | 0.0275        |
+ | 0.8225 | 43600 | 0.0605        |
+ | 0.8244 | 43700 | 0.0414        |
+ | 0.8263 | 43800 | 0.0446        |
+ | 0.8282 | 43900 | 0.0449        |
+ | 0.8301 | 44000 | 0.0558        |
+ | 0.8320 | 44100 | 0.0336        |
+ | 0.8339 | 44200 | 0.0555        |
+ | 0.8358 | 44300 | 0.0399        |
+ | 0.8376 | 44400 | 0.0319        |
+ | 0.8395 | 44500 | 0.0331        |
+ | 0.8414 | 44600 | 0.0415        |
+ | 0.8433 | 44700 | 0.0424        |
+ | 0.8452 | 44800 | 0.0287        |
+ | 0.8471 | 44900 | 0.044         |
+ | 0.8490 | 45000 | 0.0375        |
+ | 0.8508 | 45100 | 0.032         |
+ | 0.8527 | 45200 | 0.0406        |
+ | 0.8546 | 45300 | 0.0429        |
+ | 0.8565 | 45400 | 0.0727        |
+ | 0.8584 | 45500 | 0.05          |
+ | 0.8603 | 45600 | 0.0436        |
+ | 0.8622 | 45700 | 0.0401        |
+ | 0.8641 | 45800 | 0.0312        |
+ | 0.8659 | 45900 | 0.036         |
+ | 0.8678 | 46000 | 0.0558        |
+ | 0.8697 | 46100 | 0.0436        |
+ | 0.8716 | 46200 | 0.0517        |
+ | 0.8735 | 46300 | 0.0361        |
+ | 0.8754 | 46400 | 0.038         |
+ | 0.8773 | 46500 | 0.0418        |
+ | 0.8791 | 46600 | 0.0407        |
+ | 0.8810 | 46700 | 0.0336        |
+ | 0.8829 | 46800 | 0.0559        |
+ | 0.8848 | 46900 | 0.0488        |
+ | 0.8867 | 47000 | 0.0463        |
+ | 0.8886 | 47100 | 0.0504        |
+ | 0.8905 | 47200 | 0.0414        |
+ | 0.8924 | 47300 | 0.0428        |
+ | 0.8942 | 47400 | 0.0389        |
+ | 0.8961 | 47500 | 0.0422        |
+ | 0.8980 | 47600 | 0.0533        |
+ | 0.8999 | 47700 | 0.0386        |
+ | 0.9018 | 47800 | 0.0672        |
+ | 0.9037 | 47900 | 0.0505        |
+ | 0.9056 | 48000 | 0.0632        |
+ | 0.9074 | 48100 | 0.0263        |
+ | 0.9093 | 48200 | 0.0448        |
+ | 0.9112 | 48300 | 0.0413        |
+ | 0.9131 | 48400 | 0.0532        |
+ | 0.9150 | 48500 | 0.0503        |
+ | 0.9169 | 48600 | 0.0472        |
+ | 0.9188 | 48700 | 0.0255        |
+ | 0.9207 | 48800 | 0.035         |
+ | 0.9225 | 48900 | 0.0353        |
+ | 0.9244 | 49000 | 0.0407        |
+ | 0.9263 | 49100 | 0.0154        |
+ | 0.9282 | 49200 | 0.0535        |
+ | 0.9301 | 49300 | 0.0435        |
+ | 0.9320 | 49400 | 0.0461        |
+ | 0.9339 | 49500 | 0.0288        |
+ | 0.9357 | 49600 | 0.0366        |
+ | 0.9376 | 49700 | 0.0411        |
+ | 0.9395 | 49800 | 0.0605        |
+ | 0.9414 | 49900 | 0.0551        |
+ | 0.9433 | 50000 | 0.0297        |
+ | 0.9452 | 50100 | 0.0388        |
+ | 0.9471 | 50200 | 0.0402        |
+ | 0.9489 | 50300 | 0.0321        |
+ | 0.9508 | 50400 | 0.0538        |
+ | 0.9527 | 50500 | 0.036         |
+ | 0.9546 | 50600 | 0.0318        |
+ | 0.9565 | 50700 | 0.0398        |
+ | 0.9584 | 50800 | 0.0405        |
+ | 0.9603 | 50900 | 0.0408        |
+ | 0.9622 | 51000 | 0.0485        |
+ | 0.9640 | 51100 | 0.047         |
+ | 0.9659 | 51200 | 0.0452        |
+ | 0.9678 | 51300 | 0.0469        |
+ | 0.9697 | 51400 | 0.0473        |
+ | 0.9716 | 51500 | 0.039         |
+ | 0.9735 | 51600 | 0.0579        |
+ | 0.9754 | 51700 | 0.0332        |
+ | 0.9772 | 51800 | 0.0322        |
+ | 0.9791 | 51900 | 0.0324        |
+ | 0.9810 | 52000 | 0.035         |
+ | 0.9829 | 52100 | 0.0517        |
+ | 0.9848 | 52200 | 0.0275        |
+ | 0.9867 | 52300 | 0.0466        |
+ | 0.9886 | 52400 | 0.0452        |
+ | 0.9905 | 52500 | 0.0446        |
+ | 0.9923 | 52600 | 0.0357        |
+ | 0.9942 | 52700 | 0.0368        |
+ | 0.9961 | 52800 | 0.0365        |
+ | 0.9980 | 52900 | 0.0303        |
+ | 0.9999 | 53000 | 0.0288        |
+
+ </details>
+
+ ### Framework Versions
+ - Python: 3.11.12
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.51.1
+ - PyTorch: 2.6.0+cu124
+ - Accelerate: 1.5.2
+ - Datasets: 3.5.0
+ - Tokenizers: 0.21.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### MultipleNegativesRankingLoss
+ ```bibtex
+ @misc{henderson2017efficient,
+     title={Efficient Natural Language Response Suggestion for Smart Reply},
+     author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+     year={2017},
+     eprint={1705.00652},
+     archivePrefix={arXiv},
+     primaryClass={cs.CL}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,49 @@
+ {
+   "architectures": [
+     "NewModel"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "auto_map": {
+     "AutoConfig": "configuration.NewConfig",
+     "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+     "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+     "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+     "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+     "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+     "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+   },
+   "classifier_dropout": 0.0,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "layer_norm_type": "layer_norm",
+   "logn_attention_clip1": false,
+   "logn_attention_scale": false,
+   "max_position_embeddings": 8192,
+   "model_type": "new",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pack_qkv": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "rope",
+   "rope_scaling": {
+     "factor": 8.0,
+     "type": "ntk"
+   },
+   "rope_theta": 20000,
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.1",
+   "type_vocab_size": 1,
+   "unpad_inputs": false,
+   "use_memory_efficient_attention": false,
+   "vocab_size": 250048
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "3.4.1",
+     "transformers": "4.51.1",
+     "pytorch": "2.6.0+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
configuration.py ADDED
@@ -0,0 +1,145 @@
+ # coding=utf-8
+ # Copyright 2024 The GTE Team Authors and Alibaba Group.
+ # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """ NEW model configuration"""
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+
+ class NewConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of a [`NewModel`] or a [`TFNewModel`]. It is used to
+     instantiate a NEW model according to the specified arguments, defining the model architecture. Instantiating a
+     configuration with the defaults will yield a similar configuration to that of the NEW
+     [izhx/new-base-en](https://huggingface.co/izhx/new-base-en) architecture.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+
+
+     Args:
+         vocab_size (`int`, *optional*, defaults to 30522):
+             Vocabulary size of the NEW model. Defines the number of different tokens that can be represented by the
+             `inputs_ids` passed when calling [`NewModel`] or [`TFNewModel`].
+         hidden_size (`int`, *optional*, defaults to 768):
+             Dimensionality of the encoder layers and the pooler layer.
+         num_hidden_layers (`int`, *optional*, defaults to 12):
+             Number of hidden layers in the Transformer encoder.
+         num_attention_heads (`int`, *optional*, defaults to 12):
+             Number of attention heads for each attention layer in the Transformer encoder.
+         intermediate_size (`int`, *optional*, defaults to 3072):
+             Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
+         hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
+             The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+             `"relu"`, `"silu"` and `"gelu_new"` are supported.
+         hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
+             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+         attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
+             The dropout ratio for the attention probabilities.
+         max_position_embeddings (`int`, *optional*, defaults to 512):
+             The maximum sequence length that this model might ever be used with. Typically set this to something large
+             just in case (e.g., 512 or 1024 or 2048).
+         type_vocab_size (`int`, *optional*, defaults to 2):
+             The vocabulary size of the `token_type_ids` passed when calling [`NewModel`] or [`TFNewModel`].
+         initializer_range (`float`, *optional*, defaults to 0.02):
+             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
+             The epsilon used by the layer normalization layers.
+         position_embedding_type (`str`, *optional*, defaults to `"rope"`):
+             Type of position embedding. Choose one of `"absolute"`, `"rope"`.
+         rope_theta (`float`, *optional*, defaults to 10000.0):
+             The base period of the RoPE embeddings.
+         rope_scaling (`Dict`, *optional*):
+             Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
+             strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
+             `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
+             `max_position_embeddings` to the expected new maximum. See the following thread for more information on how
+             these scaling strategies behave:
+             https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
+             experimental feature, subject to breaking API changes in future versions.
+         classifier_dropout (`float`, *optional*):
+             The dropout ratio for the classification head.
+
+     Examples:
+
+     ```python
+     >>> from transformers import NewConfig, NewModel
+
+     >>> # Initializing a NEW izhx/new-base-en style configuration
+     >>> configuration = NewConfig()
+
+     >>> # Initializing a model (with random weights) from the izhx/new-base-en style configuration
+     >>> model = NewModel(configuration)
+
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```"""
+
+     model_type = "new"
+
+     def __init__(
+         self,
+         vocab_size=30528,
+         hidden_size=768,
+         num_hidden_layers=12,
+         num_attention_heads=12,
+         intermediate_size=3072,
+         hidden_act="gelu",
+         hidden_dropout_prob=0.1,
+         attention_probs_dropout_prob=0.0,
+         max_position_embeddings=2048,
+         type_vocab_size=1,
+         initializer_range=0.02,
+         layer_norm_type='layer_norm',
+         layer_norm_eps=1e-12,
+         # pad_token_id=0,
+         position_embedding_type="rope",
+         rope_theta=10000.0,
+         rope_scaling=None,
+         classifier_dropout=None,
+         pack_qkv=True,
+         unpad_inputs=False,
+         use_memory_efficient_attention=False,
+         logn_attention_scale=False,
+         logn_attention_clip1=False,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+
+         self.vocab_size = vocab_size
+         self.hidden_size = hidden_size
+         self.num_hidden_layers = num_hidden_layers
+         self.num_attention_heads = num_attention_heads
+         self.hidden_act = hidden_act
+         self.intermediate_size = intermediate_size
+         self.hidden_dropout_prob = hidden_dropout_prob
+         self.attention_probs_dropout_prob = attention_probs_dropout_prob
+         self.max_position_embeddings = max_position_embeddings
+         self.type_vocab_size = type_vocab_size
+         self.initializer_range = initializer_range
+         self.layer_norm_type = layer_norm_type
+         self.layer_norm_eps = layer_norm_eps
+         self.position_embedding_type = position_embedding_type
+         self.rope_theta = rope_theta
+         self.rope_scaling = rope_scaling
+         self.classifier_dropout = classifier_dropout
+
+         self.pack_qkv = pack_qkv
+         self.unpad_inputs = unpad_inputs
+         self.use_memory_efficient_attention = use_memory_efficient_attention
+         self.logn_attention_scale = logn_attention_scale
+         self.logn_attention_clip1 = logn_attention_clip1
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdf748855813d79ca3904e4b67cbf5b1692effc5b0b9f98e21505d1b372d410e
+ size 1221487872
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 8192,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa7a6ad87a7ce8fe196787355f6af7d03aee94d19c54a5eb1392ed18c8ef451a
+ size 17082988
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "250001": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "max_length": 8192,
+   "model_max_length": 8192,
+   "pad_to_multiple_of": null,
+   "pad_token": "<pad>",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "tokenizer_class": "XLMRobertaTokenizerFast",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }