mrm8488's picture
Update README.md
6802647 verified
metadata
language: []
library_name: sentence-transformers
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - dataset_size:1K<n<10K
  - loss:MatryoshkaLoss
  - loss:CoSENTLoss
base_model: intfloat/multilingual-e5-large
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
widget:
  - source_sentence: El hombre captura una pelota
    sentences:
      - Un hombre lanza una pelota en el aire.
      - Un hombre se encuentra tocando una flauta de madera.
      - La mujer está maquillándose usando sombra de ojos.
  - source_sentence: Un hombre está buscando algo.
    sentences:
      - En un mercado de granjeros, se encuentra un hombre.
      - Se acerca a la pista un avión suizo de color blanco.
      - dos chicas jóvenes se abrazan en la hierba.
  - source_sentence: El avión está tocando tierra.
    sentences:
      - El avión animado se encuentra en proceso de aterrizaje.
      - La capital de Siria fue golpeada por dos explosiones
      - Violentos incidentes afectan a estudiantes chinos en Francia
  - source_sentence: Un hombre saltando la cuerda.
    sentences:
      - Un hombre está saltando la cuerda.
      - Una mujer entrena a su perro para saltar en el aire.
      - Los gatitos están comiendo de los platos.
  - source_sentence: tres perros gruñendo entre sí
    sentences:
      - Dos perros se aproximan uno al otro en el pasto.
      - Una mujer sonriente brinda cariño a un pequeño bebé.
      - Una mujer está montando a caballo en el campo.
pipeline_tag: sentence-similarity
model-index:
  - name: SentenceTransformer based on intfloat/multilingual-e5-large
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 768
          type: sts-dev-768
        metrics:
          - type: pearson_cosine
            value: 0.8279951103268512
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8342643795984531
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8228439538329566
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.834870903153992
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8231076969394738
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8349270059177344
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8196281042113861
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8248683461954115
            name: Spearman Dot
          - type: pearson_max
            value: 0.8279951103268512
            name: Pearson Max
          - type: spearman_max
            value: 0.8349270059177344
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 512
          type: sts-dev-512
        metrics:
          - type: pearson_cosine
            value: 0.8236357426336446
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8332692872015282
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8217552769156274
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8331746060276878
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8217859136681092
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8334069456110773
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8101789790612713
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8179205607773823
            name: Spearman Dot
          - type: pearson_max
            value: 0.8236357426336446
            name: Pearson Max
          - type: spearman_max
            value: 0.8334069456110773
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 256
          type: sts-dev-256
        metrics:
          - type: pearson_cosine
            value: 0.816222860848086
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8303708513421737
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8178715987143794
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8301047046554985
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8183826652089494
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8301804247624904
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7878741921967743
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7904844114269662
            name: Spearman Dot
          - type: pearson_max
            value: 0.8183826652089494
            name: Pearson Max
          - type: spearman_max
            value: 0.8303708513421737
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 128
          type: sts-dev-128
        metrics:
          - type: pearson_cosine
            value: 0.794202606017138
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8198385906414491
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8088714046889546
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8222921243120748
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8092312345267045
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8220266161646009
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7341586721030032
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7351749794310246
            name: Spearman Dot
          - type: pearson_max
            value: 0.8092312345267045
            name: Pearson Max
          - type: spearman_max
            value: 0.8222921243120748
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 64
          type: sts-dev-64
        metrics:
          - type: pearson_cosine
            value: 0.7727295051414095
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8076629783565549
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7976419723073269
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8147883308842346
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7979124462870892
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8123832197697319
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.6725844492342726
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6673162832940408
            name: Spearman Dot
          - type: pearson_max
            value: 0.7979124462870892
            name: Pearson Max
          - type: spearman_max
            value: 0.8147883308842346
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 768
          type: sts-test-768
        metrics:
          - type: pearson_cosine
            value: 0.8630482725201897
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8813284718659181
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8770818288812614
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8810971983428288
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8770132070253477
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8812162173545179
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8581811981775829
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8707402246720045
            name: Spearman Dot
          - type: pearson_max
            value: 0.8770818288812614
            name: Pearson Max
          - type: spearman_max
            value: 0.8813284718659181
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 512
          type: sts-test-512
        metrics:
          - type: pearson_cosine
            value: 0.8589909139210625
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8799604919891442
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8744468387217347
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8791142262015441
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8747974723064821
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8795698184784307
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8464185524060444
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8549652098582826
            name: Spearman Dot
          - type: pearson_max
            value: 0.8747974723064821
            name: Pearson Max
          - type: spearman_max
            value: 0.8799604919891442
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 256
          type: sts-test-256
        metrics:
          - type: pearson_cosine
            value: 0.8528262537030415
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8762917275750132
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8715060008387856
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8780718380107112
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.87251419758469
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8788770265821976
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.801980870958869
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8007112694661982
            name: Spearman Dot
          - type: pearson_max
            value: 0.87251419758469
            name: Pearson Max
          - type: spearman_max
            value: 0.8788770265821976
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 128
          type: sts-test-128
        metrics:
          - type: pearson_cosine
            value: 0.8392066286150661
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8692426944903685
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8631603748425567
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8715673768304316
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8643871758114816
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8724091426441261
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7461565194503229
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7403017354497338
            name: Spearman Dot
          - type: pearson_max
            value: 0.8643871758114816
            name: Pearson Max
          - type: spearman_max
            value: 0.8724091426441261
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 64
          type: sts-test-64
        metrics:
          - type: pearson_cosine
            value: 0.8213671607347727
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8621003145087452
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8530869243121955
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8631973638935834
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.854140567169475
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8632627342101252
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.6853599968011839
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6726454086764928
            name: Spearman Dot
          - type: pearson_max
            value: 0.854140567169475
            name: Pearson Max
          - type: spearman_max
            value: 0.8632627342101252
            name: Spearman Max

SentenceTransformer based on intfloat/multilingual-e5-large

This is a sentence-transformers model finetuned from intfloat/multilingual-e5-large on the clibrain/stsb_multi_es_aug_gpt3.5-turbo_2 dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: intfloat/multilingual-e5-large
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 1024 dimensions
  • Similarity Function: Cosine Similarity
  • Training Dataset:
    • stsb_multi_es_aug

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-64-5e")
# Run inference
sentences = [
    'tres perros gruñendo entre sí',
    'Dos perros se aproximan uno al otro en el pasto.',
    'Una mujer sonriente brinda cariño a un pequeño bebé.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-dev-768)

Metric Value
pearson_cosine 0.828
spearman_cosine 0.8343
pearson_manhattan 0.8228
spearman_manhattan 0.8349
pearson_euclidean 0.8231
spearman_euclidean 0.8349
pearson_dot 0.8196
spearman_dot 0.8249
pearson_max 0.828
spearman_max 0.8349

Semantic Similarity (dataset: sts-dev-512)

Metric Value
pearson_cosine 0.8236
spearman_cosine 0.8333
pearson_manhattan 0.8218
spearman_manhattan 0.8332
pearson_euclidean 0.8218
spearman_euclidean 0.8334
pearson_dot 0.8102
spearman_dot 0.8179
pearson_max 0.8236
spearman_max 0.8334

Semantic Similarity (dataset: sts-dev-256)

Metric Value
pearson_cosine 0.8162
spearman_cosine 0.8304
pearson_manhattan 0.8179
spearman_manhattan 0.8301
pearson_euclidean 0.8184
spearman_euclidean 0.8302
pearson_dot 0.7879
spearman_dot 0.7905
pearson_max 0.8184
spearman_max 0.8304

Semantic Similarity (dataset: sts-dev-128)

Metric Value
pearson_cosine 0.7942
spearman_cosine 0.8198
pearson_manhattan 0.8089
spearman_manhattan 0.8223
pearson_euclidean 0.8092
spearman_euclidean 0.822
pearson_dot 0.7342
spearman_dot 0.7352
pearson_max 0.8092
spearman_max 0.8223

Semantic Similarity (dataset: sts-dev-64)

Metric Value
pearson_cosine 0.7727
spearman_cosine 0.8077
pearson_manhattan 0.7976
spearman_manhattan 0.8148
pearson_euclidean 0.7979
spearman_euclidean 0.8124
pearson_dot 0.6726
spearman_dot 0.6673
pearson_max 0.7979
spearman_max 0.8148

Semantic Similarity (dataset: sts-test-768)

Metric Value
pearson_cosine 0.863
spearman_cosine 0.8813
pearson_manhattan 0.8771
spearman_manhattan 0.8811
pearson_euclidean 0.877
spearman_euclidean 0.8812
pearson_dot 0.8582
spearman_dot 0.8707
pearson_max 0.8771
spearman_max 0.8813

Semantic Similarity (dataset: sts-test-512)

Metric Value
pearson_cosine 0.859
spearman_cosine 0.88
pearson_manhattan 0.8744
spearman_manhattan 0.8791
pearson_euclidean 0.8748
spearman_euclidean 0.8796
pearson_dot 0.8464
spearman_dot 0.855
pearson_max 0.8748
spearman_max 0.88

Semantic Similarity (dataset: sts-test-256)

Metric Value
pearson_cosine 0.8528
spearman_cosine 0.8763
pearson_manhattan 0.8715
spearman_manhattan 0.8781
pearson_euclidean 0.8725
spearman_euclidean 0.8789
pearson_dot 0.802
spearman_dot 0.8007
pearson_max 0.8725
spearman_max 0.8789

Semantic Similarity (dataset: sts-test-128)

Metric Value
pearson_cosine 0.8392
spearman_cosine 0.8692
pearson_manhattan 0.8632
spearman_manhattan 0.8716
pearson_euclidean 0.8644
spearman_euclidean 0.8724
pearson_dot 0.7462
spearman_dot 0.7403
pearson_max 0.8644
spearman_max 0.8724

Semantic Similarity (dataset: sts-test-64)

Metric Value
pearson_cosine 0.8214
spearman_cosine 0.8621
pearson_manhattan 0.8531
spearman_manhattan 0.8632
pearson_euclidean 0.8541
spearman_euclidean 0.8633
pearson_dot 0.6854
spearman_dot 0.6726
pearson_max 0.8541
spearman_max 0.8633

Training Details

Training Dataset

stsb_multi_es_aug

  • Dataset: stsb_multi_es_aug
  • Size: 2,697 training samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
    sentence1 sentence2 score
    type string string float
    details
    • sentence1 — min: 8 tokens, mean: 22.25 tokens, max: 68 tokens
    • sentence2 — min: 8 tokens, mean: 22.01 tokens, max: 79 tokens
    • score — min: 0.0, mean: 2.67, max: 5.0
  • Samples:
    sentence1 sentence2 score
    El pájaro de tamaño reducido se posó con delicadeza en una rama cubierta de escarcha. Un ave de color amarillo descansaba tranquilamente en una rama. 3.200000047683716
    Una chica está tocando la flauta en un parque. Un grupo de músicos está tocando en un escenario al aire libre. 1.286
    La aclamada escritora británica, Doris Lessing, galardonada con el premio Nobel, fallece La destacada autora británica, Doris Lessing, reconocida con el prestigioso Premio Nobel, muere 4.199999809265137
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "CoSENTLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

stsb_multi_es_aug

  • Dataset: stsb_multi_es_aug
  • Size: 697 evaluation samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples (the dataset has only 697, so all of them are covered):
    sentence1 sentence2 score
    type string string float
    details
    • sentence1 — min: 8 tokens, mean: 22.76 tokens, max: 67 tokens
    • sentence2 — min: 7 tokens, mean: 22.26 tokens, max: 63 tokens
    • score — min: 0.0, mean: 2.3, max: 5.0
  • Samples:
    sentence1 sentence2 score
    Un incendio ocurrido en un hospital psiquiátrico ruso resultó en la trágica muerte de 38 personas. Se teme que el incendio en un hospital psiquiátrico ruso cause la pérdida de la vida de 38 individuos. 4.199999809265137
    "Street dijo que el otro individuo a veces se siente avergonzado de su fiesta, lo cual provoca risas en la multitud" "A veces, el otro tipo se encuentra avergonzado de su fiesta y no se le puede culpar." 3.5
    El veterano diplomático de Malasia tuvo un encuentro con Suu Kyi el miércoles en la casa del lago en Yangon donde permanece bajo arresto domiciliario. Razali Ismail tuvo una reunión de 90 minutos con Suu Kyi, quien ganó el Premio Nobel de la Paz en 1991, en su casa del lago donde está recluida. 3.691999912261963
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "CoSENTLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • num_train_epochs: 5
  • warmup_ratio: 0.1
  • fp16: True

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 5
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss loss sts-dev-128_spearman_cosine sts-dev-256_spearman_cosine sts-dev-512_spearman_cosine sts-dev-64_spearman_cosine sts-dev-768_spearman_cosine sts-test-128_spearman_cosine sts-test-256_spearman_cosine sts-test-512_spearman_cosine sts-test-64_spearman_cosine sts-test-768_spearman_cosine
0.5917 100 21.7032 21.7030 0.8030 0.8124 0.8205 0.7839 0.8215 - - - - -
1.1834 200 21.4019 24.0898 0.7839 0.7972 0.8038 0.7680 0.8062 - - - - -
1.7751 300 21.2168 22.5421 0.7909 0.8027 0.8058 0.7786 0.8068 - - - - -
2.3669 400 20.7049 23.6522 0.7938 0.8049 0.8108 0.7873 0.8123 - - - - -
2.9586 500 20.5077 23.6100 0.8017 0.8116 0.8155 0.7893 0.8185 - - - - -
3.5503 600 19.2725 24.7539 0.8133 0.8254 0.8291 0.8032 0.8314 - - - - -
4.1420 700 19.0841 26.5286 0.8210 0.8298 0.8333 0.8102 0.8333 - - - - -
4.7337 800 18.6847 26.8158 0.8198 0.8304 0.8333 0.8077 0.8343 - - - - -
5.0 845 - - - - - - - 0.8692 0.8763 0.8800 0.8621 0.8813

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.0
  • Transformers: 4.41.1
  • PyTorch: 2.3.0+cu121
  • Accelerate: 0.30.1
  • Datasets: 2.19.1
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

CoSENTLoss

@online{kexuefm-8847,
    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
    author={Su Jianlin},
    year={2022},
    month={Jan},
    url={https://kexue.fm/archives/8847},
}