metadata

tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:412178
  - loss:MultipleNegativesRankingLoss
base_model: answerdotai/ModernBERT-base
widget:
  - source_sentence: |-
      Clip off all parts from all bounding boxes that are outside of the image.

              Returns
              -------
              imgaug.BoundingBoxesOnImage
                  Bounding boxes, clipped to fall within the image dimensions.
    sentences:
      - |-
        def model_best(y1, y2, samples=1000, progressbar=True):
            """
            Bayesian Estimation Supersedes the T-Test

            This model runs a Bayesian hypothesis comparing if y1 and y2 come
            from the same distribution. Returns are assumed to be T-distributed.

            In addition, computes annual volatility and Sharpe of in and
            out-of-sample periods.

            This model replicates the example used in:
            Kruschke, John. (2012) Bayesian estimation supersedes the t
            test. Journal of Experimental Psychology: General.

            Parameters
            ----------
            y1 : array-like
                Array of returns (e.g. in-sample)
            y2 : array-like
                Array of returns (e.g. out-of-sample)
            samples : int, optional
                Number of posterior samples to draw.

            Returns
            -------
            model : pymc.Model object
                PyMC3 model containing all random variables.
            trace : pymc3.sampling.BaseTrace object
                A PyMC3 trace object that contains samples for each parameter
                of the posterior.

            See Also
            --------
            plot_stoch_vol : plotting of tochastic volatility model
            """

            y = np.concatenate((y1, y2))

            mu_m = np.mean(y)
            mu_p = 0.000001 * 1 / np.std(y)**2

            sigma_low = np.std(y) / 1000
            sigma_high = np.std(y) * 1000
            with pm.Model() as model:
                group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,
                                        testval=y1.mean())
                group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,
                                        testval=y2.mean())
                group1_std = pm.Uniform('group1_std', lower=sigma_low,
                                        upper=sigma_high, testval=y1.std())
                group2_std = pm.Uniform('group2_std', lower=sigma_low,
                                        upper=sigma_high, testval=y2.std())
                nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

                returns_group1 = pm.StudentT('group1', nu=nu, mu=group1_mean,
                                             lam=group1_std**-2, observed=y1)
                returns_group2 = pm.StudentT('group2', nu=nu, mu=group2_mean,
                                             lam=group2_std**-2, observed=y2)

                diff_of_means = pm.Deterministic('difference of means',
                                                 group2_mean - group1_mean)
                pm.Deterministic('difference of stds',
                                 group2_std - group1_std)
                pm.Deterministic('effect size', diff_of_means /
                                 pm.math.sqrt((group1_std**2 +
                                               group2_std**2) / 2))

                pm.Deterministic('group1_annual_volatility',
                                 returns_group1.distribution.variance**.5 *
                                 np.sqrt(252))
                pm.Deterministic('group2_annual_volatility',
                                 returns_group2.distribution.variance**.5 *
                                 np.sqrt(252))

                pm.Deterministic('group1_sharpe', returns_group1.distribution.mean /
                                 returns_group1.distribution.variance**.5 *
                                 np.sqrt(252))
                pm.Deterministic('group2_sharpe', returns_group2.distribution.mean /
                                 returns_group2.distribution.variance**.5 *
                                 np.sqrt(252))

                trace = pm.sample(samples, progressbar=progressbar)
            return model, trace
      - |-
        def clip_out_of_image(self):
                """
                Clip off all parts from all bounding boxes that are outside of the image.

                Returns
                -------
                imgaug.BoundingBoxesOnImage
                    Bounding boxes, clipped to fall within the image dimensions.

                """
                bbs_cut = [bb.clip_out_of_image(self.shape)
                           for bb in self.bounding_boxes if bb.is_partly_within_image(self.shape)]
                return BoundingBoxesOnImage(bbs_cut, shape=self.shape)
      - |-
        def _initPermanence(self, potential, connectedPct):
            """
            Initializes the permanences of a column. The method
            returns a 1-D array the size of the input, where each entry in the
            array represents the initial permanence value between the input bit
            at the particular index in the array, and the column represented by
            the 'index' parameter.

            Parameters:
            ----------------------------
            :param potential: A numpy array specifying the potential pool of the column.
                            Permanence values will only be generated for input bits
                            corresponding to indices for which the mask value is 1.
            :param connectedPct: A value between 0 or 1 governing the chance, for each
                                 permanence, that the initial permanence value will
                                 be a value that is considered connected.
            """
            # Determine which inputs bits will start out as connected
            # to the inputs. Initially a subset of the input bits in a
            # column's potential pool will be connected. This number is
            # given by the parameter "connectedPct"
            perm = numpy.zeros(self._numInputs, dtype=realDType)
            for i in xrange(self._numInputs):
              if (potential[i] < 1):
                continue

              if (self._random.getReal64() <= connectedPct):
                perm[i] = self._initPermConnected()
              else:
                perm[i] = self._initPermNonConnected()

            # Clip off low values. Since we use a sparse representation
            # to store the permanence values this helps reduce memory
            # requirements.
            perm[perm < self._synPermTrimThreshold] = 0

            return perm
  - source_sentence: |-
      Perform a weighted average over dicts that are each on a different node
          Input: local_name2valcount: dict mapping key -> (value, count)
          Returns: key -> mean
    sentences:
      - >-
        def MotionBlur(k=5, angle=(0, 360), direction=(-1.0, 1.0), order=1,
        name=None, deterministic=False, random_state=None):
            """
            Augmenter that sharpens images and overlays the result with the original image.

            dtype support::

                See ``imgaug.augmenters.convolutional.Convolve``.

            Parameters
            ----------
            k : int or tuple of int or list of int or imgaug.parameters.StochasticParameter, optional
                Kernel size to use.

                    * If a single int, then that value will be used for the height
                      and width of the kernel.
                    * If a tuple of two ints ``(a, b)``, then the kernel size will be
                      sampled from the interval ``[a..b]``.
                    * If a list, then a random value will be sampled from that list per image.
                    * If a StochasticParameter, then ``N`` samples will be drawn from
                      that parameter per ``N`` input images, each representing the kernel
                      size for the nth image.

            angle : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
                Angle of the motion blur in degrees (clockwise, relative to top center direction).

                    * If a number, exactly that value will be used.
                    * If a tuple ``(a, b)``, a random value from the range ``a <= x <= b`` will
                      be sampled per image.
                    * If a list, then a random value will be sampled from that list per image.
                    * If a StochasticParameter, a value will be sampled from the
                      parameter per image.

            direction : number or tuple of number or list of number or imgaug.parameters.StochasticParameter, optional
                Forward/backward direction of the motion blur. Lower values towards -1.0 will point the motion blur towards
                the back (with angle provided via `angle`). Higher values towards 1.0 will point the motion blur forward.
                A value of 0.0 leads to a uniformly (but still angled) motion blur.

                    * If a number, exactly that value will be used.
                    * If a tuple ``(a, b)``, a random value from the range ``a <= x <= b`` will
                      be sampled per image.
                    * If a list, then a random value will be sampled from that list per image.
                    * If a StochasticParameter, a value will be sampled from the
                      parameter per image.

            order : int or iterable of int or imgaug.ALL or imgaug.parameters.StochasticParameter, optional
                Interpolation order to use when rotating the kernel according to `angle`.
                See :func:`imgaug.augmenters.geometric.Affine.__init__`.
                Recommended to be ``0`` or ``1``, with ``0`` being faster, but less continuous/smooth as `angle` is changed,
                particularly around multiple of 45 degrees.

            name : None or str, optional
                See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

            deterministic : bool, optional
                See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

            random_state : None or int or numpy.random.RandomState, optional
                See :func:`imgaug.augmenters.meta.Augmenter.__init__`.

            Examples
            --------
            >>> aug = iaa.MotionBlur(k=15)

            Create a motion blur augmenter with kernel size of 15x15.

            >>> aug = iaa.MotionBlur(k=15, angle=[-45, 45])

            Create a motion blur augmenter with kernel size of 15x15 and a blur angle of either -45 or 45 degrees (randomly
            picked per image).

            """
            # TODO allow (1, None) and set to identity matrix if k == 1
            k_param = iap.handle_discrete_param(k, "k", value_range=(3, None), tuple_to_uniform=True, list_to_choice=True,
                                                allow_floats=False)
            angle_param = iap.handle_continuous_param(angle, "angle", value_range=None, tuple_to_uniform=True,
                                                      list_to_choice=True)
            direction_param = iap.handle_continuous_param(direction, "direction", value_range=(-1.0-1e-6, 1.0+1e-6),
                                                          tuple_to_uniform=True, list_to_choice=True)

            def create_matrices(image, nb_channels, random_state_func):
                # avoid cyclic import between blur and geometric
                from . import geometric as iaa_geometric

                # force discrete for k_sample via int() in case of stochastic parameter
                k_sample = int(k_param.draw_sample(random_state=random_state_func))
                angle_sample = angle_param.draw_sample(random_state=random_state_func)
                direction_sample = direction_param.draw_sample(random_state=random_state_func)

                k_sample = k_sample if k_sample % 2 != 0 else k_sample + 1
                direction_sample = np.clip(direction_sample, -1.0, 1.0)
                direction_sample = (direction_sample + 1.0) / 2.0

                matrix = np.zeros((k_sample, k_sample), dtype=np.float32)
                matrix[:, k_sample//2] = np.linspace(float(direction_sample), 1.0 - float(direction_sample), num=k_sample)
                rot = iaa_geometric.Affine(rotate=angle_sample, order=order)
                matrix = (rot.augment_image((matrix * 255).astype(np.uint8)) / 255.0).astype(np.float32)

                return [matrix/np.sum(matrix)] * nb_channels

            if name is None:
                name = "Unnamed%s" % (ia.caller_name(),)

            return iaa_convolutional.Convolve(create_matrices, name=name, deterministic=deterministic,
                                              random_state=random_state)
      - |-
        def rolling_sharpe(returns, rolling_sharpe_window):
            """
            Determines the rolling Sharpe ratio of a strategy.

            Parameters
            ----------
            returns : pd.Series
                Daily returns of the strategy, noncumulative.
                 - See full explanation in tears.create_full_tear_sheet.
            rolling_sharpe_window : int
                Length of rolling window, in days, over which to compute.

            Returns
            -------
            pd.Series
                Rolling Sharpe ratio.

            Note
            -----
            See https://en.wikipedia.org/wiki/Sharpe_ratio for more details.
            """

            return returns.rolling(rolling_sharpe_window).mean() \
                / returns.rolling(rolling_sharpe_window).std() \
                * np.sqrt(APPROX_BDAYS_PER_YEAR)
      - |-
        def mpi_weighted_mean(comm, local_name2valcount):
            """
            Perform a weighted average over dicts that are each on a different node
            Input: local_name2valcount: dict mapping key -> (value, count)
            Returns: key -> mean
            """
            all_name2valcount = comm.gather(local_name2valcount)
            if comm.rank == 0:
                name2sum = defaultdict(float)
                name2count = defaultdict(float)
                for n2vc in all_name2valcount:
                    for (name, (val, count)) in n2vc.items():
                        try:
                            val = float(val)
                        except ValueError:
                            if comm.rank == 0:
                                warnings.warn('WARNING: tried to compute mean on non-float {}={}'.format(name, val))
                        else:
                            name2sum[name] += val * count
                            name2count[name] += count
                return {name : name2sum[name] / name2count[name] for name in name2sum}
            else:
                return {}
  - source_sentence: |-
      Generate and return the following encoder related substitution variables:

        encoderSpecsStr:
          For the base description file, this string defines the default
          encoding dicts for each encoder. For example:
               '__gym_encoder' : {   'fieldname': 'gym',
                'n': 13,
                'name': 'gym',
                'type': 'SDRCategoryEncoder',
                'w': 7},
              '__address_encoder' : {   'fieldname': 'address',
                'n': 13,
                'name': 'address',
                'type': 'SDRCategoryEncoder',
                'w': 7}

        encoderSchemaStr:
          For the base description file, this is a list containing a
          DeferredDictLookup entry for each encoder. For example:
              [DeferredDictLookup('__gym_encoder'),
               DeferredDictLookup('__address_encoder'),
               DeferredDictLookup('__timestamp_timeOfDay_encoder'),
               DeferredDictLookup('__timestamp_dayOfWeek_encoder'),
               DeferredDictLookup('__consumption_encoder')],

        permEncoderChoicesStr:
          For the permutations file, this defines the possible
          encoder dicts for each encoder. For example:
              '__timestamp_dayOfWeek_encoder': [
                           None,
                           {'fieldname':'timestamp',
                            'name': 'timestamp_timeOfDay',
                            'type':'DateEncoder'
                            'dayOfWeek': (7,1)
                            },
                           {'fieldname':'timestamp',
                            'name': 'timestamp_timeOfDay',
                            'type':'DateEncoder'
                            'dayOfWeek': (7,3)
                            },
                        ],

              '__field_consumption_encoder': [
                          None,
                          {'fieldname':'consumption',
                           'name': 'consumption',
                           'type':'AdaptiveScalarEncoder',
                           'n': 13,
                           'w': 7,
                            }
                         ]



        Parameters:
        --------------------------------------------------
        includedFields:  item from the 'includedFields' section of the
                          description JSON object. This is a list of dicts, each
                          dict defining the field name, type, and optional min
                          and max values.

        retval:  (encoderSpecsStr, encoderSchemaStr permEncoderChoicesStr)
    sentences:
      - |-
        def _generateEncoderStringsV1(includedFields):
          """ Generate and return the following encoder related substitution variables:

          encoderSpecsStr:
            For the base description file, this string defines the default
            encoding dicts for each encoder. For example:
                 '__gym_encoder' : {   'fieldname': 'gym',
                  'n': 13,
                  'name': 'gym',
                  'type': 'SDRCategoryEncoder',
                  'w': 7},
                '__address_encoder' : {   'fieldname': 'address',
                  'n': 13,
                  'name': 'address',
                  'type': 'SDRCategoryEncoder',
                  'w': 7}

          encoderSchemaStr:
            For the base description file, this is a list containing a
            DeferredDictLookup entry for each encoder. For example:
                [DeferredDictLookup('__gym_encoder'),
                 DeferredDictLookup('__address_encoder'),
                 DeferredDictLookup('__timestamp_timeOfDay_encoder'),
                 DeferredDictLookup('__timestamp_dayOfWeek_encoder'),
                 DeferredDictLookup('__consumption_encoder')],

          permEncoderChoicesStr:
            For the permutations file, this defines the possible
            encoder dicts for each encoder. For example:
                '__timestamp_dayOfWeek_encoder': [
                             None,
                             {'fieldname':'timestamp',
                              'name': 'timestamp_timeOfDay',
                              'type':'DateEncoder'
                              'dayOfWeek': (7,1)
                              },
                             {'fieldname':'timestamp',
                              'name': 'timestamp_timeOfDay',
                              'type':'DateEncoder'
                              'dayOfWeek': (7,3)
                              },
                          ],

                '__field_consumption_encoder': [
                            None,
                            {'fieldname':'consumption',
                             'name': 'consumption',
                             'type':'AdaptiveScalarEncoder',
                             'n': 13,
                             'w': 7,
                              }
                           ]



          Parameters:
          --------------------------------------------------
          includedFields:  item from the 'includedFields' section of the
                            description JSON object. This is a list of dicts, each
                            dict defining the field name, type, and optional min
                            and max values.

          retval:  (encoderSpecsStr, encoderSchemaStr permEncoderChoicesStr)


          """

          # ------------------------------------------------------------------------
          # First accumulate the possible choices for each encoder
          encoderChoicesList = []
          for fieldInfo in includedFields:

            fieldName = fieldInfo['fieldName']

            # Get the list of encoder choices for this field
            (choicesList, aggFunction) = _generateEncoderChoicesV1(fieldInfo)
            encoderChoicesList.extend(choicesList)


          # ------------------------------------------------------------------------
          # Generate the string containing the encoder specs and encoder schema. See
          #  the function comments for an example of the encoderSpecsStr and
          #  encoderSchemaStr
          #
          encoderSpecsList = []
          for encoderChoices in encoderChoicesList:
            # Use the last choice as the default in the base file because the 1st is
            # often None
            encoder = encoderChoices[-1]

            # Check for bad characters
            for c in _ILLEGAL_FIELDNAME_CHARACTERS:
              if encoder['name'].find(c) >= 0:
                raise _ExpGeneratorException("Illegal character in field: %r (%r)" % (
                  c, encoder['name']))

            encoderSpecsList.append("%s: \n%s%s" % (
                _quoteAndEscape(encoder['name']),
                2*_ONE_INDENT,
                pprint.pformat(encoder, indent=2*_INDENT_STEP)))

          encoderSpecsStr = ',\n  '.join(encoderSpecsList)


          # ------------------------------------------------------------------------
          # Generate the string containing the permutation encoder choices. See the
          #  function comments above for an example of the permEncoderChoicesStr

          permEncoderChoicesList = []
          for encoderChoices in encoderChoicesList:
            permEncoderChoicesList.append("%s: %s," % (
                _quoteAndEscape(encoderChoices[-1]['name']),
                pprint.pformat(encoderChoices, indent=2*_INDENT_STEP)))
          permEncoderChoicesStr = '\n'.join(permEncoderChoicesList)
          permEncoderChoicesStr = _indentLines(permEncoderChoicesStr, 1,
                                               indentFirstLine=False)

          # Return results
          return (encoderSpecsStr, permEncoderChoicesStr)
      - |-
        def shift(self, top=None, right=None, bottom=None, left=None):
                """
                Shift/move the line strings from one or more image sides.

                Parameters
                ----------
                top : None or int, optional
                    Amount of pixels by which to shift all bounding boxes from the
                    top.

                right : None or int, optional
                    Amount of pixels by which to shift all bounding boxes from the
                    right.

                bottom : None or int, optional
                    Amount of pixels by which to shift all bounding boxes from the
                    bottom.

                left : None or int, optional
                    Amount of pixels by which to shift all bounding boxes from the
                    left.

                Returns
                -------
                imgaug.augmentables.lines.LineStringsOnImage
                    Shifted line strings.

                """
                lss_new = [ls.shift(top=top, right=right, bottom=bottom, left=left)
                           for ls in self.line_strings]
                return LineStringsOnImage(lss_new, shape=self.shape)
      - |-
        def cross_entropy_reward_loss(logits, actions, rewards, name=None):
            """Calculate the loss for Policy Gradient Network.

            Parameters
            ----------
            logits : tensor
                The network outputs without softmax. This function implements softmax inside.
            actions : tensor or placeholder
                The agent actions.
            rewards : tensor or placeholder
                The rewards.

            Returns
            --------
            Tensor
                The TensorFlow loss function.

            Examples
            ----------
            >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])
            >>> network = InputLayer(states_batch_pl, name='input')
            >>> network = DenseLayer(network, n_units=H, act=tf.nn.relu, name='relu1')
            >>> network = DenseLayer(network, n_units=3, name='out')
            >>> probs = network.outputs
            >>> sampling_prob = tf.nn.softmax(probs)
            >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
            >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
            >>> loss = tl.rein.cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
            >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)

            """
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)

            return tf.reduce_sum(tf.multiply(cross_entropy, rewards))
  - source_sentence: |-
      Translate an index into coordinates, using the given coordinate system.

        Similar to ``numpy.unravel_index``.

        :param index: (int) The index of the point. The coordinates are expressed as a 
               single index by using the dimensions as a mixed radix definition. For 
               example, in dimensions 42x10, the point [1, 4] is index 
               1*420 + 4*10 = 460.

        :param dimensions (list of ints) The coordinate system.

        :returns: (list) of coordinates of length ``len(dimensions)``.
    sentences:
      - |-
        def coordinatesFromIndex(index, dimensions):
          """
          Translate an index into coordinates, using the given coordinate system.

          Similar to ``numpy.unravel_index``.

          :param index: (int) The index of the point. The coordinates are expressed as a 
                 single index by using the dimensions as a mixed radix definition. For 
                 example, in dimensions 42x10, the point [1, 4] is index 
                 1*420 + 4*10 = 460.

          :param dimensions (list of ints) The coordinate system.

          :returns: (list) of coordinates of length ``len(dimensions)``.
          """
          coordinates = [0] * len(dimensions)

          shifted = index
          for i in xrange(len(dimensions) - 1, 0, -1):
            coordinates[i] = shifted % dimensions[i]
            shifted = shifted / dimensions[i]

          coordinates[0] = shifted

          return coordinates
      - |-
        def step(self, observation, **extra_feed):
                """
                Compute next action(s) given the observation(s)

                Parameters:
                ----------

                observation     observation data (either single or a batch)

                **extra_feed    additional data such as state or mask (names of the arguments should match the ones in constructor, see __init__)

                Returns:
                -------
                (action, value estimate, next state, negative log likelihood of the action under current policy parameters) tuple
                """

                a, v, state, neglogp = self._evaluate([self.action, self.vf, self.state, self.neglogp], observation, **extra_feed)
                if state.size == 0:
                    state = None
                return a, v, state, neglogp
      - |-
        def pretty_eta(seconds_left):
            """Print the number of seconds in human readable format.

            Examples:
            2 days
            2 hours and 37 minutes
            less than a minute

            Paramters
            ---------
            seconds_left: int
                Number of seconds to be converted to the ETA
            Returns
            -------
            eta: str
                String representing the pretty ETA.
            """
            minutes_left = seconds_left // 60
            seconds_left %= 60
            hours_left = minutes_left // 60
            minutes_left %= 60
            days_left = hours_left // 24
            hours_left %= 24

            def helper(cnt, name):
                return "{} {}{}".format(str(cnt), name, ('s' if cnt > 1 else ''))

            if days_left > 0:
                msg = helper(days_left, 'day')
                if hours_left > 0:
                    msg += ' and ' + helper(hours_left, 'hour')
                return msg
            if hours_left > 0:
                msg = helper(hours_left, 'hour')
                if minutes_left > 0:
                    msg += ' and ' + helper(minutes_left, 'minute')
                return msg
            if minutes_left > 0:
                return helper(minutes_left, 'minute')
            return 'less than a minute'
  - source_sentence: Validates control dictionary for the experiment context
    sentences:
      - >-
        def load_file_list(path=None, regx='\.jpg', printable=True,
        keep_prefix=False):
            r"""Return a file list in a folder by given a path and regular expression.

            Parameters
            ----------
            path : str or None
                A folder path, if `None`, use the current directory.
            regx : str
                The regx of file name.
            printable : boolean
                Whether to print the files infomation.
            keep_prefix : boolean
                Whether to keep path in the file name.

            Examples
            ----------
            >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)')

            """
            if path is None:
                path = os.getcwd()
            file_list = os.listdir(path)
            return_list = []
            for _, f in enumerate(file_list):
                if re.search(regx, f):
                    return_list.append(f)
            # return_list.sort()
            if keep_prefix:
                for i, f in enumerate(return_list):
                    return_list[i] = os.path.join(path, f)

            if printable:
                logging.info('Match file list = %s' % return_list)
                logging.info('Number of files = %d' % len(return_list))
            return return_list
      - |-
        def getCompletingSwarms(self):
            """Return the list of all completing swarms.

            Parameters:
            ---------------------------------------------------------------------
            retval:   list of active swarm Ids
            """
            swarmIds = []
            for swarmId, info in self._state['swarms'].iteritems():
              if info['status'] == 'completing':
                swarmIds.append(swarmId)

            return swarmIds
      - |-
        def __validateExperimentControl(self, control):
            """ Validates control dictionary for the experiment context"""
            # Validate task list
            taskList = control.get('tasks', None)
            if taskList is not None:
              taskLabelsList = []

              for task in taskList:
                validateOpfJsonValue(task, "opfTaskSchema.json")
                validateOpfJsonValue(task['taskControl'], "opfTaskControlSchema.json")

                taskLabel = task['taskLabel']

                assert isinstance(taskLabel, types.StringTypes), \
                       "taskLabel type: %r" % type(taskLabel)
                assert len(taskLabel) > 0, "empty string taskLabel not is allowed"

                taskLabelsList.append(taskLabel.lower())

              taskLabelDuplicates = filter(lambda x: taskLabelsList.count(x) > 1,
                                           taskLabelsList)
              assert len(taskLabelDuplicates) == 0, \
                     "Duplcate task labels are not allowed: %s" % taskLabelDuplicates

            return
pipeline_tag: sentence-similarity
library_name: sentence-transformers

# SentenceTransformer based on answerdotai/ModernBERT-base

This is a sentence-transformers model fine-tuned from answerdotai/ModernBERT-base on the code_search_net dataset. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description

- **Model Type:** Sentence Transformer
- **Base model:** answerdotai/ModernBERT-base
- **Maximum Sequence Length:** 4096 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
- **Training Dataset:**
  - code_search_net

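### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)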

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 4096, 'do_lower_case': False}) with Transformer model: ModernBertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference:

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("juanwisz/modernbert-python-code-retrieval")
# Run inference
sentences = [
    'Validates control dictionary for the experiment context',
    'def __validateExperimentControl(self, control):\n    """ Validates control dictionary for the experiment context"""\n    # Validate task list\n    taskList = control.get(\'tasks\', None)\n    if taskList is not None:\n      taskLabelsList = []\n\n      for task in taskList:\n        validateOpfJsonValue(task, "opfTaskSchema.json")\n        validateOpfJsonValue(task[\'taskControl\'], "opfTaskControlSchema.json")\n\n        taskLabel = task[\'taskLabel\']\n\n        assert isinstance(taskLabel, types.StringTypes), \\\n               "taskLabel type: %r" % type(taskLabel)\n        assert len(taskLabel) > 0, "empty string taskLabel not is allowed"\n\n        taskLabelsList.append(taskLabel.lower())\n\n      taskLabelDuplicates = filter(lambda x: taskLabelsList.count(x) > 1,\n                                   taskLabelsList)\n      assert len(taskLabelDuplicates) == 0, \\\n             "Duplcate task labels are not allowed: %s" % taskLabelDuplicates\n\n    return',
    'def load_file_list(path=None, regx=\'\\.jpg\', printable=True, keep_prefix=False):\n    r"""Return a file list in a folder by given a path and regular expression.\n\n    Parameters\n    ----------\n    path : str or None\n        A folder path, if `None`, use the current directory.\n    regx : str\n        The regx of file name.\n    printable : boolean\n        Whether to print the files infomation.\n    keep_prefix : boolean\n        Whether to keep path in the file name.\n\n    Examples\n    ----------\n    >>> file_list = tl.files.load_file_list(path=None, regx=\'w1pre_[0-9]+\\.(npz)\')\n\n    """\n    if path is None:\n        path = os.getcwd()\n    file_list = os.listdir(path)\n    return_list = []\n    for _, f in enumerate(file_list):\n        if re.search(regx, f):\n            return_list.append(f)\n    # return_list.sort()\n    if keep_prefix:\n        for i, f in enumerate(return_list):\n            return_list[i] = os.path.join(path, f)\n\n    if printable:\n        logging.info(\'Match file list = %s\' % return_list)\n        logging.info(\'Number of files = %d\' % len(return_list))\n    return return_list',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Training Details

Training Dataset

code_search_net

Dataset: code_search_net
Size: 412,178 training samples
Columns: query and positive
Approximate statistics based on the first 1000 samples:
	query	positive
type	string	string
details	min: 4 tokens, mean: 73.72 tokens, max: 2258 tokens	min: 46 tokens, mean: 300.87 tokens, max: 3119 tokens

Samples:

query	positive
`Extracts the list of arguments that start with any of the specified prefix values`	`def findArgs(args, prefixes): """ Extracts the list of arguments that start with any of the specified prefix values """ return list([ arg for arg in args if len([p for p in prefixes if arg.lower().startswith(p.lower())]) > 0 ])`
`Removes any arguments in the supplied list that are contained in the specified blacklist`	`def stripArgs(args, blacklist): """ Removes any arguments in the supplied list that are contained in the specified blacklist """ blacklist = [b.lower() for b in blacklist] return list([arg for arg in args if arg.lower() not in blacklist])`
`Executes a child process and captures its output`	def capture(command, input=None, cwd=None, shell=False, raiseOnError=False): """ Executes a child process and captures its output """ # Attempt to execute the child process proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, shell=shell, universal_newlines=True) (stdout, stderr) = proc.communicate(input) # If the child process failed and we were asked to raise an exception, do so if raiseOnError == True and proc.returncode != 0: raise Exception( 'child process ' + str(command) + ' failed with exit code ' + str(proc.returncode) + '\nstdout: "' + stdout + '"' + '\nstderr: "' + stderr + '"' ) return CommandOutput(proc.returncode, stdout, stderr)

Loss: MultipleNegativesRankingLoss with these parameters:

{
    "scale": 20.0,
    "similarity_fct": "cos_sim"
}

Evaluation Dataset

code_search_net

Dataset: code_search_net
Size: 23,107 evaluation samples
Columns: query and positive
Approximate statistics based on the first 1000 samples:
	query	positive
type	string	string
details	min: 5 tokens, mean: 168.27 tokens, max: 2118 tokens	min: 48 tokens, mean: 467.9 tokens, max: 4096 tokens

Samples:

query	positive
Train a deepq model. Parameters ------- env: gym.Env environment to train on network: string or a function neural network to use as a q function approximator. If string, has to be one of the names of registered models in baselines.common.models (mlp, cnn, conv_only). If a function, should take an observation tensor and return a latent variable tensor, which will be mapped to the Q function heads (see build_q_func in baselines.deepq.models for details on that) seed: int or None prng seed. The runs with the same seed "should" give the same results. If None, no seeding is used. lr: float learning rate for adam optimizer total_timesteps: int number of env steps to optimizer for buffer_size: int size of the replay buffer exploration_fraction: float fraction of entire training period over which the exploration rate is annealed exploration_final_eps: float final value of ra...	def learn(env, network, seed=None, lr=5e-4, total_timesteps=100000, buffer_size=50000, exploration_fraction=0.1, exploration_final_eps=0.02, train_freq=1, batch_size=32, print_freq=100, checkpoint_freq=10000, checkpoint_path=None, learning_starts=1000, gamma=1.0, target_network_update_freq=500, prioritized_replay=False, prioritized_replay_alpha=0.6, prioritized_replay_beta0=0.4, prioritized_replay_beta_iters=None, prioritized_replay_eps=1e-6, param_noise=False, callback=None, load_path=None, **network_kwargs ): """Train a deepq model. Parameters ------- env: gym.Env environment to train on network: string or a function neural network to use as a q function approximator. If string, has to be one of the ...
`Save model to a pickle located at path`	def save_act(self, path=None): """Save model to a pickle located at path""" if path is None: path = os.path.join(logger.get_dir(), "model.pkl") with tempfile.TemporaryDirectory() as td: save_variables(os.path.join(td, "model")) arc_name = os.path.join(td, "packed.zip") with zipfile.ZipFile(arc_name, 'w') as zipf: for root, dirs, files in os.walk(td): for fname in files: file_path = os.path.join(root, fname) if file_path != arc_name: zipf.write(file_path, os.path.relpath(file_path, td)) with open(arc_name, "rb") as f: model_data = f.read() with open(path, "wb") as f: cloudpickle.dump((model_data, self._act_params), f)
`CNN from Nature paper.`	def nature_cnn(unscaled_images, conv_kwargs): """ CNN from Nature paper. """ scaled_images = tf.cast(unscaled_images, tf.float32) / 255. activ = tf.nn.relu h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), conv_kwargs)) h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), conv_kwargs)) h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), conv_kwargs)) h3 = conv_to_fc(h3) return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))

Loss: MultipleNegativesRankingLoss with these parameters:

{
    "scale": 20.0,
    "similarity_fct": "cos_sim"
}

Training Hyperparameters

Non-Default Hyperparameters

eval_strategy: epoch
per_device_train_batch_size: 4
gradient_accumulation_steps: 4
learning_rate: 2e-05
num_train_epochs: 10
warmup_steps: 1000
fp16: True

All Hyperparameters

Click to expand

overwrite_output_dir: False
do_predict: False
eval_strategy: epoch
prediction_loss_only: True
per_device_train_batch_size: 4
per_device_eval_batch_size: 8
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 4
eval_accumulation_steps: None
torch_empty_cache_steps: None
learning_rate: 2e-05
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 10
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.0
warmup_steps: 1000
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_nan_inf_filter: True
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: False
fp16: True
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: None
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
disable_tqdm: False
remove_unused_columns: True
label_names: None
load_best_model_at_end: False
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: False
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: False
resume_from_checkpoint: None
hub_model_id: None
hub_strategy: every_save
hub_private_repo: None
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
include_for_metrics: []
eval_do_concat_batches: True
fp16_backend: auto
push_to_hub_model_id: None
push_to_hub_organization: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
eval_on_start: False
use_liger_kernel: False
eval_use_gather_object: False
average_tokens_across_devices: False
prompts: None
batch_sampler: batch_sampler
multi_dataset_batch_sampler: proportional

Training Logs

Click to expand

Epoch	Step	Training Loss	Validation Loss
0.0078	200	0.634	-
0.0155	400	0.0046	-
0.0233	600	0.0009	-
0.0311	800	0.0004	-
0.0388	1000	0.0001	-
0.0466	1200	0.0002	-
0.0543	1400	0.0001	-
0.0621	1600	0.0001	-
0.0699	1800	0.0001	-
0.0776	2000	0.0	-
0.0854	2200	0.0	-
0.0932	2400	0.0	-
0.1009	2600	0.0	-
0.1087	2800	0.0005	-
0.1165	3000	0.0005	-
0.1242	3200	0.0002	-
0.1320	3400	0.0	-
0.1397	3600	0.0	-
0.1475	3800	0.0	-
0.1553	4000	0.0001	-
0.1630	4200	0.0	-
0.1708	4400	0.0001	-
0.1786	4600	0.0001	-
0.1863	4800	0.0	-
0.1941	5000	0.0	-
0.2019	5200	0.0	-
0.2096	5400	0.0	-
0.2174	5600	0.0	-
0.2251	5800	0.0	-
0.2329	6000	0.0004	-
0.2407	6200	0.0	-
0.2484	6400	0.0001	-
0.2562	6600	0.0	-
0.2640	6800	0.0	-
0.2717	7000	0.0	-
0.2795	7200	0.0	-
0.2873	7400	0.0	-
0.2950	7600	0.0	-
0.3028	7800	0.0	-
0.3105	8000	0.0	-
0.3183	8200	0.0	-
0.3261	8400	0.0004	-
0.3338	8600	0.0	-
0.3416	8800	0.0	-
0.3494	9000	0.0	-
0.3571	9200	0.0	-
0.3649	9400	0.0	-
0.3727	9600	0.0	-
0.3804	9800	0.0	-
0.3882	10000	0.0	-
0.3959	10200	0.0	-
0.4037	10400	0.0	-
0.4115	10600	0.0	-
0.4192	10800	0.0	-
0.4270	11000	0.0	-
0.4348	11200	0.0	-
0.4425	11400	0.0	-
0.4503	11600	0.0	-
0.4581	11800	0.0	-
0.4658	12000	0.0	-
0.4736	12200	0.0	-
0.4813	12400	0.0	-
0.4891	12600	0.0005	-
0.4969	12800	0.0	-
0.5046	13000	0.0	-
0.5124	13200	0.0001	-
0.5202	13400	0.0	-
0.5279	13600	0.0	-
0.5357	13800	0.0	-
0.5435	14000	0.0	-
0.5512	14200	0.0	-
0.5590	14400	0.0004	-
0.5667	14600	0.0	-
0.5745	14800	0.0	-
0.5823	15000	0.0	-
0.5900	15200	0.0	-
0.5978	15400	0.0	-
0.6056	15600	0.0	-
0.6133	15800	0.0	-
0.6211	16000	0.0	-
0.6289	16200	0.0	-
0.6366	16400	0.0006	-
0.6444	16600	0.0	-
0.6521	16800	0.0005	-
0.6599	17000	0.0	-
0.6677	17200	0.0	-
0.6754	17400	0.0	-
0.6832	17600	0.0	-
0.6910	17800	0.0	-
0.6987	18000	0.0005	-
0.7065	18200	0.0001	-
0.7143	18400	0.0	-
0.7220	18600	0.0	-
0.7298	18800	0.0	-
0.7375	19000	0.0	-
0.7453	19200	0.0	-
0.7531	19400	0.0	-
0.7608	19600	0.0	-
0.7686	19800	0.0001	-
0.7764	20000	0.0	-
0.7841	20200	0.0	-
0.7919	20400	0.0	-
0.7997	20600	0.0004	-
0.8074	20800	0.0	-
0.8152	21000	0.0	-
0.8229	21200	0.0	-
0.8307	21400	0.0009	-
0.8385	21600	0.0	-
0.8462	21800	0.0	-
0.8540	22000	0.0	-
0.8618	22200	0.0	-
0.8695	22400	0.0002	-
0.8773	22600	0.0	-
0.8851	22800	0.0	-
0.8928	23000	0.0001	-
0.9006	23200	0.0	-
0.9083	23400	0.0	-
0.9161	23600	0.0	-
0.9239	23800	0.0	-
0.9316	24000	0.0	-
0.9394	24200	0.0	-
0.9472	24400	0.0	-
0.9549	24600	0.0	-
0.9627	24800	0.0	-
0.9704	25000	0.0	-
0.9782	25200	0.0	-
0.9860	25400	0.0	-
0.9937	25600	0.0	-
1.0	25762	-	0.0001
1.0015	25800	0.0005	-
1.0092	26000	0.0	-
1.0170	26200	0.0	-
1.0248	26400	0.0	-
1.0325	26600	0.0	-
1.0403	26800	0.0	-
1.0481	27000	0.0	-
1.0558	27200	0.0	-
1.0636	27400	0.0	-
1.0713	27600	0.0	-
1.0791	27800	0.0	-
1.0869	28000	0.0	-
1.0946	28200	0.0	-
1.1024	28400	0.0	-
1.1102	28600	0.0	-
1.1179	28800	0.0	-
1.1257	29000	0.0	-
1.1335	29200	0.0	-
1.1412	29400	0.0	-
1.1490	29600	0.0	-
1.1567	29800	0.0	-
1.1645	30000	0.0	-
1.1723	30200	0.0	-
1.1800	30400	0.0	-
1.1878	30600	0.0	-
1.1956	30800	0.0	-
1.2033	31000	0.0	-
1.2111	31200	0.0	-
1.2189	31400	0.0	-
1.2266	31600	0.0004	-
1.2344	31800	0.0004	-
1.2421	32000	0.0	-
1.2499	32200	0.0	-
1.2577	32400	0.0	-
1.2654	32600	0.0	-
1.2732	32800	0.0	-
1.2810	33000	0.0	-
1.2887	33200	0.0	-
1.2965	33400	0.0	-
1.3043	33600	0.0	-
1.3120	33800	0.0	-
1.3198	34000	0.0	-
1.3275	34200	0.0	-
1.3353	34400	0.0	-
1.3431	34600	0.0	-
1.3508	34800	0.0004	-
1.3586	35000	0.0005	-
1.3664	35200	0.0004	-
1.3741	35400	0.0011	-
1.3819	35600	0.0	-
1.3897	35800	0.0	-
1.3974	36000	0.0	-
1.4052	36200	0.0	-
1.4129	36400	0.0	-
1.4207	36600	0.0	-
1.4285	36800	0.0	-
1.4362	37000	0.0	-
1.4440	37200	0.0001	-
1.4518	37400	0.0	-
1.4595	37600	0.0	-
1.4673	37800	0.0	-
1.4751	38000	0.0	-
1.4828	38200	0.0004	-
1.4906	38400	0.0003	-
1.4983	38600	0.0	-
1.5061	38800	0.0	-
1.5139	39000	0.0	-
1.5216	39200	0.0	-
1.5294	39400	0.0004	-
1.5372	39600	0.0004	-
1.5449	39800	0.0	-
1.5527	40000	0.0	-
1.5605	40200	0.0	-
1.5682	40400	0.0	-
1.5760	40600	0.0009	-
1.5837	40800	0.0	-
1.5915	41000	0.0009	-
1.5993	41200	0.0	-
1.6070	41400	0.0	-
1.6148	41600	0.0	-
1.6226	41800	0.0	-
1.6303	42000	0.0	-
1.6381	42200	0.0	-
1.6459	42400	0.0	-
1.6536	42600	0.0	-
1.6614	42800	0.0	-
1.6691	43000	0.0	-
1.6769	43200	0.0	-
1.6847	43400	0.0	-
1.6924	43600	0.0	-
1.7002	43800	0.0	-
1.7080	44000	0.0	-
1.7157	44200	0.0	-
1.7235	44400	0.0	-
1.7313	44600	0.0	-
1.7390	44800	0.0	-
1.7468	45000	0.0	-
1.7545	45200	0.0	-
1.7623	45400	0.0	-
1.7701	45600	0.0	-
1.7778	45800	0.0	-
1.7856	46000	0.0	-
1.7934	46200	0.0	-
1.8011	46400	0.0	-
1.8089	46600	0.0	-
1.8167	46800	0.0	-
1.8244	47000	0.0	-
1.8322	47200	0.0	-
1.8399	47400	0.0	-
1.8477	47600	0.0	-
1.8555	47800	0.0004	-
1.8632	48000	0.0	-
1.8710	48200	0.0	-
1.8788	48400	0.0	-
1.8865	48600	0.0	-
1.8943	48800	0.0	-
1.9021	49000	0.0004	-
1.9098	49200	0.0	-
1.9176	49400	0.0	-
1.9253	49600	0.0004	-
1.9331	49800	0.0	-
1.9409	50000	0.0	-
1.9486	50200	0.0	-
1.9564	50400	0.0	-
1.9642	50600	0.0004	-
1.9719	50800	0.0	-
1.9797	51000	0.0	-
1.9875	51200	0.0	-
1.9952	51400	0.0004	-
2.0	51524	-	0.0001
2.0030	51600	0.0	-
2.0107	51800	0.0	-
2.0185	52000	0.0	-
2.0262	52200	0.0	-
2.0340	52400	0.0004	-
2.0418	52600	0.0004	-
2.0495	52800	0.0	-
2.0573	53000	0.0008	-
2.0651	53200	0.0	-
2.0728	53400	0.0	-
2.0806	53600	0.0	-
2.0883	53800	0.0	-
2.0961	54000	0.0	-
2.1039	54200	0.0	-
2.1116	54400	0.0	-
2.1194	54600	0.0	-
2.1272	54800	0.0	-
2.1349	55000	0.0	-
2.1427	55200	0.0	-
2.1505	55400	0.0	-
2.1582	55600	0.0	-
2.1660	55800	0.0	-
2.1737	56000	0.0	-
2.1815	56200	0.0	-
2.1893	56400	0.0	-
2.1970	56600	0.0	-
2.2048	56800	0.0	-
2.2126	57000	0.0	-
2.2203	57200	0.0	-
2.2281	57400	0.0	-
2.2359	57600	0.0	-
2.2436	57800	0.0	-
2.2514	58000	0.0004	-
2.2591	58200	0.0	-
2.2669	58400	0.0004	-
2.2747	58600	0.0	-
2.2824	58800	0.0	-
2.2902	59000	0.0	-
2.2980	59200	0.0	-
2.3057	59400	0.0	-
2.3135	59600	0.0	-
2.3213	59800	0.0004	-
2.3290	60000	0.0	-
2.3368	60200	0.0004	-
2.3445	60400	0.0	-
2.3523	60600	0.0	-
2.3601	60800	0.0	-
2.3678	61000	0.0	-
2.3756	61200	0.0	-
2.3834	61400	0.0	-
2.3911	61600	0.0	-
2.3989	61800	0.0	-
2.4067	62000	0.0005	-
2.4144	62200	0.0	-
2.4222	62400	0.0	-
2.4299	62600	0.0	-
2.4377	62800	0.0	-
2.4455	63000	0.0	-
2.4532	63200	0.0	-
2.4610	63400	0.0	-
2.4688	63600	0.0	-
2.4765	63800	0.0	-
2.4843	64000	0.0	-
2.4921	64200	0.0	-
2.4998	64400	0.0	-
2.5076	64600	0.0	-
2.5153	64800	0.0	-
2.5231	65000	0.0	-
2.5309	65200	0.0	-
2.5386	65400	0.0	-
2.5464	65600	0.0004	-
2.5542	65800	0.0	-
2.5619	66000	0.0	-
2.5697	66200	0.0	-
2.5775	66400	0.0	-
2.5852	66600	0.0	-
2.5930	66800	0.0	-
2.6007	67000	0.0	-
2.6085	67200	0.0	-
2.6163	67400	0.0	-
2.6240	67600	0.0	-
2.6318	67800	0.0	-
2.6396	68000	0.0	-
2.6473	68200	0.0	-
2.6551	68400	0.0	-
2.6629	68600	0.0	-
2.6706	68800	0.0004	-
2.6784	69000	0.0	-
2.6861	69200	0.0	-
2.6939	69400	0.0	-
2.7017	69600	0.0004	-
2.7094	69800	0.0004	-
2.7172	70000	0.0	-
2.7250	70200	0.0	-
2.7327	70400	0.0	-
2.7405	70600	0.0	-
2.7483	70800	0.0	-
2.7560	71000	0.0004	-
2.7638	71200	0.0	-
2.7715	71400	0.0	-
2.7793	71600	0.0	-
2.7871	71800	0.0	-
2.7948	72000	0.0	-
2.8026	72200	0.0	-
2.8104	72400	0.0	-
2.8181	72600	0.0	-
2.8259	72800	0.0	-
2.8337	73000	0.0004	-
2.8414	73200	0.0	-
2.8492	73400	0.0	-
2.8569	73600	0.0	-
2.8647	73800	0.0004	-
2.8725	74000	0.0	-
2.8802	74200	0.0	-
2.8880	74400	0.0	-
2.8958	74600	0.0	-
2.9035	74800	0.0	-
2.9113	75000	0.0	-
2.9191	75200	0.0	-
2.9268	75400	0.0004	-
2.9346	75600	0.0	-
2.9423	75800	0.0	-
2.9501	76000	0.0	-
2.9579	76200	0.0	-
2.9656	76400	0.0	-
2.9734	76600	0.0004	-
2.9812	76800	0.0	-
2.9889	77000	0.0	-
2.9967	77200	0.0	-
3.0	77286	-	0.0000

Framework Versions

Python: 3.11.11
Sentence Transformers: 3.3.1
Transformers: 4.48.0
PyTorch: 2.5.1+cu121
Accelerate: 1.2.1
Datasets: 3.2.0
Tokenizers: 0.21.0

Citation

BibTeX

ModernBERT

@misc{warner2024smarterbetterfasterlonger,
      title={Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference}, 
      author={Benjamin Warner and Antoine Chaffin and Benjamin Clavié and Orion Weller and Oskar Hallström and Said Taghadouini and Alexis Gallagher and Raja Biswas and Faisal Ladhak and Tom Aarsen and Nathan Cooper and Griffin Adams and Jeremy Howard and Iacopo Poli},
      year={2024},
      eprint={2412.13663},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2412.13663}, 
}

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}