Spaces:
Sleeping
Sleeping
File size: 2,812 Bytes
22e1b62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import nltk
import numpy as np
from config import metric
from utils import refine_candidate_text
from texts.bart_score import (
bart_score,
check_bart_score,
)
def compute_metrics(evaluation_predictions):
    """
    Turn raw model outputs into class predictions and score them.

    Parameters:
        evaluation_predictions (tuple): A two-element iterable, unpacked as
            ``(predictions, labels)``:
            - predictions (array-like): Raw per-class scores from the model;
              the arg-max is taken along axis 1.
            - labels (array-like): The reference labels, forwarded untouched.

    Returns:
        Whatever ``metric.compute`` returns for the macro-averaged metric
        (``metric`` is imported from ``config``; presumably a dict of metric
        values -- confirm against the configured metric).
    """
    scores, references = evaluation_predictions

    # Collapse the per-class scores to the index of the highest-scoring class.
    class_ids = np.argmax(scores, axis=1)

    # NOTE(review): class indices (not scores) are passed under the
    # ``prediction_scores`` keyword -- confirm this is the keyword the metric
    # configured in ``config`` expects.
    metric_arguments = {
        "prediction_scores": class_ids,
        "references": references,
        "average": "macro",
    }
    return metric.compute(**metric_arguments)
def extract_by_best_similarity(input_text, raw_text):
    """
    Pick the candidate from ``raw_text`` that scores highest against
    ``input_text``.

    The raw text is cleaned with ``refine_candidate_text``, split into
    sentences with ``nltk.sent_tokenize`` and then further split on newlines.
    Each piece is cleaned again, filtered with ``check_bart_score`` and
    scored with ``bart_score``; the piece with the highest score wins.

    Args:
        input_text (str): The original text.
        raw_text (str): The raw text containing multiple candidate strings.

    Returns:
        str: The refined candidate with the highest score, or ``input_text``
        when no candidate passes ``check_bart_score`` or the winning
        candidate is empty.
    """
    # Clean the whole raw text once before splitting it into candidates.
    refined_raw_text = refine_candidate_text(input_text, raw_text)

    # Sentence-tokenize first, then break each sentence on newlines so that
    # every line is considered as its own candidate.
    candidate_list = []
    for sentence in nltk.sent_tokenize(refined_raw_text):
        candidate_list.extend(sentence.split("\n"))

    # Start from negative infinity rather than an arbitrary floor so that any
    # candidate accepted by check_bart_score can win, however low its score.
    best_similarity = float("-inf")
    best_candidate = ""

    for candidate in candidate_list:
        refined_candidate = refine_candidate_text(input_text, candidate)

        # Skip candidates rejected by the project-level check.
        if not check_bart_score(input_text, refined_candidate):
            continue

        # Only the first element of the bart_score result is used as the score.
        score = bart_score(input_text, refined_candidate)[0]
        if score > best_similarity:
            best_similarity = score
            best_candidate = refined_candidate

    # Report the selected candidate (empty when nothing was selected).
    print(f"best_candidate = {best_candidate}")

    # Fall back to the original text when no usable candidate was found.
    if not best_candidate:
        return input_text
    return best_candidate
|