"""
Evaluation metrics for document ranking.
This file contains implementation of various evaluation metrics 
for assessing the quality of document rankings.
"""

def recall_at_k(true_items, predicted_items, k=10):
    """
    Calculate recall at k for a single query.
    
    Parameters:
    true_items (list): List of true relevant items
    predicted_items (list): List of predicted items (ranked)
    k (int): Number of top items to consider
    
    Returns:
    float: Recall@k value between 0 and 1
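
    Example (illustrative document IDs):
    >>> recall_at_k(['d1', 'd2'], ['d1', 'x', 'y', 'd2'], k=2)
    0.5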
    """
    if not true_items:
        return 0.0  # No relevant items to recall
    
    # Get the top k predicted items
    top_k_items = predicted_items[:k]
    
    # Count the number of unique true items in the top k predictions
    # (set intersection, so duplicate predictions cannot inflate the count)
    relevant_in_top_k = len(set(top_k_items) & set(true_items))
    
    # Calculate recall: (relevant items in top k) / (total relevant items)
    return relevant_in_top_k / len(true_items)

def mean_recall_at_k(true_items_list, predicted_items_list, k=10):
    """
    Calculate mean recall at k across multiple queries.
    
    Parameters:
    true_items_list (list of lists): List of true relevant items for each query
    predicted_items_list (list of lists): List of predicted items for each query
    k (int): Number of top items to consider
    
    Returns:
    float: Mean Recall@k value between 0 and 1
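
    Example (illustrative document IDs; per-query recalls 1.0 and 0.5):
    >>> mean_recall_at_k([['a'], ['b', 'c']], [['a', 'x'], ['b', 'x']], k=2)
    0.75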
    """
    if len(true_items_list) != len(predicted_items_list):
        raise ValueError("Number of true item lists must match number of predicted item lists")
    
    if not true_items_list:
        return 0.0  # No data provided
    
    # Calculate recall@k for each query
    recalls = [recall_at_k(true_items, predicted_items, k) 
               for true_items, predicted_items in zip(true_items_list, predicted_items_list)]
    
    # Return mean recall@k
    return sum(recalls) / len(recalls)

def average_precision(true_items, predicted_items):
    """
    Calculate average precision for a single query.
    
    Parameters:
    true_items (list): List of true relevant items
    predicted_items (list): List of predicted items (ranked)
    
    Returns:
    float: Average precision value between 0 and 1
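
    Example (illustrative document IDs; hits at ranks 1 and 4):
    >>> average_precision(['d1', 'd2'], ['d1', 'x', 'y', 'd2'])
    0.75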
    """
    if not true_items or not predicted_items:
        return 0.0
    
    # Use a set for fast membership tests, and track which relevant items have
    # already been credited so duplicate predictions cannot push AP above 1
    true_set = set(true_items)
    credited = set()
    
    # Track number of relevant items seen and running sum of precision values
    relevant_count = 0
    precision_sum = 0.0
    
    # Calculate precision at each position where a relevant item is found
    for i, item in enumerate(predicted_items):
        position = i + 1  # 1-indexed position
        
        if item in true_set and item not in credited:
            credited.add(item)
            relevant_count += 1
            # Precision at this position = relevant items seen / position
            precision_at_position = relevant_count / position
            precision_sum += precision_at_position
    
    # Average precision = sum of precision values / total relevant items
    total_relevant = len(true_items)
    return precision_sum / total_relevant if total_relevant > 0 else 0.0

def mean_average_precision(true_items_list, predicted_items_list):
    """
    Calculate mean average precision (MAP) across multiple queries.
    
    Parameters:
    true_items_list (list of lists): List of true relevant items for each query
    predicted_items_list (list of lists): List of predicted items for each query
    
    Returns:
    float: MAP value between 0 and 1
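
    Example (illustrative document IDs; per-query APs 1.0 and 0.5):
    >>> mean_average_precision([['a'], ['b']], [['a', 'x'], ['x', 'b']])
    0.75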
    """
    if len(true_items_list) != len(predicted_items_list):
        raise ValueError("Number of true item lists must match number of predicted item lists")
    
    if not true_items_list:
        return 0.0  # No data provided
    
    # Calculate average precision for each query
    aps = [average_precision(true_items, predicted_items) 
           for true_items, predicted_items in zip(true_items_list, predicted_items_list)]
    
    # Return mean average precision
    return sum(aps) / len(aps)

def inverse_ranking(true_items, predicted_items):
    """
    Calculate the inverse (reciprocal) ranking of the first relevant item.
    
    Parameters:
    true_items (list): List of true relevant items
    predicted_items (list): List of predicted items (ranked)
    
    Returns:
    float: Inverse ranking value between 0 and 1
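
    Example (illustrative document IDs; first relevant item at rank 4):
    >>> inverse_ranking(['d3'], ['x', 'y', 'z', 'd3'])
    0.25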
    """
    if not true_items or not predicted_items:
        return 0.0
    
    # Find position of first relevant item (1-indexed)
    for i, item in enumerate(predicted_items):
        if item in true_items:
            rank = i + 1
            return 1.0 / rank  # Inverse ranking
    
    # No relevant items found in predictions
    return 0.0

def mean_inv_ranking(true_items_list, predicted_items_list):
    """
    Calculate mean inverse ranking (MIR, i.e. mean reciprocal rank) across multiple queries.
    
    Parameters:
    true_items_list (list of lists): List of true relevant items for each query
    predicted_items_list (list of lists): List of predicted items for each query
    
    Returns:
    float: MIR value between 0 and 1
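
    Example (illustrative document IDs; first-hit ranks 1 and 2):
    >>> mean_inv_ranking([['a'], ['b']], [['a', 'x'], ['x', 'b']])
    0.75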
    """
    if len(true_items_list) != len(predicted_items_list):
        raise ValueError("Number of true item lists must match number of predicted item lists")
    
    if not true_items_list:
        return 0.0  # No data provided
    
    # Calculate inverse ranking for each query
    inv_ranks = [inverse_ranking(true_items, predicted_items) 
                 for true_items, predicted_items in zip(true_items_list, predicted_items_list)]
    
    # Return mean inverse ranking
    return sum(inv_ranks) / len(inv_ranks)

def ranking(true_items, predicted_items):
    """
    Calculate the rank of the first relevant item.
    
    Parameters:
    true_items (list): List of true relevant items
    predicted_items (list): List of predicted items (ranked)
    
    Returns:
    int or float: Rank of the first relevant item (1-indexed), or float('inf') if none is retrieved
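
    Example (illustrative document IDs):
    >>> ranking(['d2'], ['x', 'y', 'd2'])
    3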
    """
    if not true_items or not predicted_items:
        return float('inf')  # No relevant items to find
    
    # Find position of first relevant item (1-indexed)
    for i, item in enumerate(predicted_items):
        if item in true_items:
            return i + 1  # Return rank (1-indexed)
    
    # No relevant items found in predictions
    return float('inf')

def mean_ranking(true_items_list, predicted_items_list):
    """
    Calculate mean ranking across multiple queries.
    
    Parameters:
    true_items_list (list of lists): List of true relevant items for each query
    predicted_items_list (list of lists): List of predicted items for each query
    
    Returns:
    float: Mean rank of the first relevant item over queries where at least one relevant item was retrieved (higher is worse)
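
    Example (illustrative document IDs; first-hit ranks 1 and 3):
    >>> mean_ranking([['a'], ['b']], [['a'], ['x', 'y', 'b']])
    2.0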
    """
    if len(true_items_list) != len(predicted_items_list):
        raise ValueError("Number of true item lists must match number of predicted item lists")
    
    if not true_items_list:
        return float('inf')  # No data provided
    
    # Calculate ranking for each query
    ranks = [ranking(true_items, predicted_items) 
             for true_items, predicted_items in zip(true_items_list, predicted_items_list)]
    
    # Filter out 'inf' values for mean calculation
    finite_ranks = [r for r in ranks if r != float('inf')]
    
    # Return mean ranking
    return sum(finite_ranks) / len(finite_ranks) if finite_ranks else float('inf')
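
if __name__ == "__main__":
    # Illustrative usage with made-up document IDs: a quick sanity check of the
    # mean-level metrics (the data below is hypothetical, not a benchmark).
    true_relevance = [["d1", "d3"], ["d2"]]
    predictions = [["d1", "d4", "d3", "d5"], ["d5", "d4", "d2"]]

    print("Mean Recall@2:", mean_recall_at_k(true_relevance, predictions, k=2))
    print("MAP:", mean_average_precision(true_relevance, predictions))
    print("MRR (MIR):", mean_inv_ranking(true_relevance, predictions))
    print("Mean rank:", mean_ranking(true_relevance, predictions))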