AjayP13 committed on
Commit 2fe76e3 · verified · 1 Parent(s): 8681f11

Delete pooling_coverage.py

Files changed (1)
  1. pooling_coverage.py +0 -160
pooling_coverage.py DELETED
@@ -1,160 +0,0 @@
- import torch
-
-
- class GaussianCoveragePooling(torch.nn.Module):
-     def __init__(self, coverage_chunks, sigma, alpha):
-         """
-         Custom pooling layer that computes weighted mean pooling using Gaussian-based weights.
-
-         Args:
-             coverage_chunks (int): Number of weighted pooling operations (N).
-             sigma (float): Standard deviation for Gaussian weighting.
-             alpha (float): Weighting factor for merging with standard mean pooling.
-         """
-         super().__init__()
-         self.coverage_chunks = coverage_chunks
-         self.sigma = sigma  # Controls width of Gaussians
-         self.alpha = alpha  # Blends standard mean with weighted mean
-
-     def forward(self, features, chunk_indicators=None):
-         """
-         Computes weighted mean pooling using Gaussian-based weights.
-
-         Args:
-             features (dict): The token embeddings and attention mask.
-             chunk_indicators (tensor[bz, 1]): Index indicators to return a specific chunk;
-                 leave as None (e.g. at inference) to return embeddings for all chunks.
-                 Mainly useful for training.
-         """
-
-         # Get token embeddings and attention mask
-         token_embeddings = features[
-             "token_embeddings"
-         ]  # (batch_size, seq_len, hidden_dim)
-         attention_mask = (
-             features["attention_mask"].float().unsqueeze(-1)
-         )  # (batch_size, seq_len, 1)
-
-         # Get shapes and device
-         batch_size, seq_len, hidden_dim = token_embeddings.shape
-         device = token_embeddings.device
-
-         # Compute actual sequence lengths (ignoring padding)
-         # (batch_size, 1)
-         seq_lengths = attention_mask.squeeze(-1).sum(dim=1, keepdim=True)
-         max_seq_length = int(torch.max(seq_lengths).item())
-
-         # Standard mean pooling
-         sum_embeddings = torch.sum(token_embeddings * attention_mask, dim=1)
-         sum_mask = torch.sum(attention_mask, dim=1).clamp(min=1e-9)
-         standard_mean = sum_embeddings / sum_mask  # (batch_size, hidden_dim)
-
-         # Compute chunk centers dynamically based on sequence length
-         chunk_positions = torch.linspace(0, 1, self.coverage_chunks + 2, device=device)[
-             1:-1
-         ]  # Excludes 0 and 1
-         chunk_centers = chunk_positions * seq_lengths  # (batch_size, N)
-
-         # Token positions per sequence (batch_size, seq_len)
-         token_positions = (
-             torch.arange(seq_len, device=device).float().unsqueeze(0)
-         )  # (1, seq_len)
-
-         # Compute Gaussian weights (batch_size, N, seq_len)
-         seq_lengths = seq_lengths.view(seq_lengths.shape[0], 1, 1).repeat(
-             1, self.coverage_chunks, max_seq_length
-         )
-         gaussians = torch.exp(
-             -0.5
-             * (
-                 (token_positions.unsqueeze(1) - chunk_centers.unsqueeze(2))
-                 / (self.sigma * seq_lengths)
-             )
-             ** 2
-         )
-
-         # Mask out padding and normalize Gaussian weights per sequence
-         # (batch_size, N, seq_len)
-         gaussians = gaussians * attention_mask.squeeze(-1).unsqueeze(1)
-
-         # Normalize against Gaussian weights
-         gaussians /= gaussians.sum(dim=2, keepdim=True).clamp(min=1e-9)
-
-         # Compute weighted mean for each chunk (batch_size, N, hidden_dim)
-         weighted_means = torch.einsum(
-             "bns,bsh->bnh", gaussians.to(token_embeddings.dtype), token_embeddings
-         )
-
-         # Blend with standard mean pooling
-         # (batch_size, N, hidden_dim)
-         combined_embeddings = (1 - self.alpha) * standard_mean.unsqueeze(
-             1
-         ) + self.alpha * weighted_means
-
-         # Add an embedding for the entire document at index 0
-         # (batch_size, N+1, hidden_dim)
-         combined_embeddings = torch.cat(
-             [torch.zeros_like(combined_embeddings[:, :1]), combined_embeddings], 1
-         )
-         combined_embeddings[:, 0:1, :] = standard_mean.unsqueeze(1)
-
-         # Select the indicated chunk if provided
-         if chunk_indicators is not None:
-             combined_embeddings = combined_embeddings[
-                 torch.arange(combined_embeddings.size(0)), chunk_indicators
-             ]
-
-         # Normalize all the embeddings
-         combined_embeddings = torch.nn.functional.normalize(
-             combined_embeddings, p=2, dim=-1
-         )
-
-         # Flatten final embeddings (batch_size, hidden_dim * (N+1))
-         if chunk_indicators is None:
-             sentence_embedding = combined_embeddings.reshape(
-                 batch_size, hidden_dim * (self.coverage_chunks + 1)
-             )
-         else:
-             sentence_embedding = combined_embeddings
-
-         # Return the final flattened sentence embedding
-         features["sentence_embedding"] = sentence_embedding
-         return features
-
-
- def use_gaussian_coverage_pooling(m, coverage_chunks=10, sigma=0.05, alpha=1.0):
-     """
-     Adds a custom pooling layer that computes weighted mean pooling using Gaussian-based weights.
-
-     Args:
-         m (SentenceTransformer): The model to add the pooling layer to.
-         coverage_chunks (int): Number of weighted pooling operations (N).
-         sigma (float): Standard deviation for Gaussian weighting.
-         alpha (float): Weighting factor for merging with standard mean pooling.
-     """
-     if isinstance(m[1], GaussianCoveragePooling):
-         m = unuse_gaussian_coverage_pooling(m)
-     word_embedding_model = m[0]
-     custom_pooling = GaussianCoveragePooling(
-         coverage_chunks=coverage_chunks, sigma=sigma, alpha=alpha
-     )
-     old_pooling = m[1]
-     new_m = m.__class__(modules=[word_embedding_model, custom_pooling])
-     new_m.old_pooling = {"old_pooling": old_pooling}
-     return new_m
-
-
- def unuse_gaussian_coverage_pooling(m):
-     """
-     Removes the custom pooling layer and restores the original one.
-
-     Args:
-         m (SentenceTransformer): The model to remove the pooling layer from.
-     """
-     if isinstance(m[1], GaussianCoveragePooling):
-         new_m = m.__class__(modules=[m[0], m.old_pooling["old_pooling"]])
-         return new_m
-     else:
-         return m
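
For reference, here is a minimal usage sketch of how the deleted helpers were intended to be attached to a SentenceTransformer, reconstructed from the docstrings above. The base model name and the assumption that pooling_coverage.py is still importable are illustrative only and not part of this commit.

# Minimal sketch (assumptions: pooling_coverage.py is on the path and
# "sentence-transformers/all-MiniLM-L6-v2" stands in for the real base model).
from sentence_transformers import SentenceTransformer

from pooling_coverage import (
    use_gaussian_coverage_pooling,
    unuse_gaussian_coverage_pooling,
)

# Any SentenceTransformer whose module list is [word_embedding_model, pooling].
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Swap the standard pooling layer for Gaussian coverage pooling; index 0 of the
# resulting (N+1) embeddings is the whole-document mean, indices 1..N cover chunks.
model = use_gaussian_coverage_pooling(model, coverage_chunks=10, sigma=0.05, alpha=1.0)

# Each text now encodes to a flattened vector of size hidden_dim * (N + 1).
embeddings = model.encode(["a long document to embed ..."])
print(embeddings.shape)  # (1, hidden_dim * 11) for coverage_chunks=10

# Restore the original pooling layer when chunk-level embeddings are not needed.
model = unuse_gaussian_coverage_pooling(model)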