Skip to content

Commit

Permalink
Fix bug in SPLADE on-the-fly encoding with PyTorch re: #1625 and add …
Browse files Browse the repository at this point in the history
…test case (#1626)
  • Loading branch information
AileenLin authored Sep 10, 2023
1 parent d8dc5b3 commit 27931da
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyserini/encode/_splade.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def encode(self, text, max_length=256, **kwargs):
batch_aggregated_logits, _ = torch.max(torch.log(1 + torch.relu(batch_logits))
* input_attention.unsqueeze(-1), dim=1)
batch_aggregated_logits = batch_aggregated_logits.cpu().detach().numpy()
raw_weights = self._output_to_weight_dicts(batch_token_ids, batch_weights)
raw_weights = self._output_to_weight_dicts(batch_aggregated_logits)
return self._get_encoded_query_token_wight_dicts(raw_weights)[0]

def _output_to_weight_dicts(self, batch_aggregated_logits):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,17 @@ def test_onnx_encode_unicoil(self):

temp_object.close()
del temp_object

temp_object1 = LuceneImpactSearcher(f'{self.index_dir}lucene9-index.cacm', 'naver/splade-cocondenser-ensembledistil')

# encode() is never called in _impact_searcher; it is invoked here only to check quantization correctness
results = temp_object1.encode("here is a test")
self.assertEqual(results.get("here"), 156)
self.assertEqual(results.get("a"), 31)
self.assertEqual(results.get("test"), 149)

temp_object1.close()
del temp_object1

def tearDown(self):
os.remove(self.tarball_name)
Expand Down

0 comments on commit 27931da

Please sign in to comment.