Skip to content

Commit

Permalink
add monoT5 reranking
Browse files Browse the repository at this point in the history
  • Loading branch information
malteweber committed May 12, 2024
1 parent 1b8e23c commit a392ff3
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions trec_biogen/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from functools import cached_property
from os import environ
from typing import Any, Hashable
from pyterrier_t5 import MonoT5ReRanker

from elasticsearch7 import Elasticsearch
from elasticsearch7_dsl.query import Query, Match, Exists, Bool
Expand Down Expand Up @@ -94,6 +95,7 @@ def _build_result(article: Article) -> dict[Hashable, Any]:
@dataclass(frozen=True)
class Pipeline(Transformer):


@cached_property
def _elasticsearch(self) -> Elasticsearch:
    """Return the Elasticsearch client used by this pipeline.

    The client comes from ``elasticsearch_connection()``; because this is a
    ``cached_property``, it is built on first access and reused afterwards
    for the same instance.
    """
    client = elasticsearch_connection()
    return client
Expand All @@ -104,6 +106,9 @@ def _elasticsearch_index_pubmed(self) -> str | None:

@cached_property
def _pipeline(self) -> Transformer:

monoT5 = MonoT5ReRanker(verbose=True, batch_size=16)

pipeline = Transformer.identity()

# Retrieve or re-rank documents with Elasticsearch (BM25).
Expand All @@ -112,11 +117,10 @@ def _pipeline(self) -> Transformer:
client=self._elasticsearch,
query_builder=_build_query,
result_builder=_build_result,
num_results=10,
num_results=100,
index=self._elasticsearch_index_pubmed,
verbose=True,
)

) >> monoT5
# TODO: Re-rank documents?

# TODO: Split passages.
Expand Down

0 comments on commit a392ff3

Please sign in to comment.