dedupeio · fgregg · Sep 2, 2022 · Jun 12, 2022 · Jun 12, 2022 · Jun 12, 2022
diff --git a/dedupe/_typing.py b/dedupe/_typing.py
@@ -81,10 +81,14 @@ class TrainingData(TypedDict):
 
 
 class Classifier(Protocol):
-    def fit(self, X: object, y: object) -> None:
+    """Takes an array of pairwise distances and computes the likelihood that each pair is a match."""
+
+    def fit(self, X: numpy.typing.NDArray[numpy.float_], y: LabelsLike) -> None:
         ...
 
-    def predict_proba(self, X: object) -> numpy.typing.NDArray[numpy.float_]:
+    def predict_proba(
+        self, X: numpy.typing.NDArray[numpy.float_]
+    ) -> numpy.typing.NDArray[numpy.float_]:
         ...
 
 

diff --git a/dedupe/api.py b/dedupe/api.py
@@ -1298,8 +1298,6 @@ class Dedupe(ActiveMatching, DedupeMatching):
     entity.
     """
 
-    ActiveLearner = labeler.DedupeDisagreementLearner
-
     def prepare_training(
         self,
         data: Data,
@@ -1341,7 +1339,7 @@ def prepare_training(
         # existing training data, so add them to data dictionary
         examples, y = flatten_training(self.training_pairs)
 
-        self.active_learner = self.ActiveLearner(
+        self.active_learner = labeler.DedupeDisagreementLearner(
             self.data_model,
             data,
             index_include=examples,
@@ -1361,8 +1359,6 @@ class Link(ActiveMatching):
     Mixin Class for Active Learning Record Linkage
     """
 
-    ActiveLearner = labeler.RecordLinkDisagreementLearner
-
     def prepare_training(
         self,
         data_1: Data,
@@ -1410,7 +1406,7 @@ def prepare_training(
         # existing training data, so add them to data dictionaries
         examples, y = flatten_training(self.training_pairs)
 
-        self.active_learner = self.ActiveLearner(
+        self.active_learner = labeler.RecordLinkDisagreementLearner(
             self.data_model,
             data_1,
             data_2,