diff --git a/data/Fig1-designProcess/blend1Subset.pickle b/data/Fig1-designProcess/blend1Subset.pickle index 6071621..6ae45cd 100644 Binary files a/data/Fig1-designProcess/blend1Subset.pickle and b/data/Fig1-designProcess/blend1Subset.pickle differ diff --git a/data/Fig1-designProcess/blend2Subset.pickle b/data/Fig1-designProcess/blend2Subset.pickle index ec2c85b..b618534 100644 Binary files a/data/Fig1-designProcess/blend2Subset.pickle and b/data/Fig1-designProcess/blend2Subset.pickle differ diff --git a/data/Fig1-designProcess/blend3Subset.pickle b/data/Fig1-designProcess/blend3Subset.pickle index 817160e..a3dde5d 100644 Binary files a/data/Fig1-designProcess/blend3Subset.pickle and b/data/Fig1-designProcess/blend3Subset.pickle differ diff --git a/data/Fig1-designProcess/distinct1Subset.pickle b/data/Fig1-designProcess/distinct1Subset.pickle index a5e8b33..5c47140 100644 Binary files a/data/Fig1-designProcess/distinct1Subset.pickle and b/data/Fig1-designProcess/distinct1Subset.pickle differ diff --git a/data/Fig1-designProcess/distinct2Subset.pickle b/data/Fig1-designProcess/distinct2Subset.pickle index bf5921e..e23db38 100644 Binary files a/data/Fig1-designProcess/distinct2Subset.pickle and b/data/Fig1-designProcess/distinct2Subset.pickle differ diff --git a/data/Fig1-designProcess/distinct3Subset.pickle b/data/Fig1-designProcess/distinct3Subset.pickle index c5c68e9..25f51b7 100644 Binary files a/data/Fig1-designProcess/distinct3Subset.pickle and b/data/Fig1-designProcess/distinct3Subset.pickle differ diff --git a/data/Fig1-designProcess/distinctSubset.pickle b/data/Fig1-designProcess/distinctSubset.pickle index cdc928d..b5ba377 100644 Binary files a/data/Fig1-designProcess/distinctSubset.pickle and b/data/Fig1-designProcess/distinctSubset.pickle differ diff --git a/data/Fig1-designProcess/fullData.pickle b/data/Fig1-designProcess/fullData.pickle index e0a9fce..1e793fd 100644 Binary files a/data/Fig1-designProcess/fullData.pickle and b/data/Fig1-designProcess/fullData.pickle differ diff --git a/data/Fig1-designProcess/hullSubset.pickle b/data/Fig1-designProcess/hullSubset.pickle index 20ef179..d252b75 100644 Binary files a/data/Fig1-designProcess/hullSubset.pickle and b/data/Fig1-designProcess/hullSubset.pickle differ diff --git a/data/Fig1-designProcess/outliersSubset.pickle b/data/Fig1-designProcess/outliersSubset.pickle index 596d588..8f611be 100644 Binary files a/data/Fig1-designProcess/outliersSubset.pickle and b/data/Fig1-designProcess/outliersSubset.pickle differ diff --git a/data/solverData.csv b/data/solverData.csv index 8e3a2dc..bc530b3 100644 --- a/data/solverData.csv +++ b/data/solverData.csv @@ -76,3 +76,15 @@ Uni-criterion: clusterCenters,greedySwap,1000,2,10,0.13878262508660555,0.7724079 "Uni-criterion: preserveMetric, mean",worstOfRandom,1000,10,10,0.031927124597132206,9.503260009237106 "Uni-criterion: preserveMetric, mean",bestOfRandom,1000,10,10,0.027917416766285896,1.460277122561562 "Uni-criterion: preserveMetric, mean",greedySwap,1000,10,10,0.132963459007442,0.5640211475433015 +"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,10,0.08583308400011447,0.0 +"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.03175995800029341,-81.35404458341424 +"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.16693012500036275,-83.95544350897778 +"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.07353841700023622,0.0 +"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.03256029200019839,-69.60149355751726 +"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.15679762499985372,-72.5809741971094 +"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,8.897876584000187,-38.19786578061197 +"Multi-criterion: 10*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.382594166999297,-71.67868194859224 +"Multi-criterion: 1*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,10.003441083000325,-76.19767611430132 +"Uni-criterion: distinctness, distances",greedyMixed,200,2,32,0.06363425000017742,-42.57843298944509 +"Uni-criterion: distinctness, distances",greedyMixed,200,2,24,0.04308179100007692,-46.946516069915035 +"Uni-criterion: distinctness, distances",greedyMixed,200,2,93,0.19906350000019302,-75.52440490189333 diff --git a/figures/Fig1-designProcess/blend.pdf b/figures/Fig1-designProcess/blend.pdf index 7988e2e..c7bb4e9 100644 Binary files a/figures/Fig1-designProcess/blend.pdf and b/figures/Fig1-designProcess/blend.pdf differ diff --git a/figures/Fig1-designProcess/express.pdf b/figures/Fig1-designProcess/express.pdf index dab5267..6f651f1 100644 Binary files a/figures/Fig1-designProcess/express.pdf and b/figures/Fig1-designProcess/express.pdf differ diff --git a/figures/Fig1-designProcess/tune.pdf b/figures/Fig1-designProcess/tune.pdf index b6f18bd..e69f1de 100644 Binary files a/figures/Fig1-designProcess/tune.pdf and b/figures/Fig1-designProcess/tune.pdf differ diff --git a/flexibleSubsetSelection/logger.py b/flexibleSubsetSelection/logger.py index efbac8e..351bfb7 100644 --- a/flexibleSubsetSelection/logger.py +++ b/flexibleSubsetSelection/logger.py @@ -7,9 +7,17 @@ # --- Logger ------------------------------------------------------------------- -def setup(name: str = "flexibleSubsetSelection", level: int = logging.NOTSET): +def setup(name: str = "flexibleSubsetSelection", + level: int = logging.NOTSET) -> logging.Logger: """ - Sets up the logger for the package. + Sets up logging for the package. + + Inputs: + name: The name of the logger, defaults to package level name. + level: The level to set the logger to from Python logging. + + Returns: + log: The Python logger object to be used for logging in the package. """ log = logging.getLogger(name) if not log.hasHandlers(): diff --git a/flexibleSubsetSelection/loss.py b/flexibleSubsetSelection/loss.py index 298425a..53a3fe3 100644 --- a/flexibleSubsetSelection/loss.py +++ b/flexibleSubsetSelection/loss.py @@ -1,8 +1,7 @@ -# --- Imports ------------------------------------------------------------------ +# --- Imports and Setup -------------------------------------------------------- # Standard library from functools import partial -import logging from typing import Any, Callable, Dict, List # Third party diff --git a/flexibleSubsetSelection/objective.py b/flexibleSubsetSelection/objective.py index f209339..d5cda1c 100644 --- a/flexibleSubsetSelection/objective.py +++ b/flexibleSubsetSelection/objective.py @@ -159,9 +159,12 @@ def entropy(array: np.ndarray) -> float: probabilities = np.array(list(counts.values()))/total return np.sum(probabilities * np.log(probabilities)) +from ott.problems.linear import linear_problem + def sinkhorn(subset, fullData, solveFunction): - geometry = pointcloud.PointCloud(fullData, subset) - sinkhornOutput = solveFunction(geometry) + geometry = pointcloud.PointCloud(fullData, subset, epsilon=1) + problem = linear_problem.LinearProblem(geometry) + sinkhornOutput = solveFunction(problem) return sinkhornOutput.reg_ot_cost diff --git a/flexibleSubsetSelection/sets.py b/flexibleSubsetSelection/sets.py index 2cf3ae3..f9f38cf 100644 --- a/flexibleSubsetSelection/sets.py +++ b/flexibleSubsetSelection/sets.py @@ -1,7 +1,6 @@ -# --- Imports ------------------------------------------------------------------ +# --- Imports and Setup -------------------------------------------------------- # Standard library -import logging from pathlib import Path from typing import Literal @@ -20,6 +19,7 @@ # Setup logger log = logger.setup(name=__name__) + # --- Dataset and Subset Classes ----------------------------------------------- class Set: @@ -328,9 +328,9 @@ def __repr__(self) -> str: """ string = f"Subset(size={self.size}" if self.solveTime is not None: - string = ", ".join(string, f"time={round(self.solveTime, 4)}s") + string = ", ".join(string, f"time={self.solveTime:.4f}s") if self.loss is not None: - string = ", ".join(string, f"loss={round(self.loss, 4)})") + string = ", ".join(string, f"loss={self.loss:.4f})") return string def __str__(self) -> str: diff --git a/flexibleSubsetSelection/timer.py b/flexibleSubsetSelection/timer.py index feb097d..f59aa0f 100644 --- a/flexibleSubsetSelection/timer.py +++ b/flexibleSubsetSelection/timer.py @@ -13,25 +13,25 @@ def __init__(self): self._startTime = None def start(self): - """Start a new timer""" + """Start a new timer.""" if self._startTime is not None: - raise TimerError(f"Timer is running. Use .stop() to stop it") + raise TimerError(f"Timer is running. Use .stop() to stop it.") self._startTime = time.perf_counter() def stop(self) -> None: - """Stop the timer, and return the elapsed time""" + """Stop the timer, and return the elapsed time.""" if self._startTime is None: - raise TimerError(f"Timer is not running. Use .start() to start it") + raise TimerError(f"Timer is not running. Use .start() to start it.") self.elapsedTime = time.perf_counter() - self._startTime self._startTime = None def __enter__(self): - """Start a new timer as a context manager""" + """Start a new timer as a context manager.""" self.start() return self def __exit__(self, *exc_info): - """Stop the context manager timer""" + """Stop the context manager timer.""" self.stop() \ No newline at end of file