diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 9060a37..bae340e 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,6 +1,6 @@ # https://pre-commit.com # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file. -name: pre-commit +name: Pre-commit checks on: pull_request: push: diff --git a/.github/workflows/pypi-upload.yml b/.github/workflows/pypi-upload.yml index 8f91fd4..9e63ec0 100644 --- a/.github/workflows/pypi-upload.yml +++ b/.github/workflows/pypi-upload.yml @@ -44,4 +44,4 @@ jobs: - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/experiments/README.md b/experiments/README.md index fd36656..8086888 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -16,7 +16,7 @@ as well as few additional packages listed in `requirements.txt` file. They can b pip install -r requirements.txt ``` -## Strucuture +## Directory and files - `datasets/` - the scripts expect to find datasets in this directory - `predictions/` - the scripts expect to find probability estimes from different models (e.g. LightXML) in this directory diff --git a/experiments/notebooks/plot_mixed_objective.ipynb b/experiments/notebooks/plot_mixed_objective.ipynb index d87347a..ecd1f2e 100644 --- a/experiments/notebooks/plot_mixed_objective.ipynb +++ b/experiments/notebooks/plot_mixed_objective.ipynb @@ -11,13 +11,20 @@ "Erik Schultheis, Marek Wydmuch, Wojciech Kotłowski, Rohit Babbar, Krzysztof Dembczyński. _Generalized test utilities for long-tail performance in extreme multi-label classification_. NeurIPS 2023.\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Function for createing the plots\n", + "# Function for creating the plots\n", "\n", "import json\n", "import matplotlib.pyplot as plt\n", diff --git a/experiments/run_all_bc_experiments.sh b/experiments/run_all_bc_experiments.sh old mode 100644 new mode 100755 diff --git a/experiments/run_bc_experiment.py b/experiments/run_bc_experiment.py index a647558..7840521 100644 --- a/experiments/run_bc_experiment.py +++ b/experiments/run_bc_experiment.py @@ -32,39 +32,36 @@ "power-law-with-beta=0.25": (power_law_weighted_instance, {"beta": 0.25}), "log": (predict_log_weighted_per_instance, {}), "optimal-macro-recall": (predict_for_optimal_macro_recall, {}), - + # # Block coordinate with default parameters - commented out because it better to use variatns with specific tolerance to stopping condition # "block-coord-macro-prec": (bc_macro_precision, {}), # "block-coord-macro-recall": (bc_macro_recall, {}), # "block-coord-macro-f1": (bc_macro_f1, {}), # "block-coord-cov": (bc_coverage, {}), - # Tolerance on stopping condiction experiments "block-coord-macro-prec-tol=1e-3": (bc_macro_precision, {"tolerance": 1e-3}), "block-coord-macro-prec-tol=1e-4": (bc_macro_precision, {"tolerance": 1e-4}), "block-coord-macro-prec-tol=1e-5": (bc_macro_precision, {"tolerance": 1e-5}), "block-coord-macro-prec-tol=1e-6": (bc_macro_precision, {"tolerance": 1e-6}), "block-coord-macro-prec-tol=1e-7": (bc_macro_precision, {"tolerance": 1e-7}), - + # # For recall all should be the same "block-coord-macro-recall-tol=1e-3": (bc_macro_recall, {"tolerance": 1e-3}), "block-coord-macro-recall-tol=1e-4": (bc_macro_recall, {"tolerance": 1e-4}), "block-coord-macro-recall-tol=1e-5": (bc_macro_recall, {"tolerance": 1e-5}), "block-coord-macro-recall-tol=1e-6": (bc_macro_recall, {"tolerance": 1e-6}), "block-coord-macro-recall-tol=1e-7": (bc_macro_recall, {"tolerance": 1e-7}), - "block-coord-macro-f1-tol=1e-3": (bc_macro_f1, {"tolerance": 1e-3}), "block-coord-macro-f1-tol=1e-4": (bc_macro_f1, {"tolerance": 1e-4}), "block-coord-macro-f1-tol=1e-5": (bc_macro_f1, {"tolerance": 1e-5}), "block-coord-macro-f1-tol=1e-6": (bc_macro_f1, {"tolerance": 1e-6}), "block-coord-macro-f1-tol=1e-7": (bc_macro_f1, {"tolerance": 1e-7}), - "block-coord-cov-tol=1e-3": (bc_coverage, {"tolerance": 1e-3}), "block-coord-cov-tol=1e-4": (bc_coverage, {"tolerance": 1e-4}), "block-coord-cov-tol=1e-5": (bc_coverage, {"tolerance": 1e-5}), "block-coord-cov-tol=1e-6": (bc_coverage, {"tolerance": 1e-6}), "block-coord-cov-tol=1e-7": (bc_coverage, {"tolerance": 1e-7}), - + # # Greedy / 1 iter variants "greedy-macro-prec": (bc_macro_precision, {"init_y_pred": "greedy", "max_iter": 1}), "greedy-macro-recall": ( diff --git a/generate_logo.py b/generate_logo.py index b1ee030..b0b3261 100644 --- a/generate_logo.py +++ b/generate_logo.py @@ -1,8 +1,9 @@ -from PIL import Image, ImageDraw, ImageColor -import numpy as np +import random from math import pi + +import numpy as np import seaborn as sns -import random +from PIL import Image, ImageColor, ImageDraw def create_gradient(width, height, gradient, angle): @@ -13,7 +14,9 @@ def create_gradient(width, height, gradient, angle): for y in range(height): for x in range(width): # Calculate the normalized position along the gradient line - t = (x * np.cos(angle) + y * np.sin(angle)) / (width * np.cos(angle) + height * np.sin(angle)) + t = (x * np.cos(angle) + y * np.sin(angle)) / ( + width * np.cos(angle) + height * np.sin(angle) + ) # Clamp the position to the range [0, 1] t = max(0, min(1, t)) # Find the two closest color stops @@ -24,9 +27,9 @@ def create_gradient(width, height, gradient, angle): c1 = gradient[i][1] c2 = gradient[i + 1][1] if isinstance(c1, str): - c1 = ImageColor.getcolor(c1, 'RGBA') + c1 = ImageColor.getcolor(c1, "RGBA") if isinstance(c2, str): - c2 = ImageColor.getcolor(c2, 'RGBA') + c2 = ImageColor.getcolor(c2, "RGBA") s1 = gradient[i][0] s2 = gradient[i + 1][0] f = (t - s1) / (s2 - s1) @@ -48,7 +51,7 @@ def create_logo_image(grid, filled_color, column_gradients, cell_size): image_height = len(grid) * cell_size[1] # Create a new image in RGBA mode - img = Image.new('RGBA', (image_width, image_height)) + img = Image.new("RGBA", (image_width, image_height)) draw = ImageDraw.Draw(img) # Create a gradient for each column @@ -57,10 +60,10 @@ def create_logo_image(grid, filled_color, column_gradients, cell_size): cell_size[0], image_height, column_gradients[i % len(column_gradients)], - pi / 2 + pi / 2, ) img.paste(gradient_image, (i * cell_size[0], 0), gradient_image) - + for i, row in enumerate(grid): # Define the starting and ending y coordinates for the row start_y = i * cell_size[1] @@ -68,7 +71,7 @@ def create_logo_image(grid, filled_color, column_gradients, cell_size): for j, cell in enumerate(row): # If cell is empty, skip it - if cell == 'X': + if cell == "X": # Define the starting and ending x coordinates for the cell start_x = j * cell_size[0] end_x = start_x + cell_size[0] @@ -77,18 +80,20 @@ def create_logo_image(grid, filled_color, column_gradients, cell_size): return img -def calculate_columns_gradients(grid, left_gradient, right_gradient, seed, add_alpha_gradient=True): +def calculate_columns_gradients( + grid, left_gradient, right_gradient, seed, add_alpha_gradient=True +): random.seed(seed) def add_stops(gradient): return [(i / (len(gradient) - 1), v) for i, v in enumerate(gradient)] - + def clip(val, min_val, max_val): return min(max(val, min_val), max_val) - + def color_mod_val(): return random.random() * 16 - 8 - + columns = len(grid[0]) # Create interpolated gradients for each column @@ -125,7 +130,6 @@ def color_mod_val(): if __name__ == "__main__": - # Initial logo version grid = """ .................................... @@ -137,8 +141,10 @@ def color_mod_val(): .X.X.X...X.X.X...X.X.X...X.X..X...X. .X.X.XXX.XXX.XXX.XXX.X...X.X..X.XX.. .................................... -""".strip().split('\n') - +""".strip().split( + "\n" + ) + # Logo with the same number of filled cells in each row (k=13) grid = """ .................................... @@ -150,19 +156,23 @@ def color_mod_val(): .X.X.X...XXX.X...X.X.X...X.X..X..... .X.X.XXX.....XXX.XXX.X........X..... .................................... -""".strip().split('\n') - +""".strip().split( + "\n" + ) + # Count the number of filled cells in each row # for i, row in enumerate(grid): # print(f"Full cells in row {i}: {row.count('X')}") cell_size = (20, 20) filled_color = (255, 255, 255, 255) # RGB color for filled cells - columns_gradients = calculate_columns_gradients(grid, sns.color_palette("crest"), sns.color_palette("flare"), 1993) + columns_gradients = calculate_columns_gradients( + grid, sns.color_palette("crest"), sns.color_palette("flare"), 1993 + ) # Generate the gradient image logo_image = create_logo_image(grid, filled_color, columns_gradients, cell_size) # Save the image or display it - logo_image.save('xCOLUMNs_logo.png') # Save the image as 'xCOLUMNs_logo.png' + logo_image.save("xCOLUMNs_logo.png") # Save the image as 'xCOLUMNs_logo.png' logo_image.show() # Show the image