diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 934f582ed..4ac57510d 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -11,6 +11,7 @@ ata autoconfig autodetected autoresize +backported barbie BINDIR blkid @@ -122,6 +123,7 @@ Lun lvm lvmteam Marzinski +misdetection mpath mpathb mpathpersist @@ -184,7 +186,6 @@ sas sbp scsi sda -sdc setmarginal setprkey setprstatus @@ -205,11 +206,11 @@ suse svg switchgroup sys +SYSDIR sysfs sysinit tcp terabytes -SYSDIR TESTDEPS testname tgill @@ -231,6 +232,7 @@ unsetprkey unsetprstatus unspec usb +USEC userdata userspace usr diff --git a/.github/workflows/abi-stable.yaml b/.github/workflows/abi-stable.yaml new file mode 100644 index 000000000..472914ce3 --- /dev/null +++ b/.github/workflows/abi-stable.yaml @@ -0,0 +1,97 @@ +name: check-abi for stable branch +on: + push: + branches: + - 'stable-*' + paths: + - '.github/workflows/abi-stable.yaml' + - '**.h' + - '**.c' + - '**.version' + pull_request: + branches: + - 'stable-*' + workflow_dispatch: + +jobs: + reference-abi: + runs-on: ubuntu-22.04 + steps: + - name: get parent tag + run: > + echo ${{ github.ref }} | + sed -E 's,refs/heads/stable-([0-9]\.[0-9]*)\.y,PARENT_TAG=\1.0,' >> $GITHUB_ENV + if: ${{ github.event_name == 'push' }} + - name: get parent tag + run: echo PARENT_TAG=${{ github.base_ref }} >> $GITHUB_ENV + if: ${{ github.event_name == 'pull_request' }} + - name: assert parent tag + run: /bin/false + if: ${{ env.PARENT_TAG == '' }} + - name: update + run: sudo apt-get update + - name: dependencies + run: > + sudo apt-get install --yes gcc + gcc make pkg-config abigail-tools + libdevmapper-dev libreadline-dev libaio-dev libsystemd-dev + libudev-dev libjson-c-dev liburcu-dev libcmocka-dev libedit-dev + - name: checkout ${{ env.PARENT_TAG }} + uses: actions/checkout@v4 + with: + ref: ${{ env.PARENT_TAG }} + - name: build ABI for ${{ env.PARENT_TAG }} + run: make -j$(grep -c ^processor /proc/cpuinfo) -Orecurse abi + - name: save ABI + uses: actions/upload-artifact@v4 + with: + name: multipath-abi-${{ env.PARENT_TAG }} + path: abi + + check-abi: + runs-on: ubuntu-22.04 + needs: reference-abi + steps: + - name: get parent tag + run: > + echo ${{ github.ref }} | + sed -E 's,refs/heads/stable-([0-9]\.[0-9]*)\.y,PARENT_TAG=\1.0,' >> $GITHUB_ENV + if: ${{ github.event_name == 'push' }} + - name: get parent tag + run: echo PARENT_TAG=${{ github.base_ref }} >> $GITHUB_ENV + if: ${{ github.event_name == 'pull_request' }} + - name: assert parent tag + run: /bin/false + if: ${{ env.PARENT_TAG == '' }} + - name: checkout ${{ github.base_ref }} + uses: actions/checkout@v4 + with: + ref: ${{ github.base_ref }} + - name: download ABI for ${{ env.PARENT_TAG }} + id: download_abi + uses: actions/download-artifact@v4 + with: + name: multipath-abi-${{ env.PARENT_TAG }} + path: reference-abi + - name: update + run: sudo apt-get update + if: steps.download_abi.outcome != 'success' + - name: dependencies + run: > + sudo apt-get install --yes gcc + gcc make pkg-config abigail-tools + libdevmapper-dev libreadline-dev libaio-dev libsystemd-dev + libudev-dev libjson-c-dev liburcu-dev libcmocka-dev libedit-dev + - name: check ABI of ${{ github.ref }} against ${{ env.PARENT_TAG }} + id: check_abi + run: make -j$(grep -c ^processor /proc/cpuinfo) -Orecurse abi-test + continue-on-error: true + - name: save differences + if: ${{ steps.check_abi.outcome != 'success' }} + uses: actions/upload-artifact@v4 + with: + name: abi-test + path: 
abi-test + - name: fail + run: /bin/false + if: steps.check_abi.outcome != 'success' diff --git a/.github/workflows/abi.yaml b/.github/workflows/abi.yaml index 393322e56..d5f8b477e 100644 --- a/.github/workflows/abi.yaml +++ b/.github/workflows/abi.yaml @@ -28,7 +28,7 @@ jobs: - name: get reference ABI id: reference continue-on-error: true - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@v6 with: workflow: abi.yaml branch: ${{ env.ABI_BRANCH }} @@ -45,10 +45,11 @@ jobs: - name: create ABI run: make -Orecurse -j$(grep -c ^processor /proc/cpuinfo) abi.tar.gz - name: save ABI - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: abi path: abi + overwrite: true - name: compare ABI against reference id: compare continue-on-error: true @@ -56,10 +57,11 @@ jobs: run: make abi-test - name: save differences if: ${{ steps.compare.outcome == 'failure' }} - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: abi-test path: abi-test + overwrite: true - name: fail # MUST use >- here, otherwise the condition always evaluates to true diff --git a/.github/workflows/build-and-unittest.yaml b/.github/workflows/build-and-unittest.yaml index a838f9a2c..1a727e141 100644 --- a/.github/workflows/build-and-unittest.yaml +++ b/.github/workflows/build-and-unittest.yaml @@ -5,10 +5,12 @@ on: - master - queue - tip + - 'stable-*' pull_request: branches: - master - queue + - 'stable-*' jobs: jammy: runs-on: ubuntu-22.04 diff --git a/.github/workflows/foreign.yaml b/.github/workflows/foreign.yaml index 9e4d35e0f..0f80957b9 100644 --- a/.github/workflows/foreign.yaml +++ b/.github/workflows/foreign.yaml @@ -5,6 +5,7 @@ on: - master - queue - tip + - 'stable-*' paths: - '.github/workflows/foreign.yaml' - '**.h' @@ -15,6 +16,7 @@ on: branches: - master - queue + - 'stable-*' paths: - '.github/workflows/foreign.yaml' - '**.h' @@ -36,14 +38,13 @@ jobs: - name: checkout uses: actions/checkout@v1 - name: build - run: make -j8 -Orecurse test-progs - - name: create binary archive - run: make test-progs.tar + run: make -j -Orecurse test-progs.tar - name: upload binary archive - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: cross-${{ matrix.os }}-${{ matrix.arch }} path: test-progs.tar + overwrite: true test: runs-on: ubuntu-22.04 @@ -61,11 +62,11 @@ jobs: run: echo CONTAINER_ARCH="arm/v7" >> $GITHUB_ENV if: ${{ matrix.arch == 'armhf' }} - name: download binary archive - uses: actions/download-artifact@v1 + uses: actions/download-artifact@v4 with: name: cross-${{ matrix.os }}-${{ matrix.arch }} - name: unpack binary archive - run: tar xfv cross-${{ matrix.os }}-${{ matrix.arch }}/test-progs.tar + run: tar xfv test-progs.tar - name: enable foreign arch uses: dbhi/qus/action@main - name: run tests @@ -100,11 +101,11 @@ jobs: run: echo CONTAINER_ARCH="arm/v7" >> $GITHUB_ENV if: ${{ matrix.arch == 'armhf' }} - name: download binary archive - uses: actions/download-artifact@v1 + uses: actions/download-artifact@v4 with: name: cross-${{ matrix.os }}-${{ matrix.arch }} - name: unpack binary archive - run: tar xfv cross-${{ matrix.os }}-${{ matrix.arch }}/test-progs.tar + run: tar xfv test-progs.tar - name: enable foreign arch uses: dbhi/qus/action@main - name: run tests diff --git a/.github/workflows/multiarch-stable.yaml b/.github/workflows/multiarch-stable.yaml index e51d383ce..ffede53de 100644 --- a/.github/workflows/multiarch-stable.yaml +++ b/.github/workflows/multiarch-stable.yaml @@ -5,6 +5,7 @@ on: - master 
- queue - tip + - 'stable-*' paths: - '.github/workflows/multiarch-stable.yaml' - '**.h' @@ -15,6 +16,7 @@ on: branches: - master - queue + - 'stable-*' paths: - '.github/workflows/multiarch-stable.yaml' - '**.h' diff --git a/.github/workflows/multiarch.yaml b/.github/workflows/multiarch.yaml index df95a02fb..d2b833a4c 100644 --- a/.github/workflows/multiarch.yaml +++ b/.github/workflows/multiarch.yaml @@ -5,6 +5,7 @@ on: - master - queue - tip + - 'stable-*' paths: - '.github/workflows/multiarch.yaml' - '**.h' @@ -15,6 +16,7 @@ on: branches: - master - queue + - 'stable-*' paths: - '.github/workflows/multiarch.yaml' - '**.h' diff --git a/.github/workflows/native.yaml b/.github/workflows/native.yaml index 80ff6df56..c9d9df9e6 100644 --- a/.github/workflows/native.yaml +++ b/.github/workflows/native.yaml @@ -5,6 +5,7 @@ on: - master - queue - tip + - 'stable-*' paths: - '.github/workflows/native.yaml' - '**.h' @@ -15,6 +16,7 @@ on: branches: - master - queue + - 'stable-*' paths: - '.github/workflows/native.yaml' - '**.h' @@ -35,17 +37,9 @@ jobs: - debian-bookworm - fedora-40 - opensuse-leap - container: ghcr.io/mwilck/multipath-build-${{ matrix.os }} steps: - name: checkout uses: actions/checkout@v1 - - name: build and test - if: ${{ matrix.os != 'debian-jessie' }} - run: make -j -Orecurse test - - name: build and test (jessie) - # On jessie, we use libreadline 5 (no licensing issue) - if: ${{ matrix.os == 'debian-jessie' }} - run: make -j -Orecurse READLINE=libreadline test - name: set archive name # Leap containers have cpio but not tar @@ -54,26 +48,71 @@ jobs: - name: set archive name run: echo ARCHIVE_TGT=test-progs.tar >> $GITHUB_ENV if: ${{ matrix.os != 'opensuse-leap' }} - - name: create binary archive - run: make ${{ env.ARCHIVE_TGT }} + + - name: build and test + if: ${{ matrix.os != 'debian-jessie' }} + uses: mosteo-actions/docker-run@v1 + with: + image: ghcr.io/mwilck/multipath-build-${{ matrix.os }} + command: -j -Orecurse test + - name: build and test (jessie) + # On jessie, we use libreadline 5 (no licensing issue) + if: ${{ matrix.os == 'debian-jessie' }} + uses: mosteo-actions/docker-run@v1 + with: + image: ghcr.io/mwilck/multipath-build-${{ matrix.os }} + command: -j -Orecurse READLINE=libreadline test + + - name: create ${{ env.ARCHIVE_TGT }} + if: ${{ matrix.os != 'debian-jessie' }} + uses: mosteo-actions/docker-run@v1 + with: + image: ghcr.io/mwilck/multipath-build-${{ matrix.os }} + command: ${{ env.ARCHIVE_TGT }} + - name: create ${{ env.ARCHIVE_TGT }} (jessie) + if: ${{ matrix.os == 'debian-jessie' }} + uses: mosteo-actions/docker-run@v1 + with: + image: ghcr.io/mwilck/multipath-build-${{ matrix.os }} + command: READLINE=libreadline ${{ env.ARCHIVE_TGT }} + - name: upload binary archive - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: native-${{ matrix.os }} path: ${{ env.ARCHIVE_TGT }} + overwrite: true + + clang: + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + os: + - debian-jessie + - debian-buster + - debian-bullseye + - debian-bookworm + - fedora-40 + - opensuse-leap + steps: + - name: checkout + uses: actions/checkout@v1 - - name: clean - run: make clean - name: clang if: ${{ matrix.os != 'debian-jessie' }} - env: - CC: clang - run: make -j -Orecurse test + uses: mosteo-actions/docker-run@v1 + with: + image: ghcr.io/mwilck/multipath-build-${{ matrix.os }} + params: -e CC=clang + command: -j -Orecurse test - name: clang (jessie) if: ${{ matrix.os == 'debian-jessie' }} - env: - CC: clang - run: make 
READLINE=libreadline test + uses: mosteo-actions/docker-run@v1 + with: + image: ghcr.io/mwilck/multipath-build-${{ matrix.os }} + params: -e CC=clang + command: -j -Orecurse READLINE=libreadline test root-test: runs-on: ubuntu-22.04 @@ -98,14 +137,14 @@ jobs: uses: actions/checkout@v1 - name: download binary archive - uses: actions/download-artifact@v1 + uses: actions/download-artifact@v4 with: name: native-${{ matrix.os }} - name: unpack binary archive - run: cpio -idv < native-${{ matrix.os }}/test-progs.cpio + run: cpio -idv < test-progs.cpio if: ${{ matrix.os == 'opensuse-leap' }} - name: unpack binary archive - run: tar xfmv native-${{ matrix.os }}/test-progs.tar + run: tar xfmv test-progs.tar if: ${{ matrix.os != 'opensuse-leap' }} - name: run root tests diff --git a/.github/workflows/rolling.yaml b/.github/workflows/rolling.yaml index 3536b9446..66af7a447 100644 --- a/.github/workflows/rolling.yaml +++ b/.github/workflows/rolling.yaml @@ -5,6 +5,7 @@ on: - master - queue - tip + - 'stable-*' paths: - '.github/workflows/rolling.yaml' - '**.h' @@ -15,6 +16,7 @@ on: branches: - master - queue + - 'stable-*' paths: - '.github/workflows/rolling.yaml' - '**.h' diff --git a/NEWS.md b/NEWS.md index 69acda47c..1b239ffa7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,57 @@ # multipath-tools Release Notes +## multipath-tools 0.10.2, 2025/02 + +This release contains backported bug fixes from the stable-0.11.y branch. + +### Bug fixes + +* Fix multipathd crash because of invalid path group index value, for example + if an invalid path device was removed from a map. + Fixes [#105](https://github.com/opensvc/multipath-tools/issues/105). +* Make sure maps are reloaded in the path checker loop after detecting an + inconsistent or wrong kernel state (e.g. missing or falsely mapped path + device). Wrongly mapped paths will be unmapped and released to the system. + Fixes another issue reported in + [#105](https://github.com/opensvc/multipath-tools/issues/105). +* Fix the problem that `group_by_tpg` might be disabled if one or more + paths were offline during initial configuration. +* Fix possible misdetection of changed pathgroups in a map. +* Fix the problem that if a map was scheduled to be reloaded already, + `max_sectors_kb` might not be set on a path device that + was being added to a multipath map. This problem was introduced in 0.9.9. + +## multipath-tools 0.10.1, 2025/01 + +This is the first bug fix release on the `stable-0.10.y` branch. It contains +bug fixes from 0.11.0, and some CI-related fixes. + +### Bug fixes + +* Fixed the problem that multipathd wouldn't start on systems with certain types + of device mapper devices, in particular devices with multiple DM targets. + The problem was introduced in 0.10.0. + Fixes [#102](https://github.com/opensvc/multipath-tools/issues/102). +* Fixed a corner case in the udev rules which could cause a device not to be + activated during boot if a cold plug uevent is processed for a previously + not configured multipath map while this map was suspended. This problem existed + since 0.9.8. +* Fixed the problem that devices with `no_path_retry fail` and no setting + for `dev_loss_tmo` might get the `dev_loss_tmo` set to 0, causing the + device to be deleted immediately in the event of a transport disruption. + This bug was introduced in 0.9.6. +* Fixed the problem that, if there were multiple maps with deferred failback + (`failback` value > 0 in `multipath.conf`), some maps might fail back later + than configured. The problem existed since 0.9.6. 
+* Removed a warning message that multipathd would print if systemd's + `WATCHDOG_USEC` environment variable had the value "0", which means that the + watchdog is simply disabled. This (minor) problem existed since 0.4.9. +* Fixed a memory leak in the nvme foreign library. The bug existed since + 0.7.8. +* Fixed a problem in the marginal path detection algorithm that could cause + the io error check for a recently failed path to be delayed. This bug + existed since 0.7.4. + ## multipath-tools 0.10.0, 2024/08 ### User-Visible Changes diff --git a/libmpathutil/msort.c b/libmpathutil/msort.c index 50f799d91..9df7b267d 100644 --- a/libmpathutil/msort.c +++ b/libmpathutil/msort.c @@ -259,9 +259,12 @@ msort_r (void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg) * If this is safe for them, it should be for us, too. */ #pragma GCC diagnostic push -#if __GNUC__ >= 8 +#if __GNUC__ >= 8 || __clang_major__ >= 19 #pragma GCC diagnostic ignored "-Wcast-function-type" #endif +#if __clang_major__ >= 19 +#pragma GCC diagnostic ignored "-Wcast-function-type-mismatch" +#endif void msort (void *b, size_t n, size_t s, __compar_fn_t cmp) { diff --git a/libmultipath/config.c b/libmultipath/config.c index 0e3a5cc18..226ddecbe 100644 --- a/libmultipath/config.c +++ b/libmultipath/config.c @@ -865,6 +865,9 @@ static void set_max_checkint_from_watchdog(struct config *conf) unsigned long checkint; if (envp && sscanf(envp, "%lu", &checkint) == 1) { + if (checkint == 0) + /* watchdog disabled */ + return; /* Value is in microseconds */ checkint /= 1000000; if (checkint < 1 || checkint > UINT_MAX) { diff --git a/libmultipath/configure.c b/libmultipath/configure.c index a72579812..d9fac3848 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -426,6 +426,11 @@ compute_pgid(struct pathgroup * pgp) pgp->id ^= (long)pp; } +static void cleanup_bitfield(struct bitfield **p) +{ + free(*p); +} + static int pgcmp (struct multipath * mpp, struct multipath * cmpp) { @@ -433,16 +438,25 @@ pgcmp (struct multipath * mpp, struct multipath * cmpp) struct pathgroup * pgp; struct pathgroup * cpgp; int r = 0; + struct bitfield *bf __attribute__((cleanup(cleanup_bitfield))) = NULL; if (!mpp) return 0; + if (VECTOR_SIZE(mpp->pg) != VECTOR_SIZE(cmpp->pg)) + return 1; + + bf = alloc_bitfield(VECTOR_SIZE(cmpp->pg)); + if (!bf) + return 1; + vector_foreach_slot (mpp->pg, pgp, i) { compute_pgid(pgp); vector_foreach_slot (cmpp->pg, cpgp, j) { if (pgp->id == cpgp->id && !pathcmp(pgp, cpgp)) { + set_bit_in_bitfield(j, bf); r = 0; break; } @@ -451,6 +465,10 @@ pgcmp (struct multipath * mpp, struct multipath * cmpp) if (r) return r; } + vector_foreach_slot (cmpp->pg, cpgp, j) { + if (!is_bit_set_in_bitfield(j, bf)) + return 1; + } return r; } @@ -563,8 +581,6 @@ trigger_paths_udev_change(struct multipath *mpp, bool is_mpath) vector_foreach_slot(pgp->paths, pp, j) trigger_path_udev_change(pp, is_mpath); } - - mpp->needs_paths_uevent = 0; } static int sysfs_set_max_sectors_kb(struct multipath *mpp) @@ -936,10 +952,10 @@ int domap(struct multipath *mpp, char *params, int is_daemon) * succeeded */ mpp->force_udev_reload = 0; - if (mpp->action == ACT_CREATE && - (remember_wwid(mpp->wwid) == 1 || - mpp->needs_paths_uevent)) + if (mpp->action == ACT_CREATE) { + remember_wwid(mpp->wwid); trigger_paths_udev_change(mpp, true); + } if (!is_daemon) { /* multipath client mode */ dm_switchgroup(mpp->alias, mpp->bestpg); @@ -964,8 +980,7 @@ int domap(struct multipath *mpp, char *params, int is_daemon) } dm_setgeometry(mpp); return 
DOMAP_OK; - } else if (r == DOMAP_FAIL && mpp->action == ACT_CREATE && - mpp->needs_paths_uevent) + } else if (r == DOMAP_FAIL && mpp->action == ACT_CREATE) trigger_paths_udev_change(mpp, false); return DOMAP_FAIL; diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c index c497c2250..a0070c566 100644 --- a/libmultipath/devmapper.c +++ b/libmultipath/devmapper.c @@ -536,7 +536,7 @@ static uint16_t build_udev_flags(const struct multipath *mpp, int reload) MPATH_UDEV_RELOAD_FLAG : 0); } -int dm_addmap_create (struct multipath *mpp, char * params) +int dm_addmap_create (struct multipath *mpp, char *params) { int ro; uint16_t udev_flags = build_udev_flags(mpp, 0); @@ -546,9 +546,7 @@ int dm_addmap_create (struct multipath *mpp, char * params) if (dm_addmap(DM_DEVICE_CREATE, TGT_MPATH, mpp, params, ro, udev_flags)) { - if (unmark_failed_wwid(mpp->wwid) == - WWID_FAILED_CHANGED) - mpp->needs_paths_uevent = 1; + unmark_failed_wwid(mpp->wwid); return 1; } /* @@ -566,8 +564,7 @@ int dm_addmap_create (struct multipath *mpp, char * params) break; } } - if (mark_failed_wwid(mpp->wwid) == WWID_FAILED_CHANGED) - mpp->needs_paths_uevent = 1; + mark_failed_wwid(mpp->wwid); return 0; } @@ -718,7 +715,7 @@ static int libmp_mapinfo__(int flags, mapid_t id, mapinfo_t info, const char *ma if (info.target || info.status || info.size || flags & MAPINFO_TGT_TYPE__) { if (dm_get_next_target(dmt, NULL, &start, &length, &target_type, ¶ms) != NULL) { - condlog(2, "%s: map %s has multiple targets", fname__, map_id); + condlog(3, "%s: map %s has multiple targets", fname__, map_id); return DMP_NOT_FOUND; } if (!params) { @@ -1262,10 +1259,8 @@ int dm_get_maps(vector mp) } vector_set_slot(mp, mpp); break; - case DMP_NO_MATCH: - break; default: - return 1; + break; } next = names->next; names = (void *) names + next; diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c index e94705bf2..5043330cc 100644 --- a/libmultipath/discovery.c +++ b/libmultipath/discovery.c @@ -942,7 +942,7 @@ sysfs_set_scsi_tmo (struct config *conf, struct multipath *mpp) continue; } - if (pp->dev_loss == DEV_LOSS_TMO_UNSET) + if (pp->dev_loss == DEV_LOSS_TMO_UNSET && min_dev_loss != 0) pp->dev_loss = min_dev_loss; else if (pp->dev_loss < min_dev_loss) { pp->dev_loss = min_dev_loss; diff --git a/libmultipath/foreign.c b/libmultipath/foreign.c index 28d1b1158..af5b0ed3b 100644 --- a/libmultipath/foreign.c +++ b/libmultipath/foreign.c @@ -129,7 +129,7 @@ static void free_pre(void *arg) static int _init_foreign(const char *enable) { char pathbuf[PATH_MAX]; - struct dirent **di; + struct dirent **di = NULL; struct scandir_result sr; int r, i; regex_t *enable_re = NULL; diff --git a/libmultipath/foreign/nvme.c b/libmultipath/foreign/nvme.c index 0b7f4eabd..cde660ced 100644 --- a/libmultipath/foreign/nvme.c +++ b/libmultipath/foreign/nvme.c @@ -675,7 +675,8 @@ static void _find_controllers(struct context *ctx, struct nvme_map *map) pthread_cleanup_push_cast(free_scandir_result, &sr); for (i = 0; i < r; i++) { char *fn = di[i]->d_name; - struct udev_device *ctrl, *udev; + struct udev_device *ctrl; + struct udev_device *udev __attribute__((cleanup(cleanup_udev_device))) = NULL; if (safe_snprintf(pathbuf + n, sizeof(pathbuf) - n, "/%s", fn)) continue; @@ -719,11 +720,11 @@ static void _find_controllers(struct context *ctx, struct nvme_map *map) continue; path->gen.ops = &nvme_path_ops; - path->udev = udev; + path->udev = steal_ptr(udev); path->seen = true; path->map = map; path->ctl = udev_device_get_parent_with_subsystem_devtype - 
(udev, "nvme", NULL); + (path->udev, "nvme", NULL); if (path->ctl == NULL) { condlog(1, "%s: %s: failed to get controller for %s", __func__, THIS, fn); @@ -744,7 +745,7 @@ static void _find_controllers(struct context *ctx, struct nvme_map *map) } vector_set_slot(&map->pgvec, &path->pg); condlog(3, "%s: %s: new path %s added to %s", - __func__, THIS, udev_device_get_sysname(udev), + __func__, THIS, udev_device_get_sysname(path->udev), udev_device_get_sysname(map->udev)); } pthread_cleanup_pop(1); diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c index 4996c0b04..879c310a1 100644 --- a/libmultipath/io_err_stat.c +++ b/libmultipath/io_err_stat.c @@ -367,7 +367,8 @@ int need_io_err_check(struct path *pp) return 1; get_monotonic_time(&curr_time); if ((curr_time.tv_sec - pp->io_err_dis_reinstate_time) > - pp->mpp->marginal_path_err_recheck_gap_time) { + pp->mpp->marginal_path_err_recheck_gap_time || + pp->io_err_dis_reinstate_time == 0) { io_err_stat_log(4, "%s: reschedule checking after %d seconds", pp->dev, pp->mpp->marginal_path_err_recheck_gap_time); diff --git a/libmultipath/pgpolicies.c b/libmultipath/pgpolicies.c index edc3c611f..23ef2bdc2 100644 --- a/libmultipath/pgpolicies.c +++ b/libmultipath/pgpolicies.c @@ -127,6 +127,8 @@ split_marginal_paths(vector paths, vector *normal_p, vector *marginal_p) int group_paths(struct multipath *mp, int marginal_pathgroups) { vector normal, marginal; + struct path *pp; + int i; if (!mp->pg) mp->pg = vector_alloc(); @@ -138,6 +140,10 @@ int group_paths(struct multipath *mp, int marginal_pathgroups) if (!mp->pgpolicyfn) goto fail; + /* Reset pgindex, we're going to invalidate it */ + vector_foreach_slot(mp->paths, pp, i) + pp->pgindex = 0; + if (!marginal_pathgroups || split_marginal_paths(mp->paths, &normal, &marginal) != 0) { if (mp->pgpolicyfn(mp, mp->paths) != 0) diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c index a3fce2039..c09a06198 100644 --- a/libmultipath/propsel.c +++ b/libmultipath/propsel.c @@ -255,14 +255,18 @@ verify_alua_prio(struct multipath *mp) { int i; struct path *pp; + bool assume_alua = false; vector_foreach_slot(mp->paths, pp, i) { const char *name = prio_name(&pp->prio); + if (!prio_selected(&pp->prio)) + continue; if (strncmp(name, PRIO_ALUA, PRIO_NAME_LEN) && strncmp(name, PRIO_SYSFS, PRIO_NAME_LEN)) return false; + assume_alua = true; } - return true; + return assume_alua; } int select_detect_pgpolicy(struct config *conf, struct multipath *mp) diff --git a/libmultipath/structs.c b/libmultipath/structs.c index 61c8f32c3..485172525 100644 --- a/libmultipath/structs.c +++ b/libmultipath/structs.c @@ -239,8 +239,18 @@ free_pgvec (vector pgvec, enum free_path_mode free_paths) if (!pgvec) return; - vector_foreach_slot(pgvec, pgp, i) + vector_foreach_slot(pgvec, pgp, i) { + + /* paths are going to be re-grouped, reset pgindex */ + if (free_paths != FREE_PATHS) { + struct path *pp; + int j; + + vector_foreach_slot(pgp->paths, pp, j) + pp->pgindex = 0; + } free_pathgroup(pgp, free_paths); + } vector_free(pgvec); } diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c index 5df495b3e..683ab473a 100644 --- a/libmultipath/structs_vec.c +++ b/libmultipath/structs_vec.c @@ -108,6 +108,7 @@ static bool update_pathvec_from_dm(vector pathvec, struct multipath *mpp, struct config *conf; bool mpp_has_wwid; bool must_reload = false; + bool pg_deleted = false; if (!mpp->pg) return false; @@ -125,6 +126,10 @@ static bool update_pathvec_from_dm(vector pathvec, struct multipath *mpp, 
vector_foreach_slot(pgp->paths, pp, j) { + /* A pathgroup has been deleted before. Invalidate pgindex */ + if (pg_deleted) + pp->pgindex = 0; + if (pp->mpp && pp->mpp != mpp) { condlog(0, "BUG: %s: found path %s which is already in %s", mpp->alias, pp->dev, pp->mpp->alias); @@ -139,6 +144,13 @@ static bool update_pathvec_from_dm(vector pathvec, struct multipath *mpp, must_reload = true; dm_fail_path(mpp->alias, pp->dev_t); vector_del_slot(pgp->paths, j--); + /* + * pp->pgindex has been set in disassemble_map(), + * which has probably been called just before for + * mpp. So the pgindex relates to mpp and may be + * wrong for pp->mpp. Invalidate it. + */ + pp->pgindex = 0; continue; } pp->mpp = mpp; @@ -237,6 +249,8 @@ static bool update_pathvec_from_dm(vector pathvec, struct multipath *mpp, vector_del_slot(mpp->pg, i--); free_pathgroup(pgp, KEEP_PATHS); must_reload = true; + /* Invalidate pgindex for all other pathgroups */ + pg_deleted = true; } mpp->need_reload = mpp->need_reload || must_reload; return must_reload; } @@ -335,8 +349,7 @@ int adopt_paths(vector pathvec, struct multipath *mpp, */ if (!current_mpp || !mp_find_path_by_devt(current_mpp, pp->dev_t)) - mpp->need_reload = mpp->need_reload || - set_path_max_sectors_kb(pp, mpp->max_sectors_kb); + mpp->need_reload = set_path_max_sectors_kb(pp, mpp->max_sectors_kb) || mpp->need_reload; } pp->mpp = mpp; @@ -354,6 +367,7 @@ void orphan_path(struct path *pp, const char *reason) { condlog(3, "%s: orphan path, %s", pp->dev, reason); pp->mpp = NULL; + pp->pgindex = 0; uninitialize_path(pp); } diff --git a/multipath/11-dm-mpath.rules.in b/multipath/11-dm-mpath.rules.in index 30647b99a..a816edbf4 100644 --- a/multipath/11-dm-mpath.rules.in +++ b/multipath/11-dm-mpath.rules.in @@ -24,16 +24,24 @@ ENV{DM_UDEV_RULES_VSN}=="1|2", ENV{.DM_SUSPENDED}!="1", ENV{DISK_RO}!="1", \ ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}="", GOTO="scan_import" LABEL="mpath_coldplug_end" +IMPORT{db}="DM_COLDPLUG_SUSPENDED" + # If this uevent didn't come from dm, don't try to update the # device state # Note that .MPATH_DEVICE_READY_OLD=="" here. Thus we won't activate the # device below at mpath_is_ready, which is correct. ENV{DM_COOKIE}!="?*", ENV{DM_ACTION}!="PATH_*", \ - IMPORT{db}="DM_COLDPLUG_SUSPENDED", \ GOTO="check_mpath_ready" ENV{.MPATH_DEVICE_READY_OLD}="$env{MPATH_DEVICE_READY}" +# If the device wasn't ready previously and is currently suspended, +# we have to postpone the activation until the next event. +# In this case, we have to set MPATH_DEVICE_READY=0; otherwise, the +# MPATH_UNCHANGED logic will cause later rules to be skipped in the next event. +ENV{.MPATH_DEVICE_READY_OLD}!="1", ENV{.DM_SUSPENDED}=="1", \ + ENV{MPATH_DEVICE_READY}="0", GOTO="check_mpath_unchanged" + # multipath sets DM_SUBSYSTEM_UDEV_FLAG2 when it reloads a # table with no active devices. If this happens, mark the # device not ready @@ -66,26 +74,25 @@ LABEL="check_mpath_unchanged" # A previous coldplug event occurred while the device was suspended. # Activation might have been partially skipped. Activate the device now, -# i.e. disable the MPATH_UNCHANGED logic and set DM_ACTIVATION=1. -IMPORT{db}="DM_COLDPLUG_SUSPENDED" +# i.e. disable the MPATH_UNCHANGED logic.
ENV{DM_COLDPLUG_SUSPENDED}=="1", ENV{.DM_SUSPENDED}!="1", \ - ENV{DM_ACTIVATION}="1", ENV{MPATH_UNCHANGED}="0", \ + ENV{MPATH_UNCHANGED}="0", \ PROGRAM="@SYSDIR_BIN@/logger -t 11-dm-mpath.rules -p daemon.notice \"Forcing activation of previously suspended device\"", \ GOTO="check_mpath_ready" # DM_SUBSYSTEM_UDEV_FLAG0 is the "RELOAD" flag for multipath subsystem. -# Drop the DM_ACTIVATION flag here as mpath reloads tables if any of its +# Set the MPATH_UNCHANGED flag here as mpath reloads tables if any of its # paths are lost/recovered. For any stack above the mpath device, this is not # something that should be reacted upon since it would be useless extra work. # It's exactly mpath's job to provide *seamless* device access to any of the # paths that are available underneath. ENV{DM_SUBSYSTEM_UDEV_FLAG0}=="1", \ - ENV{DM_ACTIVATION}="0", ENV{MPATH_UNCHANGED}="1" + ENV{MPATH_UNCHANGED}="1" -# For path failed or reinstated events, unset DM_ACTIVATION. +# For path failed or reinstated events, set MPATH_UNCHANGED. # This is similar to the DM_SUBSYSTEM_UDEV_FLAG0 case above. ENV{DM_ACTION}=="PATH_FAILED|PATH_REINSTATED", \ - ENV{DM_ACTIVATION}="0", ENV{MPATH_UNCHANGED}="1" + ENV{MPATH_UNCHANGED}="1" LABEL="check_mpath_ready" @@ -105,15 +112,10 @@ GOTO="scan_import" LABEL="mpath_is_ready" -# If the device comes back online, set DM_ACTIVATION so that -# upper layers do a rescan. If the device is currently suspended, -# we have to postpone the activation until the next event. -# In this case, we have to set MPATH_DEVICE_READY=0; otherwise, the -# MPATH_UNCHANGED logic will cause later rules to skipped in the next event. -ENV{.MPATH_DEVICE_READY_OLD}!="0", GOTO="scan_import" -ENV{.DM_SUSPENDED}=="1", ENV{MPATH_DEVICE_READY}="0", GOTO="scan_import" - -ENV{DM_ACTIVATION}="1", ENV{MPATH_UNCHANGED}="0" +# If the device comes back online, clear MPATH_UNCHANGED so that +# upper layers will do a rescan. Don't do this if .MPATH_DEVICE_READY_OLD +# is just empty (see comment above the DM_COOKIE test above). +ENV{.MPATH_DEVICE_READY_OLD}=="0", ENV{MPATH_UNCHANGED}="0" # The code to check multipath state ends here. We need to set # properties and symlinks regardless whether the map is usable or @@ -130,7 +132,7 @@ ENV{DM_UDEV_PRIMARY_SOURCE_FLAG}!="1", GOTO="import_end" ENV{DM_UDEV_RULES_VSN}!="1|2", GOTO="import_end" # Don't import the properties from db if we will run blkid later. -ENV{.DM_NOSCAN}!="1", ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}!="1", GOTO="import_end" +ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}!="1", GOTO="import_end" IMPORT{db}="ID_FS_TYPE" IMPORT{db}="ID_FS_USAGE" @@ -143,6 +145,12 @@ IMPORT{db}="ID_PART_GPT_AUTO_ROOT" LABEL="import_end" +# If MPATH_UNCHANGED is set, adapt DM_ACTIVATION and DM_NOSCAN. +# .DM_NOSCAN controls whether blkid will be run in 13-dm-disk.rules; +# we don't want to do that if MPATH_UNCHANGED is 1. 
+ENV{MPATH_UNCHANGED}=="0", ENV{DM_ACTIVATION}="1" +ENV{MPATH_UNCHANGED}=="1", ENV{DM_ACTIVATION}="0", ENV{.DM_NOSCAN}="1" + # Reset previous DM_COLDPLUG_SUSPENDED if activation happens now ENV{.DM_SUSPENDED}!="1", ENV{DM_ACTIVATION}=="1", ENV{DM_COLDPLUG_SUSPENDED}="" diff --git a/multipathd/main.c b/multipathd/main.c index 1b7fd04f1..4b089c0e2 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -794,8 +794,10 @@ coalesce_maps(struct vectors *vecs, vector nmpv) vector_del_slot(ompv, i); i--; } - else + else { condlog(2, "%s devmap removed", ompp->alias); + trigger_paths_udev_change(ompp, false); + } } else if (reassign_maps) { condlog(3, "%s: Reassign existing device-mapper" " devices", ompp->alias); @@ -2068,7 +2070,7 @@ static void deferred_failback_tick (struct vectors *vecs) { struct multipath * mpp; - unsigned int i; + int i; bool need_reload; vector_foreach_slot (vecs->mpvec, mpp, i) { @@ -2080,9 +2082,12 @@ deferred_failback_tick (struct vectors *vecs) if (!mpp->failback_tick && need_switch_pathgroup(mpp, &need_reload)) { - if (need_reload) - reload_and_sync_map(mpp, vecs); - else + if (need_reload) { + if (reload_and_sync_map(mpp, vecs) == 2) { + /* multipath device removed */ + i--; + } + } else switch_pathgroup(mpp); } }
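A note on the pgcmp() rework in libmultipath/configure.c above, which is what the "misdetection of changed pathgroups" entry in NEWS.md refers to: the added size check and bitfield make the comparison symmetric, so pathgroups that exist only in the compared (kernel) map are now reported as a difference as well. The following is a minimal, self-contained sketch of that idea only; same_group_sets(), the plain bool array and the integer ids are illustrative stand-ins, not the alloc_bitfield()/set_bit_in_bitfield() helpers or the pathgroup structures used by the actual patch.

```c
/*
 * Sketch: compare two sets of pathgroup ids symmetrically, so that a
 * group present only in the second set is also detected as a change.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool same_group_sets(const long *a, int na, const long *b, int nb)
{
	bool *matched;
	bool rc = false;
	int i, j;

	if (na != nb)
		return false;		/* different group count => changed */

	matched = calloc(nb, sizeof(*matched));
	if (!matched)
		return false;		/* treat allocation failure as "changed" */

	for (i = 0; i < na; i++) {
		bool found = false;

		for (j = 0; j < nb; j++) {
			if (!matched[j] && a[i] == b[j]) {
				matched[j] = true;	/* remember which b-group matched */
				found = true;
				break;
			}
		}
		if (!found)
			goto out;	/* group of "a" has no counterpart in "b" */
	}
	/* reverse direction: every group of "b" must have been matched */
	for (j = 0; j < nb; j++)
		if (!matched[j])
			goto out;
	rc = true;
out:
	free(matched);
	return rc;
}

int main(void)
{
	long configured[] = { 1, 2, 3 };
	long kernel[] = { 3, 2, 4 };	/* group "4" exists only in the kernel map */

	printf("maps %s\n",
	       same_group_sets(configured, 3, kernel, 3) ? "unchanged" : "differ");
	return 0;
}
```

As in the patch, checking only that every group of the first set has a counterpart would still report "unchanged" when the compared map carries extra groups; the reverse pass over the matched flags is what closes that gap.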