From 729b2d0d6de878928559e83a5492bfca2efaa183 Mon Sep 17 00:00:00 2001 From: Ambre Austen Suhamy Date: Mon, 27 Jan 2025 18:15:10 +0100 Subject: [PATCH] Fix directory targets with empty subdirectories Signed-off-by: Ambre Austen Suhamy --- src/dune_cache/local.ml | 171 +++---- src/dune_cache/shared.ml | 18 +- src/dune_cache/trimmer.ml | 9 +- src/dune_cache_storage/dune_cache_storage.ml | 34 +- src/dune_cache_storage/dune_cache_storage.mli | 5 +- src/dune_engine/build_system.ml | 14 +- src/dune_engine/build_system.mli | 7 +- src/dune_engine/rule_cache.ml | 6 +- src/dune_engine/target_promotion.ml | 26 +- src/dune_targets/dune_targets.ml | 463 ++++++++++++------ src/dune_targets/dune_targets.mli | 62 ++- src/fs/fs.ml | 15 +- .../directory-targets/subdirs-only.t | 23 + .../test-cases/dune-cache/empty-dir.t | 1 + .../test-cases/promote/deep-subdir.t | 22 +- 15 files changed, 546 insertions(+), 330 deletions(-) create mode 100644 test/blackbox-tests/test-cases/directory-targets/subdirs-only.t diff --git a/src/dune_cache/local.ml b/src/dune_cache/local.ml index 4a38e900201..3a2d8957d44 100644 --- a/src/dune_cache/local.ml +++ b/src/dune_cache/local.ml @@ -35,6 +35,11 @@ module Target = struct Path.Build.chmod path ~mode:(Path.Permissions.remove Path.Permissions.write st_perm); let executable = Path.Permissions.test Path.Permissions.execute st_perm in Some { executable } + | { Unix.st_kind = Unix.S_DIR; st_perm; _ } -> + (* Adding "executable" permissions to directories mean we can traverse them. *) + Path.Build.chmod path ~mode:(Path.Permissions.add Path.Permissions.execute st_perm); + (* the value of [executable] here is ignored, but [Some] is meaningful. *) + Some { executable = true } | (exception Unix.Unix_error _) | _ -> None ;; end @@ -79,12 +84,8 @@ module Artifacts = struct (artifacts : Digest.t Targets.Produced.t) = let entries = - Targets.Produced.foldi artifacts ~init:[] ~f:(fun target file_digest entries -> - let entry : Metadata_entry.t = - { file_path = Path.Local.to_string target; file_digest } - in - entry :: entries) - |> List.rev + Targets.Produced.to_list_map artifacts ~f:(fun target digest -> + { Metadata_entry.path = Path.Local.to_string target; digest }) in Metadata_file.store ~mode { metadata; entries } ~rule_digest ;; @@ -103,12 +104,16 @@ module Artifacts = struct Result.try_with (fun () -> (* CR-someday rleshchinskiy: We recreate the directory structure here but it might be simpler to just use file digests instead of file names and no subdirectories. *) - Path.Local.Map.iteri targets.dirs ~f:(fun path _ -> - Path.mkdir_p (Path.append_local temp_dir path)); - Targets.Produced.iteri targets ~f:(fun path _ -> - let path_in_build_dir = Path.build (Path.Build.append_local targets.root path) in - let path_in_temp_dir = Path.append_local temp_dir path in - portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir)) + (* The comment above seems outdated wrt. 'no subdirectories'... *) + Targets.Produced.iteri + targets + ~d:(fun dir -> Path.mkdir_p (Path.append_local temp_dir dir)) + ~f:(fun file _ -> + let path_in_build_dir = + Path.build (Path.Build.append_local targets.root file) + in + let path_in_temp_dir = Path.append_local temp_dir file in + portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir)) ;; (* Step II of [store_skipping_metadata]. @@ -133,70 +138,75 @@ module Artifacts = struct artifacts ~init:Store_result.empty ~f:(fun target digest results -> - let path_in_temp_dir = Path.append_local temp_dir target in - let path_in_cache = file_path ~file_digest:digest in - let store_using_hardlinks () = - match - Dune_cache_storage.Util.Optimistically.link - ~src:path_in_temp_dir - ~dst:path_in_cache - with - | exception Unix.Unix_error (Unix.EEXIST, _, _) -> - (* We end up here if the cache already contains an entry for this - artifact. We deduplicate by keeping only one copy, in the - cache. *) - let path_in_build_dir = - Path.build (Path.Build.append_local artifacts.root target) - in - (match - Path.unlink_no_err path_in_temp_dir; - (* At first, we deduplicate the temporary file. Doing this - intermediate step allows us to keep the original target in case - the below link step fails. This might happen if the trimmer has - just deleted [path_in_cache]. In this rare case, this function - fails with an [Error], and so we might end up with some - duplicates in the workspace. *) - link_even_if_there_are_too_many_links_already - ~src:path_in_cache - ~dst:path_in_temp_dir; - (* Now we can simply rename the temporary file into the target, - knowing that the original target remains in place if the - renaming fails. + match digest with + | None -> + (* No digest means [target] is a directory, simply ignore it. *) + results + | Some file_digest -> + let path_in_temp_dir = Path.append_local temp_dir target in + let path_in_cache = file_path ~file_digest in + let store_using_hardlinks () = + match + Dune_cache_storage.Util.Optimistically.link + ~src:path_in_temp_dir + ~dst:path_in_cache + with + | exception Unix.Unix_error (Unix.EEXIST, _, _) -> + (* We end up here if the cache already contains an entry for this + artifact. We deduplicate by keeping only one copy, in the + cache. *) + let path_in_build_dir = + Path.build (Path.Build.append_local artifacts.root target) + in + (match + Path.unlink_no_err path_in_temp_dir; + (* At first, we deduplicate the temporary file. Doing this + intermediate step allows us to keep the original target in case + the below link step fails. This might happen if the trimmer has + just deleted [path_in_cache]. In this rare case, this function + fails with an [Error], and so we might end up with some + duplicates in the workspace. *) + link_even_if_there_are_too_many_links_already + ~src:path_in_cache + ~dst:path_in_temp_dir; + (* Now we can simply rename the temporary file into the target, + knowing that the original target remains in place if the + renaming fails. - One curious case to think about is if the file in the cache - happens to have the same inode as the file in the workspace. In - that case this deduplication should be a no-op, but the - [rename] operation has a quirk where [path_in_temp_dir] can - remain on disk. This is not a problem because we clean the - temporary directory later. *) - Path.rename path_in_temp_dir path_in_build_dir - with - | exception e -> Store_result.Error e - | () -> Already_present) - | exception e -> Error e - | () -> Stored - in - let store_using_test_and_rename () = - (* CR-someday amokhov: There is a race here. If [path_in_cache] is - created after [Path.exists] but before [Path.rename], it will be - silently overwritten. Find a good way to avoid this race. *) - match Path.exists path_in_cache with - | true -> Store_result.Already_present - | false -> - (match - Dune_cache_storage.Util.Optimistically.rename - ~src:path_in_temp_dir - ~dst:path_in_cache - with - | exception e -> Error e - | () -> Stored) - in - let result = - match (mode : Dune_cache_storage.Mode.t) with - | Hardlink -> store_using_hardlinks () - | Copy -> store_using_test_and_rename () - in - Store_result.combine results result) + One curious case to think about is if the file in the cache + happens to have the same inode as the file in the workspace. In + that case this deduplication should be a no-op, but the + [rename] operation has a quirk where [path_in_temp_dir] can + remain on disk. This is not a problem because we clean the + temporary directory later. *) + Path.rename path_in_temp_dir path_in_build_dir + with + | exception e -> Store_result.Error e + | () -> Already_present) + | exception e -> Error e + | () -> Stored + in + let store_using_test_and_rename () = + (* CR-someday amokhov: There is a race here. If [path_in_cache] is + created after [Path.exists] but before [Path.rename], it will be + silently overwritten. Find a good way to avoid this race. *) + match Path.exists path_in_cache with + | true -> Store_result.Already_present + | false -> + (match + Dune_cache_storage.Util.Optimistically.rename + ~src:path_in_temp_dir + ~dst:path_in_cache + with + | exception e -> Error e + | () -> Stored) + in + let result = + match (mode : Dune_cache_storage.Mode.t) with + | Hardlink -> store_using_hardlinks () + | Copy -> store_using_test_and_rename () + in + Store_result.combine results result) ;; let store_skipping_metadata ~mode ~targets ~compute_digest @@ -281,10 +291,7 @@ module Artifacts = struct | Copy -> copy ~src ~dst); Unwind.push unwind (fun () -> Path.Build.unlink_no_err target) in - try - Path.Local.Map.iteri artifacts.dirs ~f:(fun dir _ -> mk_dir dir); - Targets.Produced.iteri artifacts ~f:mk_file - with + try Targets.Produced.iteri artifacts ~f:mk_file ~d:mk_dir with | exn -> Unwind.unwind unwind; reraise exn @@ -294,10 +301,8 @@ module Artifacts = struct let restore ~mode ~rule_digest ~target_dir = Restore_result.bind (list ~rule_digest) ~f:(fun (entries : Metadata_entry.t list) -> let artifacts = - Path.Local.Map.of_list_map_exn - entries - ~f:(fun { Metadata_entry.file_path; file_digest } -> - Path.Local.of_string file_path, file_digest) + Path.Local.Map.of_list_map_exn entries ~f:(fun { Metadata_entry.path; digest } -> + Path.Local.of_string path, digest) |> Targets.Produced.of_files target_dir in try diff --git a/src/dune_cache/shared.ml b/src/dune_cache/shared.ml index 21577b83efb..a5a31098130 100644 --- a/src/dune_cache/shared.ml +++ b/src/dune_cache/shared.ml @@ -121,17 +121,22 @@ struct ] in let update_cached_digests ~targets_and_digests = - Targets.Produced.iteri targets_and_digests ~f:(fun path digest -> + Targets.Produced.iter_files targets_and_digests ~f:(fun path digest -> Cached_digest.set (Path.Build.append_local targets_and_digests.root path) digest) in match Targets.Produced.map_with_errors - produced_targets - ~all_errors:false - ~f:(fun target () -> + ~f:(fun target -> + (* All of this monad boilerplate seems unnecessary since we don't care about errors... *) match Local.Target.create target with | Some t -> Ok t | None -> Error ()) + ~d:(fun target -> + match Local.Target.create target with + | Some _ -> Ok () + | None -> Error ()) + ~all_errors:false + produced_targets with | Error _ -> Fiber.return None | Ok targets -> @@ -190,10 +195,7 @@ struct ~remove_write_permissions:should_remove_write_permissions_on_generated_files in match - Targets.Produced.map_with_errors - produced_targets - ~all_errors:true - ~f:(fun target () -> compute_digest target) + Targets.Produced.map_with_errors ~f:compute_digest ~all_errors:true produced_targets with | Ok result -> result | Error errors -> diff --git a/src/dune_cache/trimmer.ml b/src/dune_cache/trimmer.ml index 22d1f95c18d..6f51d186031 100644 --- a/src/dune_cache/trimmer.ml +++ b/src/dune_cache/trimmer.ml @@ -49,11 +49,12 @@ let trim_broken_metadata_entries ~trimmed_so_far = keep them untrimmed for now. *) false | Metadata.Artifacts { entries; _ } -> - List.exists - entries - ~f:(fun { Artifacts.Metadata_entry.file_digest; _ } -> + List.exists entries ~f:(function + | { Artifacts.Metadata_entry.digest = Some file_digest; path = _ } -> let reference = file_path ~file_digest in - not (Path.exists reference))) + not (Path.exists reference) + (* no digest means it's a directory. *) + | { digest = None; path = _ } -> false)) in match should_be_removed with | true -> diff --git a/src/dune_cache_storage/dune_cache_storage.ml b/src/dune_cache_storage/dune_cache_storage.ml index bea1de2b1e8..db52f34966e 100644 --- a/src/dune_cache_storage/dune_cache_storage.ml +++ b/src/dune_cache_storage/dune_cache_storage.ml @@ -215,28 +215,38 @@ end module Artifacts = struct module Metadata_entry = struct type t = - { file_path : string - ; file_digest : Digest.t + { path : string (** Can have more than one component for directory targets *) + ; digest : Digest.t option + (** This digest is always present in case [file_path] points to a file, and absent when it's a directory. *) } let equal x y = - Digest.equal x.file_digest y.file_digest && String.equal x.file_path y.file_path + String.equal x.path y.path && Option.equal Digest.equal x.digest y.digest ;; - let to_sexp { file_path; file_digest } = - Sexp.List [ Atom file_path; Atom (Digest.to_string file_digest) ] + let digest_to_sexp = function + | None -> Sexp.Atom "" + | Some digest -> Sexp.Atom (Digest.to_string digest) ;; - let of_sexp = function - | Sexp.List [ Atom file_path; Atom file_digest ] -> - (match Digest.from_hex file_digest with - | Some file_digest -> Ok { file_path; file_digest } + let to_sexp { path; digest } = Sexp.List [ Atom path; digest_to_sexp digest ] + + let digest_of_sexp = function + | "" -> Ok None + | digest -> + (match Digest.from_hex digest with + | Some digest -> Ok (Some digest) | None -> Error (Failure - (sprintf - "Cannot parse file digest %s in cache metadata entry" - file_digest))) + (sprintf "Cannot parse file digest %S in cache metadata entry" digest))) + ;; + + let of_sexp = function + | Sexp.List [ Atom path; Atom digest ] -> + (match digest_of_sexp digest with + | Ok digest -> Ok { path; digest } + | Error e -> Error e) | _ -> Error (Failure "Cannot parse cache metadata entry") ;; end diff --git a/src/dune_cache_storage/dune_cache_storage.mli b/src/dune_cache_storage/dune_cache_storage.mli index 85503886345..6f33d3f8d04 100644 --- a/src/dune_cache_storage/dune_cache_storage.mli +++ b/src/dune_cache_storage/dune_cache_storage.mli @@ -68,8 +68,9 @@ end module Artifacts : sig module Metadata_entry : sig type t = - { file_path : string (** Can have more than one component for directory targets *) - ; file_digest : Digest.t + { path : string (** Can have more than one component for directory targets *) + ; digest : Digest.t option + (** This digest is always present in case [file_path] points to a file, and absent when it's a directory. *) } end diff --git a/src/dune_engine/build_system.ml b/src/dune_engine/build_system.ml index 7cb8e92c139..3df76605e53 100644 --- a/src/dune_engine/build_system.ml +++ b/src/dune_engine/build_system.ml @@ -206,6 +206,7 @@ end = struct (* Fact: alias [a] expands to the set of file-digest pairs [digests] *) Dep.Fact.alias a digests | File f -> + (* Not necessarily a file, can also be a directory. *) let+ digest = build_file f in (* Fact: file [f] has digest [digest] *) Dep.Fact.file f digest @@ -856,9 +857,12 @@ end = struct rleshchinskiy: Is this digest ever used? [build_dir] discards it and do we (or should we) ever use [build_file] to build directories? Perhaps this could be split in two memo tables, one for files and one for directories. *) + (* ElectreAAS: Tentative answer to above comments: a lot of functions are called + [build_file] or [create_file] even though they also handle directories. + Also yes this digest is used by [Exported.build_dep] defined above. *) (match Cached_digest.build_file ~allow_dirs:true path with | Ok digest -> digest, Dir_target { targets } - (* Must be a directory target *) + (* Must be a directory target. *) | Error _ -> (* CR-someday amokhov: The most important reason we end up here is [No_such_file]. I think some of the outcomes above are impossible @@ -1060,7 +1064,8 @@ let file_exists fn = (Path.Build.Map.mem rules_here.by_file_targets (Path.as_in_build_dir_exn fn)) | Build_under_directory_target { directory_target_ancestor } -> let+ path_map = build_dir (Path.build directory_target_ancestor) in - Targets.Produced.mem path_map (Path.as_in_build_dir_exn fn) + (* Note that in the case of directory targets, we also check if directories exist. *) + Targets.Produced.mem_any path_map (Path.as_in_build_dir_exn fn) ;; let files_of ~dir = @@ -1161,6 +1166,11 @@ let build_file p = () ;; +let build_dir p = + let+ (_ : Digest.t Targets.Produced.t) = build_dir p in + () +;; + let with_file p ~f = let+ () = build_file p in f p diff --git a/src/dune_engine/build_system.mli b/src/dune_engine/build_system.mli index f521a117fc1..4bd899806d1 100644 --- a/src/dune_engine/build_system.mli +++ b/src/dune_engine/build_system.mli @@ -2,9 +2,12 @@ open Import -(** Build a file. *) +(** Build a target, which may be a file or a directory. *) val build_file : Path.t -> unit Memo.t +(** Build a directory. *) +val build_dir : Path.t -> unit Memo.t + (** Build a file and read its contents with [f]. The execution of [f] is not memoized, so call sites should be careful to avoid duplicating [f]'s work. *) val with_file : Path.t -> f:(Path.t -> 'a) -> 'a Memo.t @@ -12,7 +15,7 @@ val with_file : Path.t -> f:(Path.t -> 'a) -> 'a Memo.t (** Build a file and read its contents. Like [with_file ~f:Io.read_file] but memoized. *) val read_file : Path.t -> string Memo.t -(** Return [true] if a file exists or is buildable *) +(** Return [true] if a file or directory exists or is buildable. *) val file_exists : Path.t -> bool Memo.t (** Build a set of dependencies and return learned facts about them. *) diff --git a/src/dune_engine/rule_cache.ml b/src/dune_engine/rule_cache.ml index fec4ffec41a..001d6539c17 100644 --- a/src/dune_engine/rule_cache.ml +++ b/src/dune_engine/rule_cache.ml @@ -139,8 +139,10 @@ module Workspace_local = struct | Error error -> Miss (Error_while_collecting_directory_targets error) | Ok targets -> (match - Targets.Produced.map_with_errors targets ~all_errors:false ~f:(fun target () -> - Cached_digest.build_file ~allow_dirs:true target) + Targets.Produced.map_with_errors + ~all_errors:false + ~f:(Cached_digest.build_file ~allow_dirs:true) + targets with | Ok produced_targets -> Dune_cache.Hit_or_miss.Hit produced_targets | Error _ -> Miss Targets_missing) diff --git a/src/dune_engine/target_promotion.ml b/src/dune_engine/target_promotion.ml index ebe09cbaa2a..6fa3b74838d 100644 --- a/src/dune_engine/target_promotion.ml +++ b/src/dune_engine/target_promotion.ml @@ -184,7 +184,7 @@ let promote ~(targets : _ Targets.Produced.t) ~(promote : Rule.Promote.t) ~promo in (* Here we know that the promotion directory exists but we may need to create additional subdirectories for [targets.dirs]. *) - Path.Local.Map.iteri targets.dirs ~f:(fun dir (_ : Digest.t Filename.Map.t) -> + Targets.Produced.iter_dirs targets ~f:(fun dir -> create_directory_if_needed ~dir:(Path.Build.append_local targets.root dir)); let promote_until_clean = match promote.lifetime with @@ -208,8 +208,8 @@ let promote ~(targets : _ Targets.Produced.t) ~(promote : Rule.Promote.t) ~promo ~promote_until_clean) in (* There can be some files or directories left over from earlier builds, so we - need to remove them from [targets.dirs]. *) - let remove_stale_files_and_subdirectories ~dir ~expected_filenames = + need to remove them from [targets]. *) + let remove_stale_files_and_subdirectories ~dir = (* CR-someday rleshchinskiy: This can probably be made more efficient by relocating root once. *) let build_dir = Path.Build.append_local targets.root dir in @@ -224,17 +224,15 @@ let promote ~(targets : _ Targets.Produced.t) ~(promote : Rule.Promote.t) ~promo | Error unix_error -> directory_target_error ~unix_error ~dst_dir [] | Ok dir_contents -> Fs_cache.Dir_contents.iter dir_contents ~f:(function - | filename, S_REG -> - if not (String.Map.mem expected_filenames filename) - then Path.unlink_no_err (Path.relative dst_dir filename) - | dirname, S_DIR -> - let src_dir = Path.Local.relative dir dirname in - if not (Path.Local.Map.mem targets.dirs src_dir) - then Path.rm_rf (Path.relative dst_dir dirname) + | file_name, S_REG -> + if not (Targets.Produced.mem targets (Path.Build.relative build_dir file_name)) + then Path.unlink_no_err (Path.relative dst_dir file_name) + | dir_name, S_DIR -> + let src_dir = Path.Build.relative build_dir dir_name in + if not (Targets.Produced.mem_dir targets src_dir) + then Path.rm_rf (Path.relative dst_dir dir_name) | name, _kind -> Path.unlink_no_err (Path.relative dst_dir name)) in - Fiber.sequential_iter_seq - (Path.Local.Map.to_seq targets.dirs) - ~f:(fun (dir, filenames) -> - remove_stale_files_and_subdirectories ~dir ~expected_filenames:filenames) + Fiber.sequential_iter_seq (Targets.Produced.all_dirs_seq targets) ~f:(fun dir -> + remove_stale_files_and_subdirectories ~dir) ;; diff --git a/src/dune_targets/dune_targets.ml b/src/dune_targets/dune_targets.ml index 4fd1cf81fbc..846fdbb11ed 100644 --- a/src/dune_targets/dune_targets.ml +++ b/src/dune_targets/dune_targets.ml @@ -162,12 +162,36 @@ module Produced = struct (* CR-someday amokhov: A hierarchical representation of the produced file trees may be better. It would allow for hierarchical traversals and reduce the number of internal invariants. *) + + (** All file and directory names are relative to the root (['a t]). *) + type 'a dir_contents = + { files : 'a Filename.Map.t (* mapping file name -> 'a *) + ; subdirs : 'a dir_contents Filename.Map.t + (* mapping directory name -> 'a dir_contents *) + } + + let is_empty_dir_conts { files; subdirs } = + Filename.Map.is_empty files && Filename.Map.is_empty subdirs + ;; + type 'a t = { root : Path.Build.t - ; files : 'a Filename.Map.t - ; dirs : 'a Filename.Map.t Path.Local.Map.t + ; contents : 'a dir_contents } + let equal + { root = root1; contents = contents1 } + { root = root2; contents = contents2 } + ~equal + = + let rec eq_aux { files = files1; subdirs = dirs1 } { files = files2; subdirs = dirs2 } + = + Filename.Map.equal files1 files2 ~equal + && Filename.Map.equal dirs1 dirs2 ~equal:eq_aux + in + Path.Build.equal root1 root2 && eq_aux contents1 contents2 + ;; + module Error = struct type t = | Missing_dir of Path.Build.t @@ -215,218 +239,333 @@ module Produced = struct ;; end - let of_validated = - (* The call sites ensure that [dir = Path.Build.append_local validated.root local]. *) - let rec collect (dir : Path.Build.t) (local : Path.Local.t) - : (unit Filename.Map.t Path.Local.Map.t, Error.t) result - = - match Path.readdir_unsorted_with_kinds (Path.build dir) with - | Error (Unix.ENOENT, _, _) -> Error (Missing_dir dir) - | Error e -> Error (Unreadable_dir (dir, e)) - | Ok dir_contents -> - let open Result.O in - let+ filenames, dirs = - Result.List.fold_left - dir_contents - ~init:(Filename.Map.empty, Path.Local.Map.empty) - ~f:(fun (acc_filenames, acc_dirs) (filename, kind) -> - match (kind : File_kind.t) with - (* CR-someday rleshchinskiy: Make semantics of symlinks more consistent. *) - | S_LNK | S_REG -> - Ok (Filename.Map.add_exn acc_filenames filename (), acc_dirs) - | S_DIR -> - let+ dir = - collect - (Path.Build.relative dir filename) - (Path.Local.relative local filename) - in - acc_filenames, Path.Local.Map.union_exn acc_dirs dir - | _ -> Error (Unsupported_file (Path.Build.relative dir filename, kind))) + let empty = { files = Filename.Map.empty; subdirs = Filename.Map.empty } + + (** The call sites ensure that [dir = Path.Build.append_local validated.root local]. + No need for [local] actually... *) + let rec contents_of_dir ~file_f (dir : Path.Build.t) : ('a dir_contents, Error.t) result + = + let open Result.O in + let init = empty in + match Path.readdir_unsorted_with_kinds (Path.build dir) with + | Error (Unix.ENOENT, _, _) -> Error (Missing_dir dir) + | Error e -> Error (Unreadable_dir (dir, e)) + | Ok dir_contents -> + Result.List.fold_left dir_contents ~init ~f:(fun dir_contents (name, kind) -> + match (kind : File_kind.t) with + | S_LNK | S_REG -> + let files = + match file_f (Path.Local.relative (Path.Build.local dir) name) with + | Some payload -> Filename.Map.add_exn dir_contents.files name payload + | None -> dir_contents.files + in + Ok { dir_contents with files } + | S_DIR -> + let+ subdirs_contents = + contents_of_dir ~file_f (Path.Build.relative dir name) + in + { dir_contents with + subdirs = Filename.Map.add_exn dir_contents.subdirs name subdirs_contents + } + | _ -> Error (Unsupported_file (Path.Build.relative dir name, kind))) + ;; + + let of_validated (validated : Validated.t) = + let open Result.O in + (* We assume here that [dir_name] is either a child of [root], or that we're ok with having [root/a/b] but not [root/a]. *) + let aggregate_dir { root; contents } dir_name = + let dir = Path.Build.relative root dir_name in + let* new_contents = contents_of_dir ~file_f:(fun _ -> Some ()) dir in + if is_empty_dir_conts new_contents + then Error (Empty_dir dir) + else ( + let contents = + { contents with + subdirs = Filename.Map.add_exn contents.subdirs dir_name new_contents + } in - if not (Filename.Map.is_empty filenames) - then Path.Local.Map.add_exn dirs local filenames - else dirs - in - let directory root dirname = - let open Result.O in - let dir = Path.Build.relative root dirname in - let* files = collect dir (Path.Local.of_string dirname) in - if Path.Local.Map.is_empty files then Error (Empty_dir dir) else Ok files + Ok { root; contents }) in - fun (validated : Validated.t) -> - match - Filename.Set.to_list validated.dirs - |> Result.List.map ~f:(directory validated.root) - with - | Error _ as error -> error - | Ok dirs -> - let files = - (* CR-someday rleshchinskiy: Check if the files actually exist here. Currently, - we check this here for directory targets but for files, the check is done by - the cache. *) - Filename.Set.to_map validated.files ~f:(fun _ -> ()) - in - (* The [union_exn] below can't raise because each map in [dirs] contains - unique keys, which are paths rooted at the corresponding [dir]s. *) - let dirs = - List.fold_left dirs ~init:Path.Local.Map.empty ~f:Path.Local.Map.union_exn + let rooted_files = Filename.Set.to_map validated.files ~f:(Fun.const ()) in + Filename.Set.to_list validated.dirs + |> Result.List.fold_left + ~init: + { root = validated.root + ; contents = { files = rooted_files; subdirs = Filename.Map.empty } + } + ~f:aggregate_dir + ;; + + let of_files root (file_list : 'a option Path.Local.Map.t) : 'a t = + let rec aux mb_payload contents path = + match path, mb_payload with + | [], _ -> + Code_error.raise + "I've been hoisted by my own petard! (path.explode)" + [ "file_list", Path.Local.Map.to_dyn Dyn.opaque file_list ] + | [ final ], Some payload -> + { contents with files = Filename.Map.add_exn contents.files final payload } + | [ final ], None -> + { contents with subdirs = Filename.Map.add_exn contents.subdirs final empty } + | parent :: rest, _ -> + let subdirs = + Filename.Map.update contents.subdirs parent ~f:(fun contents_opt -> + Some (aux mb_payload (Option.value contents_opt ~default:empty) rest)) in - Ok { root = validated.root; files; dirs } + { contents with subdirs } + in + let init = empty in + let contents = + Path.Local.Map.foldi file_list ~init ~f:(fun file mb_payload contents -> + let parent = Path.Local.parent_exn file in + if Path.Local.is_root parent + then ( + match mb_payload with + | Some payload -> + { contents with + files = + Filename.Map.add_exn contents.files (Path.Local.to_string file) payload + } + | None -> + { contents with + subdirs = + Filename.Map.add_exn contents.subdirs (Path.Local.to_string file) empty + }) + else aux mb_payload contents (Path.Local.explode file)) + in + { root; contents } ;; - let of_files root files = - let f file payload t = - let parent = Path.Local.parent_exn file in - if Path.Local.is_root parent - then - { t with - files = Filename.Map.add_exn t.files (Path.Local.to_string file) payload - } - else ( - let fn = Path.Local.basename file in - { t with - dirs = - Path.Local.Map.update t.dirs parent ~f:(fun files -> - let files = Option.value files ~default:Filename.Map.empty in - Some (Filename.Map.add_exn files fn payload)) - }) + let find_any { root; contents } name = + let open Option.O in + let rec aux path { files; subdirs } = function + | [] -> + Code_error.raise + "I've been hoisted by my own petard! (path.explode)" + [ "name", Path.Build.to_dyn name ] + | [ final ] -> + (* There's probably a nicer way to put this... *) + (match Filename.Map.find files final with + | Some payload -> Some (Left payload) + | None -> + (* The order shouldn't matter, it's not possible to have both a file + and a directory with the exact same path and name. *) + (match Filename.Map.find subdirs final with + | Some contents -> Some (Right contents.files) + | None -> None)) + | parent :: rest -> + let path = Path.Local.relative path parent in + let* subdir = Filename.Map.find subdirs parent in + aux path subdir rest in - let init = { root; files = Filename.Map.empty; dirs = Path.Local.Map.empty } in - Path.Local.Map.foldi files ~init ~f + let root = Path.Build.local root in + let* path = Path.Local.descendant (Path.Build.local name) ~of_:root in + aux root contents (Path.Local.explode path) ;; - let all_files_seq { root = _; files; dirs } = - Seq.append - (Filename.Map.to_seq files - |> Seq.map ~f:(fun (file, payload) -> Path.Local.of_string file, payload)) - (Seq.concat - (Path.Local.Map.to_seq dirs - |> Seq.map ~f:(fun (dir, filenames) -> - Filename.Map.to_seq filenames - |> Seq.map ~f:(fun (filename, payload) -> - Path.Local.relative dir filename, payload)))) + let mem_any t name = Option.is_some (find_any t name) + + let find t name = + match find_any t name with + | Some (Left found) -> Some found + | Some (Right _) | None -> None ;; - let find { root; files; dirs } path = - let open Option.O in - let* path = - Path.Local.descendant (Path.Build.local path) ~of_:(Path.Build.local root) + let mem t name = Option.is_some (find t name) + + let find_dir t name = + match find_any t name with + | Some (Right found) -> Some found + | Some (Left _) | None -> None + ;; + + let mem_dir t name = Option.is_some (find_dir t name) + + let exists { contents; root = _ } ~f = + let rec aux { files; subdirs } = + Filename.Map.exists files ~f || Filename.Map.exists subdirs ~f:aux in - let* parent = Path.Local.parent path in - if Path.Local.is_root parent - then Filename.Map.find files (Path.Local.to_string path) - else - let* files = Path.Local.Map.find dirs parent in - Filename.Map.find files (Path.Local.basename path) + aux contents ;; - let mem t path = Option.is_some (find t path) + let all_files_seq { contents; root = _ } = + let rec aux path { files; subdirs } = + Seq.append + (Filename.Map.to_seq files + |> Seq.map ~f:(fun (file_name, payload) -> + Path.Local.relative path file_name, payload)) + (Seq.concat + (Filename.Map.to_seq subdirs + |> Seq.map ~f:(fun (dir_name, subdir_contents) -> + aux (Path.Local.relative path dir_name) subdir_contents))) + in + aux Path.Local.root contents + ;; - let find_dir { root; files; dirs } path = - match Path.Local.descendant (Path.Build.local path) ~of_:(Path.Build.local root) with - | Some dir when Path.Local.is_root dir -> Some files - | Some dir -> Path.Local.Map.find dirs dir - | None -> None + let all_dirs_seq { contents; root = _ } = + let rec aux path { subdirs; files = _ } = + Seq.concat + (Filename.Map.to_seq subdirs + |> Seq.map ~f:(fun (dir_name, dir_contents) -> + let dir = Path.Local.relative path dir_name in + Seq.cons dir (aux dir dir_contents))) + in + aux Path.Local.root contents ;; - let equal - { root = root1; files = files1; dirs = dirs1 } - { root = root2; files = files2; dirs = dirs2 } - ~equal - = - Path.Build.equal root1 root2 - && Filename.Map.equal files1 files2 ~equal - && Path.Local.Map.equal dirs1 dirs2 ~equal:(Filename.Map.equal ~equal) + (* All traversal functions in this module follow the same order: + - top-level files are processed. + - top-level directories are processed, if applicable. + - the content of the directories is then processed recursively. + + This explains why [root] is usually ignored and replaced by [Path.Local.root = .]: + we don't want to process the root directory itself. + *) + + let foldi { contents; root = _ } ~init ~f = + let rec aux path { files; subdirs } acc = + let acc' = + Filename.Map.foldi files ~init:acc ~f:(fun file_name payload acc -> + let file = Path.Local.relative path file_name in + f file (Some payload) acc) + in + Filename.Map.foldi subdirs ~init:acc' ~f:(fun dir_name dir_contents acc -> + let dir = Path.Local.relative path dir_name in + let acc' = f dir None acc in + aux dir dir_contents acc') + in + aux Path.Local.root contents init ;; - let exists { root = _; files; dirs } ~f = - Filename.Map.exists files ~f || Path.Local.Map.exists dirs ~f:(String.Map.exists ~f) + let iteri { contents; root = _ } ~f ~d = + let rec aux path { files; subdirs } = + Filename.Map.iteri files ~f:(fun file_name payload -> + let file = Path.Local.relative path file_name in + f file payload); + Filename.Map.iteri subdirs ~f:(fun dir_name dir_contents -> + let dir = Path.Local.relative path dir_name in + d dir; + aux dir dir_contents) + in + aux Path.Local.root contents ;; - let foldi { root = _; files; dirs } ~init ~f = - let acc = - Filename.Map.foldi files ~init ~f:(fun file acc -> - f (Path.Local.of_string file) acc) + let to_list_map { contents; root = _ } ~f = + let rec aux path { files; subdirs } = + let file_list = + Filename.Map.to_list_map files ~f:(fun file_name payload -> + f (Path.Local.relative path file_name) (Some payload)) + in + let dir_list = + Filename.Map.to_list_map subdirs ~f:(fun dir_name dir_contents -> + let dir = Path.Local.relative path dir_name in + let d = f dir None in + d :: aux dir dir_contents) + |> List.concat + in + file_list @ dir_list in - Path.Local.Map.foldi dirs ~init:acc ~f:(fun dir filenames acc -> - String.Map.foldi filenames ~init:acc ~f:(fun filename payload acc -> - f (Path.Local.relative dir filename) payload acc)) + aux Path.Local.root contents ;; - let iteri { root = _; files; dirs } ~f = - Filename.Map.iteri files ~f:(fun file acc -> f (Path.Local.of_string file) acc); - Path.Local.Map.iteri dirs ~f:(fun dir filenames -> - String.Map.iteri filenames ~f:(fun filename payload -> - f (Path.Local.relative dir filename) payload)) + let iter_files t ~f = iteri t ~f ~d:(fun _ -> ()) + + (* Slightly more efficient to not even look at the files. *) + let iter_dirs { contents; root = _ } ~f = + let rec aux path { subdirs; files = _ } = + Filename.Map.iteri subdirs ~f:(fun dir_name dir_contents -> + let dir = Path.Local.relative path dir_name in + f dir; + aux dir dir_contents) + in + aux Path.Local.root contents ;; module Path_traversal = Fiber.Make_parallel_map (Path.Local.Map) module Filename_traversal = Fiber.Make_parallel_map (String.Map) - let parallel_map { root; files; dirs } ~f = + let parallel_map { root; contents } ~f = let open Fiber.O in - let+ files, dirs = - Fiber.fork_and_join - (fun () -> - Filename_traversal.parallel_map files ~f:(fun file -> - f (Path.Local.of_string file))) - (fun () -> - Path_traversal.parallel_map dirs ~f:(fun dir files -> - Filename_traversal.parallel_map files ~f:(fun file payload -> - f (Path.Local.relative dir file) payload))) + let rec aux path { files; subdirs } = + let+ files, subdirs = + Fiber.fork_and_join + (fun () -> + Filename_traversal.parallel_map files ~f:(fun file_name -> + let file = Path.Local.relative path file_name in + f file)) + (fun () -> + Filename_traversal.parallel_map subdirs ~f:(fun dir_name -> + let dir = Path.Local.relative path dir_name in + aux dir)) + in + { files; subdirs } in - { root; files; dirs } + let+ contents = aux Path.Local.root contents in + { root; contents } ;; - let digest { root = _; files; dirs } = - let all_digests = - Filename.Map.values files - :: Path.Local.Map.to_list_map dirs ~f:(fun _ -> String.Map.values) + let digest { contents; root = _ } = + let rec all_digests _ { files; subdirs } = + let ffiles = Filename.Map.values files in + List.concat (ffiles :: Filename.Map.to_list_map subdirs ~f:all_digests) in - Digest.generic (List.concat all_digests) + Digest.generic (all_digests "ignored" contents) ;; exception Short_circuit + (* The odd type of [d] and [f] is due to the fact that [map_with_errors] + is used for a variety of things, not all "map-like". *) let map_with_errors - { root; files; dirs } + ?(d : (Path.Build.t -> (unit, 'e) result) option) + ~(f : Path.Build.t -> ('b, 'e) result) ~all_errors - ~(f : Path.Build.t -> 'a -> ('b, 'e) result) + { root; contents } = let errors = ref [] in - let f path a = - match f path a with + let f path = + match f path with | Ok s -> Some s | Error e -> errors := (path, e) :: !errors; if all_errors then None else raise_notrace Short_circuit in + let rec aux path { files; subdirs } = + let files = + Filename.Map.filter_mapi files ~f:(fun file _ -> + f (Path.Build.relative path file)) + in + let subdirs = + Filename.Map.mapi subdirs ~f:(fun dir subdirs_contents -> + let dir = Path.Build.relative path dir in + aux dir subdirs_contents) + in + (match d with + | None -> () + | Some f -> + (match f path with + | Ok () -> () + | Error e -> + errors := (path, e) :: !errors; + if all_errors then () else raise_notrace Short_circuit)); + { files; subdirs } + in let result = - try - let files = - Filename.Map.filter_mapi files ~f:(fun file -> - f (Path.Build.relative root file)) - in - let dirs = - Path.Local.Map.mapi dirs ~f:(fun dir -> - let dir = Path.Build.append_local root dir in - Filename.Map.filter_mapi ~f:(fun filename -> - f (Path.Build.relative dir filename))) - in - { root; files; dirs } - with - | Short_circuit -> { root; files = Filename.Map.empty; dirs = Path.Local.Map.empty } + try { root; contents = aux root contents } with + | Short_circuit -> { root; contents = empty } in match Nonempty_list.of_list !errors with | None -> Ok result | Some list -> Error list ;; - let to_dyn { root; files; dirs } = - Dyn.record - [ "root", Path.Build.to_dyn root - ; "files", Filename.Map.to_dyn Dyn.opaque files - ; "dirs", Path.Local.Map.to_dyn (Filename.Map.to_dyn Dyn.opaque) dirs - ] + let to_dyn { root; contents } = + let rec aux { files; subdirs } = + Dyn.record + [ "files", Filename.Map.to_dyn Dyn.opaque files + ; "dirs", Filename.Map.to_dyn aux subdirs + ] + in + Dyn.record [ "root", Path.Build.to_dyn root; "contents", aux contents ] ;; end diff --git a/src/dune_targets/dune_targets.mli b/src/dune_targets/dune_targets.mli index 5b024d1decf..126d2402b72 100644 --- a/src/dune_targets/dune_targets.mli +++ b/src/dune_targets/dune_targets.mli @@ -79,12 +79,20 @@ val all : t -> Path.Build.t list (** The set of targets produced by an action. Each target may be tagged with a payload, for example, the target's digest. *) module Produced : sig + (** All file and directory names are relative to the root (['a t]). *) + type 'a dir_contents = private + { files : 'a Filename.Map.t (* mapping file name -> 'a *) + ; subdirs : 'a dir_contents Filename.Map.t + (* mapping directory name -> 'a dir_contents *) + } + type 'a t = private - { root : Path.Build.t (** [files] and [dirs] are relative to [root] *) - ; files : 'a Filename.Map.t - ; dirs : 'a Filename.Map.t Path.Local.Map.t + { root : Path.Build.t + ; contents : 'a dir_contents } + val equal : 'a t -> 'a t -> equal:('a -> 'a -> bool) -> bool + module Error : sig type t @@ -93,38 +101,70 @@ module Produced : sig end (** Expand [targets : Validated.t] by recursively traversing directory targets - and collecting all contained files. *) + and collecting all contained files and directories. *) val of_validated : Validated.t -> (unit t, Error.t) result (** Construct from a set of files in the root directory. *) - val of_files : Path.Build.t -> 'a Path.Local.Map.t -> 'a t + val of_files : Path.Build.t -> 'a option Path.Local.Map.t -> 'a t - (** Union of [t.files] and all files in [t.dirs] as [Seq.t] for efficient traversal. + (** Union of all files and any [subdirs] in [t] as [Seq.t] for efficient traversal. The resulting [Path.Local.t]s are relative to [t.root]. *) val all_files_seq : 'a t -> (Path.Local.t * 'a) Seq.t + (** Passes in depth-first order on all the (sub)directories in the targets. + The resulting [Path.Local.t]s are relative to [t.root]. *) + val all_dirs_seq : 'a t -> Path.Local.t Seq.t + (** Check if a file is present in the targets. *) val mem : 'a t -> Path.Build.t -> bool + (* Check if a directory is present in the targets. *) + val mem_dir : 'a t -> Path.Build.t -> bool + + (* Check if a path is present (either as a file or as a directory) in the targets. *) + val mem_any : 'a t -> Path.Build.t -> bool + + (* Find the value associated with a file, or all the files of a subdirectory, if any. *) + val find_any : 'a t -> Path.Build.t -> ('a, 'a Filename.Map.t) either option + (** Find the value associated with the file, if any. *) val find : 'a t -> Path.Build.t -> 'a option (** Find all files in a directory target or a subdirectory. *) val find_dir : 'a t -> Path.Build.t -> 'a Filename.Map.t option - val equal : 'a t -> 'a t -> equal:('a -> 'a -> bool) -> bool val exists : 'a t -> f:('a -> bool) -> bool - val foldi : 'a t -> init:'acc -> f:(Path.Local.t -> 'a -> 'acc -> 'acc) -> 'acc - val iteri : 'a t -> f:(Path.Local.t -> 'a -> unit) -> unit + + (* All traversal functions in this module follow the same order: + - top-level files are processed. + - top-level directories are processed, if applicable. + - the content of the directories is then processed recursively. + + This explains why [root] is usually ignored and replaced by [Path.Local.root = .]: + we don't want to process the root directory itself. + *) + + val foldi : 'a t -> init:'acc -> f:(Path.Local.t -> 'a option -> 'acc -> 'acc) -> 'acc + val to_list_map : 'a t -> f:(Path.Local.t -> 'a option -> 'b) -> 'b list + val iter_files : 'a t -> f:(Path.Local.t -> 'a -> unit) -> unit + val iter_dirs : 'a t -> f:(Path.Local.t -> unit) -> unit + + (** Iterate on all [f]iles & [d]irs in the targets. + All [Path.Local.t]s are relative to [t.root]. *) + val iteri : 'a t -> f:(Path.Local.t -> 'a -> unit) -> d:(Path.Local.t -> unit) -> unit + val parallel_map : 'a t -> f:(Path.Local.t -> 'a -> 'b Fiber.t) -> 'b t Fiber.t (** Aggregate all content digests. *) val digest : Digest.t t -> Digest.t + (* The odd type of [d] and [f] is due to the fact that [map_with_errors] + is used for a variety of things, not all "map-like". *) val map_with_errors - : 'a t + : ?d:(Path.Build.t -> (unit, 'e) result) + -> f:(Path.Build.t -> ('b, 'e) result) -> all_errors:bool - -> f:(Path.Build.t -> 'a -> ('b, 'e) result) + -> 'a t -> ('b t, (Path.Build.t * 'e) Nonempty_list.t) result val to_dyn : _ t -> Dyn.t diff --git a/src/fs/fs.ml b/src/fs/fs.ml index 46bd1d4faa9..daf86c0776a 100644 --- a/src/fs/fs.ml +++ b/src/fs/fs.ml @@ -16,18 +16,18 @@ let dir_contents (dir : Path.t) = >>| Result.map ~f:(fun contents -> Fs_cache.Dir_contents.to_list contents |> List.map ~f:fst) | `Inside _ -> - let* () = Build_system.build_file dir in + let* () = Build_system.build_dir dir in Memo.return (Path.readdir_unsorted dir) ;; -let exists file kind = - Build_system.file_exists file +let exists path kind = + Build_system.file_exists path >>= function | false -> Memo.return false | true -> - let+ () = Build_system.build_file file in - (match Path.stat file with - | Ok { st_kind; _ } when kind = st_kind -> true + let+ () = Build_system.build_file path in + (match Path.stat path with + | Ok { st_kind; _ } -> kind = st_kind | _ -> false) ;; @@ -45,7 +45,8 @@ let dir_exists dir = | `Inside _ -> (* CR-rgrinberg: unfortunately, [Build_system.file_exists] always returns false for directories. *) - Memo.return true + (* CR-ElectreAAS: sike! [exists] now takes both into account! *) + exists dir Unix.S_DIR ;; let with_lexbuf_from_file file ~f = diff --git a/test/blackbox-tests/test-cases/directory-targets/subdirs-only.t b/test/blackbox-tests/test-cases/directory-targets/subdirs-only.t new file mode 100644 index 00000000000..0caf52f6481 --- /dev/null +++ b/test/blackbox-tests/test-cases/directory-targets/subdirs-only.t @@ -0,0 +1,23 @@ +We test that a directory target with only other subdirs can be +properly promoted. + + $ cat > dune-project < (lang dune 3.16) + > (using directory-targets 0.1) + > EOF + + $ cat > dune < (rule + > (targets (dir foo)) + > (mode (promote (until-clean))) + > (action + > (progn + > (run mkdir -p foo/bar) + > (run touch foo/bar/file1) + > (run mkdir -p foo/bar/baz/qux) + > (run touch foo/bar/baz/qux/file2)))) + > EOF + + $ dune build foo + $ ls foo/bar/baz/qux + file2 diff --git a/test/blackbox-tests/test-cases/dune-cache/empty-dir.t b/test/blackbox-tests/test-cases/dune-cache/empty-dir.t index 743b8020638..ddd7f531956 100644 --- a/test/blackbox-tests/test-cases/dune-cache/empty-dir.t +++ b/test/blackbox-tests/test-cases/dune-cache/empty-dir.t @@ -31,4 +31,5 @@ Restore it from cache. $ dune build output $ find _build/default/output | sort _build/default/output + _build/default/output/child _build/default/output/file diff --git a/test/blackbox-tests/test-cases/promote/deep-subdir.t b/test/blackbox-tests/test-cases/promote/deep-subdir.t index bb6d1d9d5c1..98eea3bddc9 100644 --- a/test/blackbox-tests/test-cases/promote/deep-subdir.t +++ b/test/blackbox-tests/test-cases/promote/deep-subdir.t @@ -35,26 +35,6 @@ This one works. Now, let's add a layer between base_file and deep_file: $ touch deep/a/b/deep_file $ touch deep/base_file - $ dune build deep_copied - File "dune", lines 1-8, characters 0-123: - 1 | (rule - 2 | (deps - 3 | (source_tree deep)) - 4 | (targets - 5 | (dir deep_copied)) - 6 | (mode promote) - 7 | (action - 8 | (run cp -r deep deep_copied))) - Error: Cannot promote files to "deep_copied/a/b". - Reason: opendir(deep_copied/a/b): No such file or directory - -> required by _build/default/deep_copied - [1] - -It does not work! Note that the `base_file` is required. For instance, move it -to `a/`, or remove it, and it works: - - $ mv deep/base_file deep/a/ $ dune build deep_copied - $ rm deep/a/base_file - $ dune build deep_copied +It now works!