Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unicode-normalize manifest and file paths before comparing #71

Merged
merged 2 commits into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions developer certificates/David_Moles.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
I, David Moles ([email protected]) freely assign copyright for
my contributions to this project over as described below:


Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.


Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or

(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or

(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.

(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ResourceBundle;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -26,23 +27,36 @@ abstract public class AbstractPayloadFileExistsInManifestsVistor extends SimpleF

/**
 * Initializes the shared visitor state; this constructor must be called before the visitor is used.
 *
 * @param ignoreHiddenFiles Should hidden files be ignored
 */
public AbstractPayloadFileExistsInManifestsVistor(final boolean ignoreHiddenFiles) {
    // The no-arg superclass constructor is invoked implicitly.
    this.ignoreHiddenFiles = ignoreHiddenFiles;
}

/**
 * Decides whether the walk should descend into {@code dir}.
 *
 * @param dir the directory about to be visited
 * @param attrs the directory's basic attributes (not consulted here)
 * @return {@link FileVisitResult#SKIP_SUBTREE} when hidden files are ignored and {@code dir} is hidden,
 *         {@link FileVisitResult#CONTINUE} otherwise
 * @throws IOException declared by the overridden method
 */
@Override
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
    // The hidden check only runs when hidden files are being ignored (short-circuit).
    final boolean skipDirectory = ignoreHiddenFiles && PathUtils.isHidden(dir);
    if (!skipDirectory) {
        return FileVisitResult.CONTINUE;
    }

    logger.debug(messages.getString("skipping_hidden_file"), dir);
    return FileVisitResult.SKIP_SUBTREE;
}

/**
 * Returns true if the path exists in the provided set of manifest paths, false otherwise.
 * <p>
 * An exact match via {@link Set#contains(Object)} is tried first. Only when that fails is every
 * manifest path compared by its Unicode-normalized string form, so that a path differing from a
 * manifest entry only in normalization (for example composed versus decomposed accents) still matches.
 *
 * @param path The file path.
 * @param manifestPaths The manifest paths.
 * @return true if the path exists, false otherwise.
 */
protected static boolean inManifest(final Path path, final Set<Path> manifestPaths) {
    // Fast path: the common case is an exact match, which avoids normalizing the whole manifest
    // set for every visited file. It also keeps the Path equality rules of the file system
    // (which may be case-insensitive) that a plain set lookup provides.
    if (manifestPaths.contains(path)) {
        return true;
    }

    // Slow path: fall back to comparing normalized string forms.
    final String normalizedPath = ManifestVerifier.toNormalizedString(path);
    for (final Path manifestPath : manifestPaths) {
        if (ManifestVerifier.toNormalizedString(manifestPath).equals(normalizedPath)) {
            return true;
        }
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,19 @@ public enum ManifestVerifier {; //using enum to enforce singleton
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

/**
* Verify that all the files in the payload directory are listed in the payload manifest and
* Verify that all the files in the payload directory are listed in the payload manifest and
* all files listed in all manifests exist.
*
*
* @param bag the bag which contains the manifests to check
* @param ignoreHiddenFiles to include hidden files when checking
*
*
* @throws IOException if there is an error while reading a file from the filesystem
* @throws MaliciousPathException if a path is outside the bag
* @throws InvalidBagitFileFormatException if a manifest is not formatted correctly
* @throws FileNotInPayloadDirectoryException if a file listed in a manifest is not in the payload directory
*/
public static void verifyManifests(final Bag bag, final boolean ignoreHiddenFiles)throws IOException{

final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
checkAllFilesListedInManifestExist(allFilesListedInManifests);

Expand All @@ -54,12 +54,21 @@ public static void verifyManifests(final Bag bag, final boolean ignoreHiddenFile
}
}

/**
 * Converts the path to its string form and applies canonical decomposition
 * ({@link java.text.Normalizer.Form#NFD}) to it.
 *
 * @param path the path to normalize
 * @return the path's string form in NFD
 */
static String toNormalizedString(final Path path) {
    final String rawPath = path.toString();
    return Normalizer.normalize(rawPath, Normalizer.Form.NFD);
}

/*
* get the full path (absolute) of all the files listed in all the manifests
*/
private static Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException {
logger.debug(messages.getString("all_files_in_manifests"));

final Set<Path> filesListedInManifests = new HashSet<>();

try(DirectoryStream<Path> directoryStream = Files.newDirectoryStream(bag.getTagFileDir(), new ManifestFilter())){
Expand All @@ -78,7 +87,7 @@ private static Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOEx
*/
private static void checkAllFilesListedInManifestExist(final Set<Path> files) {
logger.info(messages.getString("check_all_files_in_manifests_exist"));

for (final Path file : files) {
if(!Files.exists(file)){
if(existsNormalized(file)){
Expand All @@ -91,21 +100,21 @@ private static void checkAllFilesListedInManifestExist(final Set<Path> files) {
}
}
}

/**
* If a file name is partially normalized, or uses a different normalization form than the manifest specifies, it will fail the plain existence test.
* This method checks for that case by normalizing the names found on disk and comparing them with the normalized manifest filename.
*
*
* @return true if the normalized filename matches one on disk in the specified folder
*/
private static boolean existsNormalized(final Path file){
boolean existsNormalized = false;
final String normalizedFile = Normalizer.normalize(file.toString(), Normalizer.Form.NFD);
final String normalizedFile = toNormalizedString(file);
final Path parent = file.getParent();
if(parent != null){
try(DirectoryStream<Path> files = Files.newDirectoryStream(parent)){
for(final Path fileToCheck : files){
final String normalizedFileToCheck = Normalizer.normalize(fileToCheck.toString(), Normalizer.Form.NFD);
final String normalizedFileToCheck = toNormalizedString(fileToCheck);
if(normalizedFile.equals(normalizedFileToCheck)){
existsNormalized = true;
break;
Expand All @@ -116,7 +125,7 @@ private static boolean existsNormalized(final Path file){
logger.error(messages.getString("error_reading_normalized_file"), parent, normalizedFile, e);
}
}

return existsNormalized;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.github.jscancella.verify.internal;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -29,7 +30,7 @@ public final class PayloadFileExistsInAllManifestsVistor extends AbstractPayload

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is listed in every one of the manifests.
*
*
* @param manifests the set of manifests to check
* @param rootDir the root directory of the bag
* @param ignoreHiddenFiles if the checker should ignore hidden files or not
Expand All @@ -41,16 +42,16 @@ public PayloadFileExistsInAllManifestsVistor(final Set<Manifest> manifests, fina
}

@Override
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs){
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) throws IOException {
if(Files.isRegularFile(path)){
for(final Manifest manifest : manifests){
final Set<Path> relativePaths = manifest
.getEntries().stream()
.map(entry -> entry.getRelativeLocation())
.collect(Collectors.toSet());
final Path relativePath = rootDir.relativize(path);
if(!relativePaths.contains(relativePath)){

if(!inManifest(relativePath, relativePaths)){
final String formattedMessage = messages.getString("file_not_in_manifest_error");
throw new FileNotInManifestException(MessageFormatter.format(formattedMessage, path, manifest.getBagitAlgorithmName()).getMessage());
}
Expand All @@ -59,4 +60,5 @@ public FileVisitResult visitFile(final Path path, final BasicFileAttributes attr
logger.debug(messages.getString("file_in_all_manifests"), path);
return FileVisitResult.CONTINUE;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public final class PayloadFileExistsInAtLeastOneManifestVistor extends AbstractP

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is in one of the manifests.
*
*
* @param filesListedInManifests the set of files listed in all the manifests
* @param ignoreHiddenFiles if the checker should ignore hidden files or not
*/
Expand All @@ -42,7 +42,7 @@ public FileVisitResult visitFile(final Path path, final BasicFileAttributes attr
logger.debug(messages.getString("skipping_hidden_file"), path);
}
else {
if(Files.isRegularFile(path) && !filesListedInManifests.contains(path.toAbsolutePath())){
if(Files.isRegularFile(path) && !inManifest(path.toAbsolutePath(), filesListedInManifests)){
final String formattedMessage = messages.getString("file_not_in_any_manifest_error");
throw new FileNotInManifestException(MessageFormatter.format(formattedMessage, path).getMessage());
}
Expand Down
Loading