ODP-1101 - NIFI-12801 Add local file upload option in PutHDFS processor #4

Merged: 2 commits, Jun 6, 2024
Changes from all commits
nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/pom.xml (15 additions, 0 deletions)
@@ -108,12 +108,27 @@
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kerberos-user-service-api</artifactId>
</dependency>
+<dependency>
+<groupId>org.apache.nifi</groupId>
+<artifactId>nifi-resource-transfer</artifactId>
+<version>1.23.2.3.3.0.0-SNAPSHOT</version>
+</dependency>
+<dependency>
+<groupId>org.apache.nifi</groupId>
+<artifactId>nifi-file-resource-service-api</artifactId>
+</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-mock</artifactId>
<version>1.23.2.3.3.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
+<dependency>
+<groupId>org.apache.nifi</groupId>
+<artifactId>nifi-file-resource-service</artifactId>
+<version>1.23.2.3.3.0.0-SNAPSHOT</version>
+<scope>test</scope>
+</dependency>
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/PutHDFS.java
@@ -44,6 +44,7 @@
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.fileresource.service.api.FileResource;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.components.RequiredPermission;
import org.apache.nifi.expression.ExpressionLanguageScope;
@@ -54,8 +55,8 @@
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
-import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processors.transfer.ResourceTransferSource;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.StopWatch;
import org.ietf.jgss.GSSException;
@@ -78,6 +79,9 @@
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Stream;
+import static org.apache.nifi.processors.transfer.ResourceTransferProperties.FILE_RESOURCE_SERVICE;
+import static org.apache.nifi.processors.transfer.ResourceTransferProperties.RESOURCE_TRANSFER_SOURCE;
+import static org.apache.nifi.processors.transfer.ResourceTransferUtils.getFileResource;

/**
* This processor copies FlowFiles to HDFS.
@@ -246,6 +250,8 @@ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
props.add(REMOTE_GROUP);
props.add(COMPRESSION_CODEC);
props.add(IGNORE_LOCALITY);
+props.add(RESOURCE_TRANSFER_SOURCE);
+props.add(FILE_RESOURCE_SERVICE);
return props;
}
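For reference, the two descriptors registered above come from the shared nifi-resource-transfer module rather than being declared in PutHDFS itself. A minimal sketch of their likely shape follows, assuming the usual PropertyDescriptor builder pattern and assuming the enum's other constant is FLOWFILE_CONTENT (the presumed default); names, descriptions, and the default shown here are paraphrased assumptions, not copied from that module:

    // Sketch only: approximate shape of the shared descriptors in ResourceTransferProperties.
    public static final PropertyDescriptor RESOURCE_TRANSFER_SOURCE = new PropertyDescriptor.Builder()
            .name("Resource Transfer Source")
            .description("Whether the content to upload comes from the FlowFile or from a local file")
            .required(true)
            .allowableValues(ResourceTransferSource.FLOWFILE_CONTENT.getValue(),
                    ResourceTransferSource.FILE_RESOURCE_SERVICE.getValue())
            .defaultValue(ResourceTransferSource.FLOWFILE_CONTENT.getValue())
            .build();

    public static final PropertyDescriptor FILE_RESOURCE_SERVICE = new PropertyDescriptor.Builder()
            .name("File Resource Service")
            .description("Controller service that maps FlowFile attributes to a local file")
            .identifiesControllerService(FileResourceService.class)
            // Shown and required only when the source above is set to the file resource service.
            .dependsOn(RESOURCE_TRANSFER_SOURCE, ResourceTransferSource.FILE_RESOURCE_SERVICE.getValue())
            .build();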

@@ -372,10 +378,11 @@ public Object run() {

// Write FlowFile to temp file on HDFS
final StopWatch stopWatch = new StopWatch(true);
-session.read(putFlowFile, new InputStreamCallback() {
+final ResourceTransferSource resourceTransferSource = ResourceTransferSource.valueOf(
+        context.getProperty(RESOURCE_TRANSFER_SOURCE).getValue());
+try (final InputStream in = getFileResource(resourceTransferSource, context, flowFile.getAttributes())
+        .map(FileResource::getInputStream).orElseGet(() -> session.read(flowFile))) {

-@Override
-public void process(InputStream in) throws IOException {
OutputStream fos = null;
Path createdFile = null;
try {
@@ -420,7 +427,6 @@ public void process(InputStream in) throws IOException {
}
}

-});
stopWatch.stop();
final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
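The core behavioral change is in the two hunks above: rather than always reading the FlowFile content through an InputStreamCallback, PutHDFS now asks the resource-transfer helper for an input stream first and only falls back to session.read(flowFile) when no file resource is available. A rough sketch of what the getFileResource helper resolves to, inferred from its call site here rather than copied from the nifi-resource-transfer source:

    import java.util.Map;
    import java.util.Optional;

    import org.apache.nifi.fileresource.service.api.FileResource;
    import org.apache.nifi.fileresource.service.api.FileResourceService;
    import org.apache.nifi.processor.ProcessContext;
    import org.apache.nifi.processors.transfer.ResourceTransferSource;

    import static org.apache.nifi.processors.transfer.ResourceTransferProperties.FILE_RESOURCE_SERVICE;

    // Inferred sketch of ResourceTransferUtils.getFileResource; names and behavior
    // are deduced from its usage in PutHDFS, so details are assumptions.
    public final class ResourceTransferUtilsSketch {

        public static Optional<FileResource> getFileResource(final ResourceTransferSource source,
                                                             final ProcessContext context,
                                                             final Map<String, String> attributes) {
            if (source == ResourceTransferSource.FILE_RESOURCE_SERVICE) {
                final FileResourceService service = context.getProperty(FILE_RESOURCE_SERVICE)
                        .asControllerService(FileResourceService.class);
                // The configured service (e.g. StandardFileResourceService) resolves the
                // FlowFile's attributes, typically a path attribute, to a readable local file.
                return Optional.ofNullable(service.getFileResource(attributes));
            }
            // FLOWFILE_CONTENT: empty result, so the caller falls back to session.read(flowFile).
            return Optional.empty();
        }
    }

Because the local file's stream replaces the FlowFile content entirely, a FlowFile only needs to carry the path attribute; its own content can stay empty, which is exactly what the new unit test below exercises.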
nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/test/java/org/apache/nifi/processors/hadoop/PutHDFSTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.nifi.processors.hadoop;

+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -29,7 +30,11 @@
import org.apache.hadoop.util.Progressable;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.fileresource.service.StandardFileResourceService;
+import org.apache.nifi.fileresource.service.api.FileResourceService;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.processors.transfer.ResourceTransferProperties;
+import org.apache.nifi.processors.transfer.ResourceTransferSource;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
@@ -53,13 +58,18 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.Set;
+import java.util.Collections;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -635,6 +645,61 @@ public void testPutFileWithCloseException() throws IOException {
mockFileSystem.delete(p, true);
}

+@Test
+public void testPutFileFromLocalFile() throws Exception {
+final FileSystem spyFileSystem = Mockito.spy(mockFileSystem);
+final PutHDFS proc = new TestablePutHDFS(kerberosProperties, spyFileSystem);
+final TestRunner runner = TestRunners.newTestRunner(proc);
+runner.setProperty(PutHDFS.DIRECTORY, TARGET_DIRECTORY);
+runner.setProperty(PutHDFS.CONFLICT_RESOLUTION, PutHDFS.REPLACE_RESOLUTION);
+runner.setProperty(PutHDFS.WRITING_STRATEGY, PutHDFS.SIMPLE_WRITE);
+
+// add the StandardFileResourceService controller service
+String attributeName = "file.path";
+
+String serviceId = FileResourceService.class.getSimpleName();
+FileResourceService service = new StandardFileResourceService();
+byte[] FILE_DATA = "0123456789".getBytes(StandardCharsets.UTF_8);
+byte[] EMPTY_CONTENT = new byte[0];
+runner.addControllerService(serviceId, service);
+runner.setProperty(service, StandardFileResourceService.FILE_PATH, String.format("${%s}", attributeName));
+runner.enableControllerService(service);
+
+runner.setProperty(ResourceTransferProperties.RESOURCE_TRANSFER_SOURCE, ResourceTransferSource.FILE_RESOURCE_SERVICE.getValue());
+runner.setProperty(ResourceTransferProperties.FILE_RESOURCE_SERVICE, serviceId);
+java.nio.file.Path tempFilePath = Files.createTempFile("PutHDFS_testPutFileFromLocalFile_", "");
+Files.write(tempFilePath, FILE_DATA);
+
+Map<String, String> attributes = new HashMap<>();
+
+attributes.put(CoreAttributes.FILENAME.key(), FILE_NAME);
+attributes.put(attributeName, tempFilePath.toString());
+runner.enqueue(EMPTY_CONTENT, attributes);
+runner.run();
+
+runner.assertAllFlowFilesTransferred(PutHDFS.REL_SUCCESS, 1);
+MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHDFS.REL_SUCCESS).get(0);
+flowFile.assertContentEquals(EMPTY_CONTENT);
+
+// assert HDFS file and directory structure
+assertTrue(spyFileSystem.exists(new Path(TARGET_DIRECTORY + "/" + FILE_NAME)));
+assertEquals(FILE_NAME, flowFile.getAttribute(CoreAttributes.FILENAME.key()));
+assertEquals(TARGET_DIRECTORY, flowFile.getAttribute(PutHDFS.ABSOLUTE_HDFS_PATH_ATTRIBUTE));
+assertEquals("true", flowFile.getAttribute(PutHDFS.TARGET_HDFS_DIR_CREATED_ATTRIBUTE));
+// with a real HDFS the URL would start with "hdfs://"; against the local test filesystem, assert only the path suffix
+assertTrue(flowFile.getAttribute(PutHDFS.HADOOP_FILE_URL_ATTRIBUTE).endsWith(TARGET_DIRECTORY + "/" + FILE_NAME));
+
+verify(spyFileSystem, Mockito.never()).rename(any(Path.class), any(Path.class));
+
+// assert provenance events
+Set<ProvenanceEventType> expectedEventTypes = Collections.singleton(ProvenanceEventType.SEND);
+Set<ProvenanceEventType> actualEventTypes = runner.getProvenanceEvents().stream()
+        .map(ProvenanceEventRecord::getEventType)
+        .collect(Collectors.toSet());
+assertEquals(expectedEventTypes, actualEventTypes);
+
+}
+
private class TestablePutHDFS extends PutHDFS {

private KerberosProperties testKerberosProperties;
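Worth noting about the test above: the enqueued FlowFile is empty and carries only the file.path attribute, so the ten bytes that land in the (mock) HDFS are streamed directly from the temp file by the controller service; the content never passes through the FlowFile itself. If the provenance transit URI were also of interest, a follow-up assertion along these lines could be added, assuming PutHDFS reports the target HDFS URL as the SEND event's transit URI (an assumption, not something this PR asserts):

    // Hypothetical extra assertion, relying on the transit URI pointing at the written file.
    ProvenanceEventRecord sendEvent = runner.getProvenanceEvents().get(0);
    assertTrue(sendEvent.getTransitUri().endsWith(TARGET_DIRECTORY + "/" + FILE_NAME));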