From b5fb1533bb3b9fa122818dee379bc78efe587bd4 Mon Sep 17 00:00:00 2001 From: Marek Horst Date: Thu, 24 Oct 2024 12:56:47 +0200 Subject: [PATCH] Closes #1485: Implement SoftwareHeritage Origins importer relying on the SH Graph dump instead of an API Removing an obsolete RESTful SoftwareHeritage Origins importer. --- .../ImportWorkflowRuntimeParameters.java | 12 - .../origins/SoftwareHeritageOriginEntry.java | 21 - .../SoftwareHeritageOriginsImporter.java | 443 ------------ .../SoftwareHeritageOriginsImporterTest.java | 659 ------------------ 4 files changed, 1135 deletions(-) delete mode 100644 iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginEntry.java delete mode 100644 iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporter.java delete mode 100644 iis-wf/iis-wf-import/src/test/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporterTest.java diff --git a/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/ImportWorkflowRuntimeParameters.java b/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/ImportWorkflowRuntimeParameters.java index 8fe337d79..cbaff7d6c 100644 --- a/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/ImportWorkflowRuntimeParameters.java +++ b/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/ImportWorkflowRuntimeParameters.java @@ -19,17 +19,6 @@ public final class ImportWorkflowRuntimeParameters { public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout"; public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout"; - public static final String IMPORT_SOFTWARE_HERITAGE_PAGE_SIZE = "import.software.heritage.page.size"; - public static final String IMPORT_SOFTWARE_HERITAGE_START_INDEX = "import.software.heritage.start.index"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_HOST = "import.software.heritage.endpoint.host"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_PORT = "import.software.heritage.endpoint.port"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_SCHEME = "import.software.heritage.endpoint.scheme"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_URI_ROOT = "import.software.heritage.endpoint.uri.root"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY = "import.software.heritage.endpoint.ratelimit.delay"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RETRY_COUNT = "import.software.heritage.endpoint.retry.count"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_READ_TIMEOUT = "import.software.heritage.endpoint.read.timeout"; - public static final String IMPORT_SOFTWARE_HERITAGE_ENDPOINT_CONNECTION_TIMEOUT = "import.software.heritage.endpoint.connection.timeout"; - public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location"; public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code"; public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = "import.vocabulary.output.filename"; @@ -45,7 +34,6 @@ public final class ImportWorkflowRuntimeParameters { public static final String RESULTSET_READ_TIMEOUT_DEFAULT_VALUE = "60000"; public static final String RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; public static final String RESULTSET_PAGESIZE_DEFAULT_VALUE = "100"; - public static final String SOFTWARE_HERITAGE_PAGE_SIZE_DEFAULT_VALUE = "100"; private ImportWorkflowRuntimeParameters() {} diff --git a/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginEntry.java b/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginEntry.java deleted file mode 100644 index a82da3d2a..000000000 --- a/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginEntry.java +++ /dev/null @@ -1,21 +0,0 @@ -package eu.dnetlib.iis.wf.importer.software.origins; - -/** - * Origin entry retrieved from SoftwareHeritage endpoint. - * @author mhorst - * - */ -public class SoftwareHeritageOriginEntry { - - private String url; - - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } - -} diff --git a/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporter.java b/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporter.java deleted file mode 100644 index d84eafb22..000000000 --- a/iis-wf/iis-wf-import/src/main/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporter.java +++ /dev/null @@ -1,443 +0,0 @@ -package eu.dnetlib.iis.wf.importer.software.origins; - -import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_CONNECTION_TIMEOUT; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_HOST; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_PORT; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_READ_TIMEOUT; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RETRY_COUNT; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_SCHEME; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_ENDPOINT_URI_ROOT; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_PAGE_SIZE; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_SOFTWARE_HERITAGE_START_INDEX; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.SOFTWARE_HERITAGE_PAGE_SIZE_DEFAULT_VALUE; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.net.SocketTimeoutException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import org.apache.avro.file.DataFileWriter; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.http.Header; -import org.apache.http.HttpEntity; -import org.apache.http.HttpHost; -import org.apache.http.HttpRequest; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.util.EntityUtils; -import org.apache.log4j.Logger; - -import com.google.common.base.Preconditions; -import com.google.gson.Gson; -import com.google.gson.JsonSyntaxException; - -import eu.dnetlib.iis.common.WorkflowRuntimeParameters; -import eu.dnetlib.iis.common.counter.NamedCounters; -import eu.dnetlib.iis.common.counter.NamedCountersFileWriter; -import eu.dnetlib.iis.common.java.PortBindings; -import eu.dnetlib.iis.common.java.io.DataStore; -import eu.dnetlib.iis.common.java.io.FileSystemPath; -import eu.dnetlib.iis.common.java.porttype.AvroPortType; -import eu.dnetlib.iis.common.java.porttype.PortType; -import eu.dnetlib.iis.referenceextraction.softwareurl.schemas.SoftwareHeritageOrigin; -import eu.dnetlib.iis.wf.importer.HttpClientUtils; - -/** - * Importer module retrieving (incrementally) origins from Software Heritage RESTful endpoint. - * - * @author mhorst - * - */ -public class SoftwareHeritageOriginsImporter implements eu.dnetlib.iis.common.java.Process { - - private static final String DELIM_LINKS = ","; - private static final String DELIM_LINK_PARAM = ";"; - private static final String META_REL = "rel"; - private static final String META_NEXT = "next"; - private static final String HEADER_LINK = "Link"; - - public static final String DEFAULT_METAFILE_NAME = "meta.json"; - - protected static final String COUNTER_NAME_TOTAL = "TOTAL"; - - protected static final String OUTPUT_PROPERTY_NEXT_RECORD_INDEX = "next_record_index"; - - protected static final String PORT_OUT_ORIGINS = "origins"; - - private static final Logger log = Logger.getLogger(SoftwareHeritageOriginsImporter.class); - - private final static int progressLogInterval = 100000; - - private final NamedCountersFileWriter countersWriter = new NamedCountersFileWriter(); - - private final Map outputPorts = new HashMap(); - - // ------------------------ CONSTRUCTORS ------------------- - - public SoftwareHeritageOriginsImporter() { - outputPorts.put(PORT_OUT_ORIGINS, new AvroPortType(SoftwareHeritageOrigin.SCHEMA$)); - } - - // ------------------------ LOGIC -------------------------- - - @Override - public Map getInputPorts() { - return Collections.emptyMap(); - } - - @Override - public Map getOutputPorts() { - return outputPorts; - } - - @Override - public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { - - SoftwareHeritageOriginsImporterParams params = new SoftwareHeritageOriginsImporterParams(parameters); - - NamedCounters counters = new NamedCounters(new String[] { COUNTER_NAME_TOTAL }); - int currentCount = 0; - - if (StringUtils.isNotBlank(params.getShEndpointUriRoot()) - && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(params.getShEndpointUriRoot())) { - - try (DataFileWriter originsWriter = getWriter(FileSystem.get(conf), portBindings)) { - - long startTime = System.currentTimeMillis(); - - Gson gson = new Gson(); - - CloseableHttpClient httpclient = buildHttpClient(params.getConnectionTimeout(), params.getReadTimeout()); - - HttpHost target = new HttpHost(params.getShEndpointHost(), params.getShEndpointPort(), params.getShEndpointScheme()); - HttpRequest getRequest = new HttpGet(buildUri(params.getShEndpointUriRoot(), params.getStartElementIndex(), params.getPageSize())); - - log.info("executing first page request to " + target + " with: " + getRequest.toString()); - - int retryCount=0; - - while (getRequest != null) { - try (CloseableHttpResponse httpResponse = httpclient.execute(target, getRequest)) { - - int statusCode = httpResponse.getStatusLine().getStatusCode(); - - if (statusCode!=200) { - if (statusCode==429) { - //got throttled, delaying... - log.warn("SH endpoint rate limit reached, delaying for " + params.getDelayMillis() - + " ms, server response: " + EntityUtils.toString(httpResponse.getEntity())); - Thread.sleep(params.getDelayMillis()); - continue; - } else { - String errMessage = "got unhandled HTTP status code when accessing SH endpoint: " - + statusCode + ", full status: " + httpResponse.getStatusLine() - + ", server response: " + EntityUtils.toString(httpResponse.getEntity()); - if (retryCount < params.getMaxRetryCount()) { - retryCount++; - log.error(errMessage + ", number of retries left: " + (params.getMaxRetryCount()-retryCount)); - Thread.sleep(params.getDelayMillis()); - continue; - } else { - log.error("exceeding the allowed number of retries: " + params.getMaxRetryCount() + ", interrupting..."); - throw new RuntimeException(errMessage); - } - } - } else { - if (retryCount > 0) { - retryCount=0; - } - } - - HttpEntity entity = httpResponse.getEntity(); - if (entity != null) { - SoftwareHeritageOriginEntry[] entries = parsePage(EntityUtils.toString(entity), gson); - if (entries != null && entries.length > 0) { - for (SoftwareHeritageOriginEntry entry : entries) { - originsWriter.append(convertEntry(entry)); - counters.increment(COUNTER_NAME_TOTAL); - currentCount++; - if (currentCount % progressLogInterval == 0) { - log.info("current progress: " + currentCount + ", last package of " - + progressLogInterval + " processed in " - + ((System.currentTimeMillis() - startTime) / 1000) + " secs"); - startTime = System.currentTimeMillis(); - } - } - } - } - - getRequest = prepareNextRequest(httpResponse); - } catch (SocketTimeoutException e) { - if (retryCount < params.getMaxRetryCount()) { - retryCount++; - log.warn("got timeout exception while accessing SH endpoint, number of retries left: " - + (params.getMaxRetryCount() - retryCount), e); - Thread.sleep(params.getDelayMillis()); - continue; - } else { - log.error("exceeding the allowed number of retries: " + params.getMaxRetryCount() - + ", interrupting..."); - throw e; - } - } - } - - log.info("total number of processed records: " + currentCount); - } - } else { - log.warn("no endpoint URI provided, working in offline mode"); - } - - if (counters.currentValue(COUNTER_NAME_TOTAL) == 0) { - log.warn("no records imported from SH URI: " + params.getShEndpointUriRoot()); - } - countersWriter.writeCounters(counters, System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); - storeNextElementIndex(params.getStartElementIndex() + currentCount); - } - - /** - * Provides {@link SoftwareHeritageOrigin} writer consuming records. - */ - protected DataFileWriter getWriter(FileSystem fs, PortBindings portBindings) - throws IOException { - return DataStore.create(new FileSystemPath(fs, portBindings.getOutput().get(PORT_OUT_ORIGINS)), - SoftwareHeritageOrigin.SCHEMA$); - } - - /** - * Builds HTTP client issuing requests to SH endpoint. - */ - protected CloseableHttpClient buildHttpClient(int connectionTimeout, int readTimeout) { - return HttpClientUtils.buildHttpClient(connectionTimeout, readTimeout); - } - - protected static void storeNextElementIndex(int nextElementIndex) throws IOException { - File file = new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); - - Properties props = new Properties(); - - if (file.exists()) { - // loading properties first, may include counters so we cannot override it - FileInputStream is = new FileInputStream(file); - try { - props.load(is); - } finally { - is.close(); - } - } - - props.put(OUTPUT_PROPERTY_NEXT_RECORD_INDEX, String.valueOf(nextElementIndex)); - - OutputStream os = new FileOutputStream(file); - try { - props.store(os, ""); - } finally { - os.close(); - } - } - - protected static String buildUri(String rootUri, int startElement, int pageSize) { - StringBuilder strBuilder = new StringBuilder(rootUri); - strBuilder.append("?page_token="); - strBuilder.append(startElement); - strBuilder.append("&origin_count="); - strBuilder.append(pageSize); - return strBuilder.toString(); - } - - protected static SoftwareHeritageOriginEntry[] parsePage(String originsPage, Gson gson) { - if (StringUtils.isNotBlank(originsPage)) { - try { - return gson.fromJson(originsPage, SoftwareHeritageOriginEntry[].class); - } catch (JsonSyntaxException e) { - throw new RuntimeException("invalid page contents: \n" + originsPage, e); - } - } else { - return new SoftwareHeritageOriginEntry[0]; - } - } - - protected static SoftwareHeritageOrigin convertEntry(SoftwareHeritageOriginEntry source) { - SoftwareHeritageOrigin.Builder resultBuilder = SoftwareHeritageOrigin.newBuilder(); - resultBuilder.setUrl(source.getUrl()); - return resultBuilder.build(); - } - - protected static String getNextLinkFromHeaders(Header[] headers) { - if (headers != null) { - for (int i = 0; i < headers.length; i++) { - if (HEADER_LINK.equals(headers[i].getName())) { - return getNextLinkFromHeader(headers[i].getValue()); - } - } - } - return null; - } - - protected static String getNextLinkFromHeader(String linkHeader) { - if (StringUtils.isNotBlank(linkHeader)) { - String[] links = linkHeader.split(DELIM_LINKS); - for (String link : links) { - String[] segments = link.split(DELIM_LINK_PARAM); - if (segments.length < 2) - continue; - - String linkPart = segments[0].trim(); - if (!linkPart.startsWith("<") || !linkPart.endsWith(">")) { - continue; - } - linkPart = linkPart.substring(1, linkPart.length() - 1); - - for (int i = 1; i < segments.length; i++) { - String[] rel = segments[i].trim().split("="); - if (rel.length < 2 || !META_REL.equals(rel[0])) { - continue; - } - String relValue = rel[1]; - if (relValue.startsWith("\"") && relValue.endsWith("\"")) { - relValue = relValue.substring(1, relValue.length() - 1); - } - if (META_NEXT.equals(relValue)) { - return linkPart; - } - - } - } - } - - return null; - } - - /** - * Prepares next request based on a link from header. Returns null when next page is not available. - */ - protected static HttpRequest prepareNextRequest(CloseableHttpResponse httpResponse) { - String nextUrl = getNextLinkFromHeaders(httpResponse.getAllHeaders()); - if (StringUtils.isNotBlank(nextUrl)) { - return new HttpGet(nextUrl); - } else { - return null; - } - } - - /** - * Set of parsed input parameters. - * - */ - static class SoftwareHeritageOriginsImporterParams { - - private final String shEndpointUriRoot; - - private final String shEndpointHost; - - private final String shEndpointScheme; - - private final int shEndpointPort; - - private final int startElementIndex; - - private final int pageSize; - - private final int connectionTimeout; - - private final int readTimeout; - - private final int delayMillis; - - private final int maxRetryCount; - - public SoftwareHeritageOriginsImporterParams(Map parameters) { - Preconditions.checkArgument(parameters.containsKey(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_URI_ROOT), - "unknown software heritage endpoint URI, required parameter '%s' is missing!", - IMPORT_SOFTWARE_HERITAGE_ENDPOINT_URI_ROOT); - this.shEndpointUriRoot = parameters.get(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_URI_ROOT); - - Preconditions.checkArgument(parameters.containsKey(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_HOST), - "unknown software heritage endpoint host, required parameter '%s' is missing!", - IMPORT_SOFTWARE_HERITAGE_ENDPOINT_HOST); - this.shEndpointHost = parameters.get(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_HOST); - - Preconditions.checkArgument(parameters.containsKey(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_SCHEME), - "unknown software heritage endpoint scheme (e.g. https), required parameter '%s' is missing!", - IMPORT_SOFTWARE_HERITAGE_ENDPOINT_SCHEME); - this.shEndpointScheme = parameters.get(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_SCHEME); - - Preconditions.checkArgument(parameters.containsKey(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_PORT), - "unknown software heritage endpoint port, required parameter '%s' is missing!", - IMPORT_SOFTWARE_HERITAGE_ENDPOINT_PORT); - this.shEndpointPort = Integer.parseInt(parameters.get(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_PORT)); - - Preconditions.checkArgument(parameters.containsKey(IMPORT_SOFTWARE_HERITAGE_START_INDEX), - "unknown software heritage start element, required parameter '%s' is missing!", - IMPORT_SOFTWARE_HERITAGE_START_INDEX); - this.startElementIndex = Integer.parseInt(WorkflowRuntimeParameters - .getParamValueWithUndefinedCheck(IMPORT_SOFTWARE_HERITAGE_START_INDEX, "1", parameters)); - - this.pageSize = Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_SOFTWARE_HERITAGE_PAGE_SIZE, - SOFTWARE_HERITAGE_PAGE_SIZE_DEFAULT_VALUE, parameters)); - - this.connectionTimeout = Integer.parseInt(WorkflowRuntimeParameters - .getParamValue(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_CONNECTION_TIMEOUT, "60000", parameters)); - this.readTimeout = Integer.parseInt(WorkflowRuntimeParameters - .getParamValue(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_READ_TIMEOUT, "60000", parameters)); - this.delayMillis = Integer.parseInt(WorkflowRuntimeParameters - .getParamValue(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY, "10000", parameters)); - - this.maxRetryCount = Integer.parseInt(WorkflowRuntimeParameters - .getParamValueWithUndefinedCheck(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RETRY_COUNT, "10", parameters)); - } - - public String getShEndpointUriRoot() { - return shEndpointUriRoot; - } - - public String getShEndpointHost() { - return shEndpointHost; - } - - public String getShEndpointScheme() { - return shEndpointScheme; - } - - public int getShEndpointPort() { - return shEndpointPort; - } - - public int getStartElementIndex() { - return startElementIndex; - } - - public int getPageSize() { - return pageSize; - } - - public int getConnectionTimeout() { - return connectionTimeout; - } - - public int getReadTimeout() { - return readTimeout; - } - - public int getDelayMillis() { - return delayMillis; - } - - public int getMaxRetryCount() { - return maxRetryCount; - } - - } - -} diff --git a/iis-wf/iis-wf-import/src/test/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporterTest.java b/iis-wf/iis-wf-import/src/test/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporterTest.java deleted file mode 100644 index 0ef62d1f7..000000000 --- a/iis-wf/iis-wf-import/src/test/java/eu/dnetlib/iis/wf/importer/software/origins/SoftwareHeritageOriginsImporterTest.java +++ /dev/null @@ -1,659 +0,0 @@ -package eu.dnetlib.iis.wf.importer.software.origins; - -import com.google.gson.Gson; -import eu.dnetlib.iis.common.WorkflowRuntimeParameters; -import eu.dnetlib.iis.common.java.PortBindings; -import eu.dnetlib.iis.common.java.porttype.AvroPortType; -import eu.dnetlib.iis.common.java.porttype.PortType; -import eu.dnetlib.iis.referenceextraction.softwareurl.schemas.SoftwareHeritageOrigin; -import org.apache.avro.file.DataFileWriter; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.http.*; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.message.BasicHeader; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.io.TempDir; -import org.mockito.ArgumentCaptor; -import org.mockito.Captor; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -import java.io.*; -import java.net.SocketTimeoutException; -import java.nio.file.Files; -import java.util.*; - -import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; -import static eu.dnetlib.iis.wf.importer.ImportWorkflowRuntimeParameters.*; -import static eu.dnetlib.iis.wf.importer.VerificationUtils.verifyReport; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.*; - -/** - * {@link SoftwareHeritageOriginsImporter} test class. - * @author mhorst - * - */ -@ExtendWith(MockitoExtension.class) -public class SoftwareHeritageOriginsImporterTest { - - private PortBindings portBindings; - - private Configuration conf; - - private Map parameters; - - @TempDir - public File tempFolder; - - @Mock - private CloseableHttpClient httpClient; - - @Mock - private DataFileWriter originWriter; - - @Captor - private ArgumentCaptor originCaptor; - - @Test - public void testGetNextLinkFromHeaderSingle() throws Exception { - // given - String linkHeader = "; rel=\"next\""; - - // execute - String result = SoftwareHeritageOriginsImporter.getNextLinkFromHeader(linkHeader); - - // assert - assertNotNull(result); - assertEquals("/api/next", result); - } - - @Test - public void testGetNextLinkFromHeaderNullOrBlank() throws Exception { - // execute & assert - assertNull(SoftwareHeritageOriginsImporter.getNextLinkFromHeader(null)); - assertNull(SoftwareHeritageOriginsImporter.getNextLinkFromHeader("")); - } - - @Test - public void testGetNextLinkFromHeaderMultiple() throws Exception { - // given - String linkHeader = "; rel=\"next\", ; rel=\"prev\""; - - // execute - String result = SoftwareHeritageOriginsImporter.getNextLinkFromHeader(linkHeader); - - // assert - assertNotNull(result); - assertEquals("/api/next", result); - } - - @Test - public void testGetNextLinkFromHeaders() throws Exception { - // given - Header header = new BasicHeader("Link", "; rel=\"next\""); - - // execute - String result = SoftwareHeritageOriginsImporter.getNextLinkFromHeaders(new Header[] {header}); - - // assert - assertNotNull(result); - assertEquals("/api/next", result); - } - - @Test - public void testGetNextLinkFromHeadersNullEmptyOrUnsupported() throws Exception { - // execute & assert - assertNull(SoftwareHeritageOriginsImporter.getNextLinkFromHeaders(null)); - assertNull(SoftwareHeritageOriginsImporter.getNextLinkFromHeaders(new Header[0])); - assertNull(SoftwareHeritageOriginsImporter.getNextLinkFromHeaders(new Header[] {new BasicHeader("unsupported", "value")})); - } - - @Test - public void testPrepareNextRequestUrlNotNull() throws Exception { - // given - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - Header header = new BasicHeader("Link", "; rel=\"next\""); - when(httpResponse.getAllHeaders()).thenReturn(new Header[] {header}); - - // execute - HttpRequest result = SoftwareHeritageOriginsImporter.prepareNextRequest(httpResponse); - - // assert - assertNotNull(result); - assertEquals("/api/next", result.getRequestLine().getUri()); - - } - - @Test - public void testPrepareNextRequestUrlIsNull() throws Exception { - // given - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - when(httpResponse.getAllHeaders()).thenReturn(null); - - // execute - HttpRequest result = SoftwareHeritageOriginsImporter.prepareNextRequest(httpResponse); - - // assert - assertNull(result); - } - - @Test - public void testConvertEntry() throws Exception { - // given - SoftwareHeritageOriginEntry source = buildSoftwareHeritageOriginEntry("someUrl"); - - // execute - SoftwareHeritageOrigin result = SoftwareHeritageOriginsImporter.convertEntry(source); - - // assert - assertNotNull(result); - assertEquals(source.getUrl(), result.getUrl()); - } - - @Test - public void testParsePage() throws Exception { - // given - Gson gson = new Gson(); - SoftwareHeritageOriginEntry entry1 = buildSoftwareHeritageOriginEntry("someUrl1"); - SoftwareHeritageOriginEntry entry2 = buildSoftwareHeritageOriginEntry("someUrl2"); - - // execute - SoftwareHeritageOriginEntry[] results = SoftwareHeritageOriginsImporter.parsePage( - gson.toJson(new SoftwareHeritageOriginEntry[] {entry1, entry2}), gson); - - // assert - assertNotNull(results); - assertEquals(2, results.length); - assertEquals(entry1.getUrl(), results[0].getUrl()); - assertEquals(entry2.getUrl(), results[1].getUrl()); - } - - @Test - public void testParseBlankPage() throws Exception { - // given - Gson gson = new Gson(); - - // execute - SoftwareHeritageOriginEntry[] results = SoftwareHeritageOriginsImporter.parsePage("", gson); - - // assert - assertNotNull(results); - assertEquals(0, results.length); - } - - @Test - public void testParseInvalidPage() throws Exception { - // given - Gson gson = new Gson(); - - // execute - assertThrows(RuntimeException.class, () -> SoftwareHeritageOriginsImporter.parsePage("invalid", gson)); - } - - @Test - public void testBuildUri() throws Exception { - // given - String rootUri = "rootUriPart"; - int startElement = 1; - int pageSize = 10; - - // execute - String result = SoftwareHeritageOriginsImporter.buildUri(rootUri, startElement, pageSize); - - // assert - assertNotNull(result); - assertEquals("rootUriPart?page_token=1&origin_count=10", result); - } - - @Test - public void testStoreNextElementIndex() throws Exception { - // given - File propertyFile = Files.createTempFile(tempFolder.toPath(), "testStoreNextElementIndex", "tmp").toFile(); - System.setProperty(WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME, propertyFile.getAbsolutePath()); - int nextElementIndex = 2; - - // execute - SoftwareHeritageOriginsImporter.storeNextElementIndex(nextElementIndex); - - // assert - Properties props = new Properties(); - try (FileInputStream fis = new FileInputStream(propertyFile)) { - props.load(fis); - assertEquals(String.valueOf(nextElementIndex), props.getProperty(SoftwareHeritageOriginsImporter.OUTPUT_PROPERTY_NEXT_RECORD_INDEX)); - } - } - - @Test - public void testGetOutputPorts() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = new SoftwareHeritageOriginsImporter(); - - // execute - Map result = importer.getOutputPorts(); - - // assert - assertNotNull(result); - assertNotNull(result.get(SoftwareHeritageOriginsImporter.PORT_OUT_ORIGINS)); - assertTrue(result.get(SoftwareHeritageOriginsImporter.PORT_OUT_ORIGINS) instanceof AvroPortType); - assertSame(SoftwareHeritageOrigin.SCHEMA$, ((AvroPortType) result.get(SoftwareHeritageOriginsImporter.PORT_OUT_ORIGINS)).getSchema()); - } - - @Test - public void testRunWithoutNextHeader() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - when(httpClient.execute(any(HttpHost.class),any(HttpGet.class))).thenReturn(httpResponse); - StatusLine statusLine = mock(StatusLine.class); - when(httpResponse.getStatusLine()).thenReturn(statusLine); - when(statusLine.getStatusCode()).thenReturn(200); - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse.getEntity()).thenReturn(httpEntity); - // preparing page contents - SoftwareHeritageOriginEntry entry = buildSoftwareHeritageOriginEntry("someUrl1"); - Gson gson = new Gson(); - String pageContents = gson.toJson(new SoftwareHeritageOriginEntry[] {entry}); - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - - // execute - importer.run(portBindings, conf, parameters); - - // assert - verify(originWriter, times(1)).append(originCaptor.capture()); - List origins = originCaptor.getAllValues(); - assertEquals(1, origins.size()); - SoftwareHeritageOrigin origin = origins.get(0); - assertNotNull(origin); - assertEquals(entry.getUrl(), origin.getUrl()); - verifyReport(1, SoftwareHeritageOriginsImporter.COUNTER_NAME_TOTAL); - } - - @Test - public void testRunWithNextHeader() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - CloseableHttpResponse httpResponse2 = mock(CloseableHttpResponse.class); - when(httpClient.execute(any(HttpHost.class),any(HttpGet.class))).thenReturn(httpResponse, httpResponse2); - - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(200); - - Gson gson = new Gson(); - SoftwareHeritageOriginEntry entry1 = buildSoftwareHeritageOriginEntry("someUrl1"); - SoftwareHeritageOriginEntry entry2 = buildSoftwareHeritageOriginEntry("someUrl2"); - - //1st response - { - Header header = new BasicHeader("Link", "; rel=\"next\""); - when(httpResponse.getAllHeaders()).thenReturn(new Header[] {header}); - - when(httpResponse.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = gson.toJson(new SoftwareHeritageOriginEntry[] {entry1}); - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - - } - //2nd response - { - when(httpResponse2.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse2.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = gson.toJson(new SoftwareHeritageOriginEntry[] {entry2}); - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - } - // execute - importer.run(portBindings, conf, parameters); - - // assert - verify(originWriter, times(2)).append(originCaptor.capture()); - List origins = originCaptor.getAllValues(); - assertEquals(2, origins.size()); - assertNotNull(origins.get(0)); - assertEquals(entry1.getUrl(), origins.get(0).getUrl()); - assertNotNull(origins.get(1)); - assertEquals(entry2.getUrl(), origins.get(1).getUrl()); - verifyReport(2, SoftwareHeritageOriginsImporter.COUNTER_NAME_TOTAL); - } - - @Test - public void testRunWithRetryBecauseOfThrottling() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY, "1"); - - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - CloseableHttpResponse httpResponse2 = mock(CloseableHttpResponse.class); - when(httpClient.execute(any(HttpHost.class),any(HttpGet.class))).thenReturn(httpResponse, httpResponse2); - - Gson gson = new Gson(); - SoftwareHeritageOriginEntry entry = buildSoftwareHeritageOriginEntry("someUrl1"); - - //1st response - { - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(429); - when(httpResponse.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = "throttling"; - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - - } - //2nd response - { - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(200); - when(httpResponse2.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse2.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = gson.toJson(new SoftwareHeritageOriginEntry[] {entry}); - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - } - // execute - importer.run(portBindings, conf, parameters); - - // assert - verify(originWriter, times(1)).append(originCaptor.capture()); - List origins = originCaptor.getAllValues(); - assertEquals(1, origins.size()); - assertNotNull(origins.get(0)); - assertEquals(entry.getUrl(), origins.get(0).getUrl()); - verifyReport(1, SoftwareHeritageOriginsImporter.COUNTER_NAME_TOTAL); - - } - - @Test - public void testRunWithRetryBecauseOfServerError() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY, "1"); - - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - CloseableHttpResponse httpResponse2 = mock(CloseableHttpResponse.class); - when(httpClient.execute(any(HttpHost.class),any(HttpGet.class))).thenReturn(httpResponse, httpResponse2); - - Gson gson = new Gson(); - SoftwareHeritageOriginEntry entry = buildSoftwareHeritageOriginEntry("someUrl1"); - - //1st response - { - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(503); - when(httpResponse.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = "SERVER ERROR"; - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - - } - //2nd response - { - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(200); - when(httpResponse2.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse2.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = gson.toJson(new SoftwareHeritageOriginEntry[] {entry}); - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - } - // execute - importer.run(portBindings, conf, parameters); - - // assert - verify(originWriter, times(1)).append(originCaptor.capture()); - List origins = originCaptor.getAllValues(); - assertEquals(1, origins.size()); - assertNotNull(origins.get(0)); - assertEquals(entry.getUrl(), origins.get(0).getUrl()); - verifyReport(1, SoftwareHeritageOriginsImporter.COUNTER_NAME_TOTAL); - } - - @Test - public void testRunWithRetryBecauseOfTimeoutException() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY, "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RETRY_COUNT, "1"); - - CloseableHttpResponse httpResponse2 = mock(CloseableHttpResponse.class); - when(httpClient.execute(any(HttpHost.class), any(HttpGet.class))).thenThrow(SocketTimeoutException.class) - .thenReturn(httpResponse2); - - Gson gson = new Gson(); - SoftwareHeritageOriginEntry entry = buildSoftwareHeritageOriginEntry("someUrl1"); - - //response - { - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(200); - when(httpResponse2.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse2.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = gson.toJson(new SoftwareHeritageOriginEntry[] {entry}); - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - } - // execute - importer.run(portBindings, conf, parameters); - - // assert - verify(originWriter, times(1)).append(originCaptor.capture()); - List origins = originCaptor.getAllValues(); - assertEquals(1, origins.size()); - assertNotNull(origins.get(0)); - assertEquals(entry.getUrl(), origins.get(0).getUrl()); - verifyReport(1, SoftwareHeritageOriginsImporter.COUNTER_NAME_TOTAL); - } - - @Test - public void testExceedTheAllowedRetriesAfterTimeoutException() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RATELIMIT_DELAY, "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RETRY_COUNT, "1"); - - when(httpClient.execute(any(HttpHost.class), any(HttpGet.class))).thenThrow(SocketTimeoutException.class); - - // execute - assertThrows(SocketTimeoutException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunWithoutRetryBecauseOfServerError() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "1"); - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_RETRY_COUNT, "0"); - - CloseableHttpResponse httpResponse = mock(CloseableHttpResponse.class); - when(httpClient.execute(any(HttpHost.class),any(HttpGet.class))).thenReturn(httpResponse); - - StatusLine statusLine = mock(StatusLine.class); - when(statusLine.getStatusCode()).thenReturn(503); - when(httpResponse.getStatusLine()).thenReturn(statusLine); - - HttpEntity httpEntity = mock(HttpEntity.class); - when(httpResponse.getEntity()).thenReturn(httpEntity); - - // preparing page contents - String pageContents = "SERVER ERROR"; - InputStream pageInputStream = new ByteArrayInputStream(pageContents.getBytes()); - when(httpEntity.getContentLength()).thenReturn(Long.valueOf(pageContents.length())); - when(httpEntity.getContent()).thenReturn(pageInputStream); - - // execute - assertThrows(RuntimeException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointUriMissing() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams(null, "somehost.com", "https", "8080", "1"); - - // execute - assertThrows(IllegalArgumentException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointUriUndefinedAkaOfflineMode() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams(WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE, "somehost.com", "https", "8080", "1"); - - // execute - importer.run(portBindings, conf, parameters); - - // assert - verify(originWriter, times(0)).append(originCaptor.capture()); - List origins = originCaptor.getAllValues(); - assertEquals(0, origins.size()); - verifyReport(0, SoftwareHeritageOriginsImporter.COUNTER_NAME_TOTAL); - } - - @Test - public void testRunEndpointHostMissing() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", null, "https", "8080", "1"); - - // execute - assertThrows(IllegalArgumentException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointSchemeMissing() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", null, "8080", "1"); - - // execute - assertThrows(IllegalArgumentException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointPortMissing() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", null, "1"); - - // execute - assertThrows(IllegalArgumentException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointPortInvalidValue() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "invalid", "1"); - - // execute - assertThrows(IllegalArgumentException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointStartIndexMissing() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", null); - - // execute - assertThrows(IllegalArgumentException.class, () -> importer.run(portBindings, conf, parameters)); - } - - @Test - public void testRunEndpointStartIndexInvalidValue() throws Exception { - // given - SoftwareHeritageOriginsImporter importer = initializeImporterParams("api/origins", "somehost.com", "https", "8080", "invalid"); - - // execute - assertThrows(NumberFormatException.class, () -> importer.run(portBindings, conf, parameters)); - } - - // ------------------------------ PRIVATE ------------------------------------- - - private SoftwareHeritageOriginsImporter initializeImporterParams(String uriRoot, String host, String scheme, String port, - String startElementIndex) throws Exception { - System.setProperty(OOZIE_ACTION_OUTPUT_FILENAME, - tempFolder.getAbsolutePath() + File.separatorChar + "test.properties"); - - Map output = new HashMap<>(); - output.put(SoftwareHeritageOriginsImporter.PORT_OUT_ORIGINS, new Path("/irrelevant/location/as/it/will/be/mocked")); - this.portBindings = new PortBindings(Collections.emptyMap(), output); - this.conf = new Configuration(); - this.parameters = new HashMap<>(); - if (uriRoot != null) { - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_URI_ROOT, uriRoot); - } - if (host != null) { - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_HOST, host); - } - if (scheme != null) { - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_SCHEME, scheme); - } - if (port != null) { - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_ENDPOINT_PORT, port); - } - if (startElementIndex != null) { - this.parameters.put(IMPORT_SOFTWARE_HERITAGE_START_INDEX, startElementIndex); - } - - return new SoftwareHeritageOriginsImporter() { - - @Override - protected DataFileWriter getWriter(FileSystem fs, PortBindings portBindings) throws IOException { - return originWriter; - } - - @Override - protected CloseableHttpClient buildHttpClient(int connectionTimeout, int readTimeout) { - return httpClient; - } - - }; - } - - private SoftwareHeritageOriginEntry buildSoftwareHeritageOriginEntry(String url) { - SoftwareHeritageOriginEntry entry = new SoftwareHeritageOriginEntry(); - entry.setUrl(url); - return entry; - } - -}