Skip to content

Commit

Permalink
Merge pull request #246 from clarin-eric/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
wowasa authored Jun 4, 2024
2 parents 738589c + c51cd50 commit c7f9d93
Show file tree
Hide file tree
Showing 36 changed files with 420 additions and 287 deletions.
Binary file modified .mvn/wrapper/maven-wrapper.jar
Binary file not shown.
6 changes: 3 additions & 3 deletions .mvn/wrapper/maven-wrapper.properties
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.5/apache-maven-3.8.5-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.1.1/maven-wrapper-3.1.1.jar
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.6/apache-maven-3.9.6-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 6.4.1
- fix for rendering issue in tables (issue #245)
- adding a "back to current" link in historic reports (issue #242)
- re-engineering exception handling to stop thread pool (re-opened issue #218)
- adding identification and timeout for schema look-up (re-opened issues #222, #231)

# 6.4.0
- allowing proxy usage for external HTTP calls (issue #227)
- sending User-Agent string with HTTP calls (issue #222)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import eu.clarin.cmdi.curation.api.report.instance.CMDInstanceReport;
import eu.clarin.cmdi.curation.api.report.linkchecker.LinkcheckerDetailReport;
import eu.clarin.cmdi.curation.api.report.profile.CMDProfileReport;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;

import java.net.MalformedURLException;
import java.net.URL;
Expand Down Expand Up @@ -67,7 +68,7 @@ public interface CurationModule {
* @param path the path
* @return the collection report
*/
public CollectionReport processCollection(Path path);
public CollectionReport processCollection(Path path) throws MalFunctioningProcessorException;

/**
* Gets linkchecker detail reports.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import eu.clarin.cmdi.curation.api.report.linkchecker.LinkcheckerDetailReport.StatusDetailReport;
import eu.clarin.cmdi.curation.api.report.profile.CMDProfileReport;
import eu.clarin.cmdi.curation.api.utils.FileNameEncoder;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import eu.clarin.cmdi.curation.pph.conf.PPHConfig;
import eu.clarin.linkchecker.persistence.model.AggregatedStatus;
import eu.clarin.linkchecker.persistence.model.Status;
Expand Down Expand Up @@ -80,7 +81,12 @@ public CMDProfileReport processCMDProfile(String profileId) {
*/
@Override
public CMDProfileReport processCMDProfile(URL schemaLocation) {
return ctx.getBean(CMDProfile.class, schemaLocation.toString(), "1.x").generateReport();
try {
return ctx.getBean(CMDProfile.class, schemaLocation.toString(), "1.x").generateReport();
}
catch (MalFunctioningProcessorException e) {
throw new RuntimeException(e);
}
}

/**
Expand Down Expand Up @@ -117,7 +123,7 @@ public CMDInstanceReport processCMDInstance(Path path) {
try {
return ctx.getBean(CMDInstance.class, path, Files.size(path), "testProvider").generateReport();
}
catch (IOException|BeansException e) {
catch (IOException | BeansException | MalFunctioningProcessorException e) {

throw new RuntimeException(e);

Expand Down Expand Up @@ -160,7 +166,7 @@ public CMDInstanceReport processCMDInstance(URL url) {

return report;
}
catch (IOException e) {
catch (IOException | MalFunctioningProcessorException e) {

throw new RuntimeException(e);

Expand All @@ -174,7 +180,7 @@ public CMDInstanceReport processCMDInstance(URL url) {
* @return the collection report
*/
@Override
public CollectionReport processCollection(Path path) {
public CollectionReport processCollection(Path path) throws MalFunctioningProcessorException {

return ctx.getBean(CMDCollection.class, path).generateReport();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import eu.clarin.cmdi.curation.api.entity.CMDProfile;
import eu.clarin.cmdi.curation.api.processor.CMDProfileProcessor;
import eu.clarin.cmdi.curation.api.report.profile.CMDProfileReport;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;
Expand All @@ -21,7 +22,7 @@ public class ProfileReportCache {
CMDProfileProcessor processor;

@Cacheable(value = "publicProfileReportCache", key = "#profile.schemaLocation", condition = "!'instance'.equals(@apiConfig.getMode())")
public CMDProfileReport getProfileReport(CMDProfile profile) {
public CMDProfileReport getProfileReport(CMDProfile profile) throws MalFunctioningProcessorException {

return processor.process(profile);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import eu.clarin.cmdi.curation.api.processor.CMDInstanceProcessor;
import eu.clarin.cmdi.curation.api.report.instance.CMDInstanceReport;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import eu.clarin.cmdi.vlo.importer.CMDIData;
import eu.clarin.cmdi.vlo.importer.CMDIRecordProcessor;
import eu.clarin.cmdi.vlo.importer.processor.ValueSet;
Expand Down Expand Up @@ -71,7 +72,7 @@ public Optional<CMDIData<Map<String, List<ValueSet>>>> getCmdiData() {
return this.cmdiData;
}

public CMDInstanceReport generateReport() {
public CMDInstanceReport generateReport() throws MalFunctioningProcessorException {

return processor.process(this);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import eu.clarin.cmdi.curation.api.cache.ProfileReportCache;
import eu.clarin.cmdi.curation.api.report.profile.CMDProfileReport;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import lombok.Data;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Scope;
Expand All @@ -28,7 +29,7 @@ public CMDProfile(String schemaLocation, String cmdiVersion) {
this.cmdiVersion = cmdiVersion;
}

public CMDProfileReport generateReport() {
public CMDProfileReport generateReport() throws MalFunctioningProcessorException {
return profileReportCache.getProfileReport(this);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package eu.clarin.cmdi.curation.api.exception;

/**
 * Checked exception signalling that a processor could not complete its work.
 * <p>
 * It is a checked exception, so every caller has to either handle it or declare it.
 * An underlying exception can be passed in as the cause and is preserved.
 */
public class MalFunctioningProcessorException extends Exception {

    /** Explicit version id, since {@link Exception} is {@link java.io.Serializable}. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception without message and without cause.
     */
    public MalFunctioningProcessorException() {
        super();
    }

    /**
     * Creates an exception with a descriptive message.
     *
     * @param message description of what went wrong
     */
    public MalFunctioningProcessorException(String message) {
        super(message);
    }

    /**
     * Creates an exception with a descriptive message and the underlying cause.
     *
     * @param message description of what went wrong
     * @param cause   the exception which made the processor fail
     */
    public MalFunctioningProcessorException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception wrapping the underlying cause.
     *
     * @param cause the exception which made the processor fail
     */
    public MalFunctioningProcessorException(Throwable cause) {
        super(cause);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import eu.clarin.cmdi.curation.api.report.instance.CMDInstanceReport;
import eu.clarin.cmdi.curation.api.subprocessor.AbstractSubprocessor;
import eu.clarin.cmdi.curation.api.subprocessor.instance.*;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import jakarta.annotation.PostConstruct;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
Expand All @@ -30,16 +31,17 @@ private void init() {
.map(abstactSubprocessorClass -> ctx.getBean(abstactSubprocessorClass)).collect(Collectors.toList());
}

public CMDInstanceReport process(CMDInstance instance){
public CMDInstanceReport process(CMDInstance instance) throws MalFunctioningProcessorException {

final CMDInstanceReport instanceReport = new CMDInstanceReport();


this.subprocessors
.stream()
.takeWhile(p -> instanceReport.isProcessable)
.forEach(subprocessor -> subprocessor.process(instance, instanceReport));
for(AbstractSubprocessor subprocessor : this.subprocessors) {

if(!instanceReport.isProcessable) break;

subprocessor.process(instance, instanceReport);
}

return instanceReport;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import eu.clarin.cmdi.curation.api.subprocessor.profile.ProfileConceptHandler;
import eu.clarin.cmdi.curation.api.subprocessor.profile.ProfileFacetHandler;
import eu.clarin.cmdi.curation.api.subprocessor.profile.ProfileHeaderHandler;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Scope;
Expand All @@ -23,7 +24,7 @@ public class CMDProfileProcessor {
@Autowired
ApplicationContext ctx;

public CMDProfileReport process(CMDProfile profile) {
public CMDProfileReport process(CMDProfile profile) throws MalFunctioningProcessorException {

CMDProfileReport report = new CMDProfileReport();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
*/
package eu.clarin.cmdi.curation.api.subprocessor;

import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;

/**
* The type Abstract subprocessor.
*
Expand All @@ -17,6 +19,6 @@ public abstract class AbstractSubprocessor<E, R> {
* @param entity the entity
* @param report the report
*/
public abstract void process(E entity, R report);
public abstract void process(E entity, R report) throws MalFunctioningProcessorException;

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import eu.clarin.cmdi.curation.api.report.collection.sec.ProfileReport.Profile;
import eu.clarin.cmdi.curation.api.report.collection.sec.ResProxyReport.InvalidReference;
import eu.clarin.cmdi.curation.api.report.instance.CMDInstanceReport;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import eu.clarin.linkchecker.persistence.model.AggregatedStatus;
import eu.clarin.linkchecker.persistence.repository.AggregatedStatusRepository;
import eu.clarin.linkchecker.persistence.repository.UrlRepository;
Expand All @@ -32,8 +33,7 @@
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.*;
import java.util.stream.Stream;

/**
Expand All @@ -53,7 +53,9 @@ public class CollectionAggregator {
@Autowired
private UrlRepository uRep;

private Map<String, Collection<String>> mdSelfLinks = new HashMap<String, Collection<String>>();
private final Map<String, Collection<String>> mdSelfLinks = new HashMap<String, Collection<String>>();

private int counter;

/**
* Process.
Expand All @@ -69,7 +71,28 @@ public void process(CMDCollection collection, CollectionReport collectionReport)

collectionReport.fileReport.collectionRoot = conf.getDirectory().getDataRoot().relativize(collection.getPath()).toString();

ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(conf.getThreadpoolSize());
//ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(conf.getThreadpoolSize());
ThreadPoolExecutor executor = new ThreadPoolExecutor(conf.getThreadpoolSize(), conf.getThreadpoolSize(), 1, TimeUnit.DAYS, new LinkedBlockingQueue<>()){

/**
 * Hook called by the pool after a task has finished. As soon as one task has failed,
 * the whole pool is shut down via {@link #shutdownNow()}.
 *
 * @param r the runnable that has completed
 * @param t the exception that caused termination, or {@code null} if the task completed normally
 */
@Override
protected void afterExecute(Runnable r, Throwable t) {

    // the ThreadPoolExecutor documentation asks overriding methods to invoke super first
    super.afterExecute(r, t);

    Throwable failure = t;

    // a task wrapped in a Future keeps its exception inside the Future instead of
    // handing it in as t, so it has to be fetched from there
    if (failure == null
            && r instanceof Future<?>
            && ((Future<?>) r).isDone()) {
        try {
            ((Future<?>) r).get();
        }
        catch (CancellationException | ExecutionException e) {
            failure = e;
        }
        catch (InterruptedException e) {
            // restore the interrupt flag so that the interruption is not lost
            Thread.currentThread().interrupt();
            failure = e;
        }
    }

    if (failure != null) {
        log.debug("task terminated with an exception, shutting down the thread pool", failure);
        this.shutdownNow();
    }
}
};

try {
Files.walkFileTree(collection.getPath(), new FileVisitor<Path>() {
Expand All @@ -95,15 +118,22 @@ public FileVisitResult visitFile(Path filePath, BasicFileAttributes attrs) throw

collectionReport.fileReport.size += attrs.size();

CMDInstance instance = ctx.getBean(CMDInstance.class, filePath, attrs.size(),
final CMDInstance instance = ctx.getBean(CMDInstance.class, filePath, attrs.size(),
collectionReport.fileReport.provider);

executor.submit(() -> {
executor.execute(() -> {

CMDInstanceReport instanceReport = null;
try {
instanceReport = instance.generateReport();
}
catch (MalFunctioningProcessorException e) {
throw new RuntimeException(e);
}

CMDInstanceReport instanceReport = instance.generateReport();
addReport(collectionReport, instanceReport);
addReport(collectionReport, instanceReport);

}); // end executor.submit
}); // end executor.execute

return FileVisitResult.CONTINUE;
}
Expand All @@ -130,17 +160,14 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx

executor.shutdown();

while (!executor.isTerminated()) {
try {
Thread.sleep(1000);
}
catch (InterruptedException ex) {
log.error("Error occured while waiting for the threadpool to terminate.");
}
try {
executor.awaitTermination(1, TimeUnit.HOURS);
}
catch (InterruptedException e) {
log.error("Error occured while waiting for the threadpool to terminate.");
}

calculateAverages(collectionReport);

}

/**
Expand Down Expand Up @@ -233,13 +260,11 @@ public synchronized void addReport(CollectionReport collectionReport, CMDInstanc
.get()
.count++
);
collectionReport.facetReport.aggregatedScore += instanceReport.facetReport.score;
}
else {
collectionReport.fileReport.numOfFilesNonProcessable++;
}

collectionReport.facetReport.aggregatedScore += instanceReport.facetReport.score;

}

private void calculateAverages(CollectionReport collectionReport) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
import eu.clarin.cmdi.curation.api.report.profile.sec.ConceptReport;
import eu.clarin.cmdi.curation.api.subprocessor.AbstractSubprocessor;
import eu.clarin.cmdi.curation.api.xml.XPathValueService;
import eu.clarin.cmdi.curation.ccr.exception.CCRServiceNotAvailableException;
import eu.clarin.cmdi.curation.api.exception.MalFunctioningProcessorException;
import eu.clarin.cmdi.curation.cr.CRService;
import eu.clarin.cmdi.curation.cr.exception.CRServiceStorageException;
import eu.clarin.cmdi.curation.cr.exception.NoProfileCacheEntryException;
import eu.clarin.cmdi.curation.cr.profile_parser.CMDINode;
import eu.clarin.cmdi.curation.pph.exception.PPHServiceNotAvailableException;
import eu.clarin.cmdi.vlo.importer.processor.ValueSet;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -60,7 +63,7 @@ public class InstanceFacetProcessor extends AbstractSubprocessor<CMDInstance, CM
* @param instanceReport the instance report
*/
@Override
public void process(CMDInstance instance, CMDInstanceReport instanceReport) {
public void process(CMDInstance instance, CMDInstanceReport instanceReport) throws MalFunctioningProcessorException {

if(instance.getCmdiData().isEmpty()) {

Expand All @@ -87,8 +90,8 @@ public void process(CMDInstance instance, CMDInstanceReport instanceReport) {


// the key of the facetValuesMap is the target facet name
instanceReport.facetReport.coverages.stream().forEach(coverage -> {
if (coverage.coveredByInstance = facetValuesMap.keySet().contains(coverage.name)) { // initialization and test!
instanceReport.facetReport.coverages.forEach(coverage -> {
if (coverage.coveredByInstance = facetValuesMap.containsKey(coverage.name)) { // initialization and test!
instanceReport.facetReport.numOfFacetsCoveredByInstance++;
}
});
Expand Down Expand Up @@ -189,8 +192,8 @@ public void process(CMDInstance instance, CMDInstanceReport instanceReport) {
instanceReport.details.add(new Detail(Severity.FATAL, "file", "can't parse file '" + instance.getPath().getFileName() + "'"));
instanceReport.isProcessable = false;
}
catch (CRServiceStorageException e) {
throw new RuntimeException(e);
catch (CRServiceStorageException | PPHServiceNotAvailableException | CCRServiceNotAvailableException e) {
throw new MalFunctioningProcessorException(e);
}
}
}
Expand Down
Loading

0 comments on commit c7f9d93

Please sign in to comment.