diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f582d5..1ebdba9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: - name: Upload logs on build failure if: failure() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test-results path: | diff --git a/README.md b/README.md index f1f75f0..940e96e 100644 --- a/README.md +++ b/README.md @@ -62,10 +62,11 @@ It will use the unapi interface to get the data for each ppn. #### SRU ```properties -importer.sru-sources.gvk.query-pattern=importer.sru-sources.gvk.query-pattern=pica.aed={date} and ((pica.sge="615" and (pica.exk="dfi aktuell" or pica.exk="afa" or pica.exk="dfi compact" or pica.exk="tondokument" or pica.exk="video" or pica.exk="abschlussarbeit" or pica.exk="Zeitschriftenaufsatz" or pica.exk="Sicherheitskopie" or pica.exk="GFfK" or pica.exk="PA-Volltext")) or (pica.sge="lg 3" and (pica.exk="Karikatur" or pica.exk="PA-Volltext" or pica.exk="Presseartikel" or pica.exk="Sicherheitskopie"))) +importer.sru-sources.gvk.query-pattern=pica.aed={date} and ((pica.sge="615" and (pica.exk="dfi aktuell" or pica.exk="afa" or pica.exk="dfi compact" or pica.exk="tondokument" or pica.exk="video" or pica.exk="abschlussarbeit" or pica.exk="Zeitschriftenaufsatz" or pica.exk="Sicherheitskopie" or pica.exk="GFfK" or pica.exk="PA-Volltext")) or (pica.sge="lg 3" and (pica.exk="Karikatur" or pica.exk="PA-Volltext" or pica.exk="Presseartikel" or pica.exk="Sicherheitskopie"))) importer.sru-sources.gvk.url=https://sru.k10plus.de/gvk importer.sru-sources.gvk.date-overwrite=2024-01-01 -importer.sru-sources.gvk.oldest-date=2020-01-01 +importer.sru-sources.gvk.oldest-date=2024-05-24 +importer.sru-sources.gvk.newest-date=2024-05-31 ``` The importer looks what is the newest record in the database and uses this date as the date parameter for the query. 
diff --git a/src/main/java/de/vzg/oai_importer/JobService.java b/src/main/java/de/vzg/oai_importer/JobService.java index beed96b..a3fd718 100644 --- a/src/main/java/de/vzg/oai_importer/JobService.java +++ b/src/main/java/de/vzg/oai_importer/JobService.java @@ -7,17 +7,22 @@ import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import javax.xml.transform.TransformerException; import org.mycore.oai.pmh.OAIException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.data.domain.Page; +import org.springframework.data.domain.PageImpl; import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Service; import de.vzg.oai_importer.foreign.Configuration; import de.vzg.oai_importer.foreign.jpa.ForeignEntity; import de.vzg.oai_importer.foreign.jpa.ForeignEntityRepository; +import de.vzg.oai_importer.importer.FileBased; import de.vzg.oai_importer.importer.Importer; import de.vzg.oai_importer.mapping.jpa.Mapping; import de.vzg.oai_importer.mycore.MyCoReSynchronizeService; @@ -48,7 +53,6 @@ public class JobService { @Autowired ForeignEntityRepository repo; - public Page listImportableRecords(String jobID, Pageable pageable) { ImportJobConfiguration jobConfig = configuration.getJobs().get(jobID); String targetConfigId = jobConfig.getTargetConfigId(); @@ -81,12 +85,13 @@ public Map> testMapping(String jobID, boolean updat Configuration source = configuration.getCombinedConfig().get(sourceConfigId); final List foreignEntities = new ArrayList<>(); - if(updatable) { + if (updatable) { Page> toUpdate = importerService - .detectUpdateableEntities(sourceConfigId, source, target.getUrl(), Pageable.unpaged()); + .detectUpdateableEntities(sourceConfigId, source, target.getUrl(), Pageable.unpaged()); toUpdate.forEach(pair -> foreignEntities.add(pair.first())); } else { - Page toImport = 
importerService.detectImportableEntities(sourceConfigId, source, target.getUrl(), Pageable.unpaged()); + Page toImport + = importerService.detectImportableEntities(sourceConfigId, source, target.getUrl(), Pageable.unpaged()); toImport.forEach(foreignEntities::add); } @@ -110,8 +115,7 @@ public void runJob(String name) throws OAIException, IOException, URISyntaxExcep MyCoReTargetConfiguration target = configuration.getTargets().get(targetConfigId); - - Configuration source = configuration.getCombinedConfig().get(sourceConfigId); + Configuration source = configuration.getCombinedConfig().get(sourceConfigId); Page records = importerService .detectImportableEntities(sourceConfigId, source, target.getUrl(), Pageable.unpaged()); @@ -143,6 +147,141 @@ public void runJob(String name) throws OAIException, IOException, URISyntaxExcep }); } + public Page>> listImportableFiles(String name, Pageable pageable){ + ImportJobConfiguration jobConfig = configuration.getJobs().get(name); + String targetConfigId = jobConfig.getTargetConfigId(); + String sourceConfigId = jobConfig.getSourceConfigId(); + + MyCoReTargetConfiguration target = configuration.getTargets().get(targetConfigId); + + Configuration source = configuration.getCombinedConfig().get(sourceConfigId); + + Page> records + = importerService.detectUpdateableEntities(sourceConfigId, source, target.getUrl(), pageable); + + Importer importer = context.getBean(jobConfig.getImporter(), Importer.class); + importer.setConfig(jobConfig.getImporterConfig()); + log.info("Checking if files are updatable for {} records", records.getTotalElements()); + + if (importer instanceof FileBased e) { + return records.map(pair -> { + ForeignEntity record = pair.first(); + MyCoReObjectInfo myCoReObjectInfo = pair.second(); + + log.info("Checking record {} and mycore object {}", record.getForeignId(), + myCoReObjectInfo.getMycoreId()); + + try { + return Map.entry(record, e.listImportableFiles(target, record)); + } catch (IOException | 
URISyntaxException ex) { + throw new RuntimeException(ex); + } + }); + } + return Page.empty(); + } + + public Map.Entry> runJobFileCheckFor(String name, String foreignID) + throws IOException, URISyntaxException { + ImportJobConfiguration jobConfig = configuration.getJobs().get(name); + String targetConfigId = jobConfig.getTargetConfigId(); + String sourceConfigId = jobConfig.getSourceConfigId(); + + MyCoReTargetConfiguration target = configuration.getTargets().get(targetConfigId); + + Configuration source = configuration.getCombinedConfig().get(sourceConfigId); + + MyCoReObjectInfo myCoReObjectInfo = mycoreRepo.findFirstByRepositoryAndImportURLAndImportID(target.getUrl(), + jobConfig.getSourceConfigId(), foreignID); + + ForeignEntity foreign = repo.findFirstByConfigIdAndForeignId(sourceConfigId, foreignID); + + Importer importer = context.getBean(jobConfig.getImporter(), Importer.class); + importer.setConfig(jobConfig.getImporterConfig()); + if (importer instanceof FileBased e) { + e.listMissingFiles(target, myCoReObjectInfo, foreign); + return Map.entry(foreign, e.listMissingFiles(target, myCoReObjectInfo, foreign)); + } + return null; + } + + public Page>> runJobFileCheck(String name, Pageable pageable) { + ImportJobConfiguration jobConfig = configuration.getJobs().get(name); + String targetConfigId = jobConfig.getTargetConfigId(); + String sourceConfigId = jobConfig.getSourceConfigId(); + + MyCoReTargetConfiguration target = configuration.getTargets().get(targetConfigId); + + Configuration source = configuration.getCombinedConfig().get(sourceConfigId); + + Page> records + = importerService.detectUpdateableEntities(sourceConfigId, source, target.getUrl(), pageable); + + Importer importer = context.getBean(jobConfig.getImporter(), Importer.class); + importer.setConfig(jobConfig.getImporterConfig()); + log.info("Checking if files are updatable for {} records", records.getTotalElements()); + + if (importer instanceof FileBased e) { + return new 
PageImpl<>(records.stream().parallel().map(pair -> { + ForeignEntity record = pair.first(); + MyCoReObjectInfo myCoReObjectInfo = pair.second(); + + log.info("Checking record {} and mycore object {}", record.getForeignId(), + myCoReObjectInfo.getMycoreId()); + + List missingFiles = null; + try { + missingFiles = e.listMissingFiles(target, myCoReObjectInfo, record); + } catch (IOException | URISyntaxException ex) { + throw new RuntimeException(ex); + } + return Map.entry(record, missingFiles); + }).collect(Collectors.toList())); + } else { + log.info("Importer {} is not a FileBased importer", jobConfig.getImporter()); + } + + return Page.empty(); + } + + public Page>> runJobFileImport(String name, Pageable pageable) { + ImportJobConfiguration jobConfig = configuration.getJobs().get(name); + String targetConfigId = jobConfig.getTargetConfigId(); + String sourceConfigId = jobConfig.getSourceConfigId(); + + MyCoReTargetConfiguration target = configuration.getTargets().get(targetConfigId); + + Configuration source = configuration.getCombinedConfig().get(sourceConfigId); + + Page> records + = importerService.detectUpdateableEntities(sourceConfigId, source, target.getUrl(), pageable); + + Importer importer = context.getBean(jobConfig.getImporter(), Importer.class); + importer.setConfig(jobConfig.getImporterConfig()); + log.info("Checking if files are updatable for {} records", records.getTotalElements()); + + if (importer instanceof FileBased e) { + return records.map(pair -> { + ForeignEntity record = pair.first(); + MyCoReObjectInfo myCoReObjectInfo = pair.second(); + + log.info("Checking record {} and mycore object {}", record.getForeignId(), + myCoReObjectInfo.getMycoreId()); + + try { + List fixedFiles = e.fixMissingFiles(target, myCoReObjectInfo, record); + return Map.entry(record, fixedFiles); + } catch (IOException | URISyntaxException ex) { + throw new RuntimeException(ex); + } + }); + } else { + log.info("Importer {} is not a FileBased importer", 
jobConfig.getImporter()); + } + + return Page.empty(); + } + public void runUpdateJob(String name) { ImportJobConfiguration jobConfig = configuration.getJobs().get(name); String targetConfigId = jobConfig.getTargetConfigId(); @@ -174,7 +313,7 @@ public void runUpdateJob(String name) { log.info("Records with errors: {}", errorRecords); } - public void importSingleDocument(String jobID, String recordID) { + public void importSingleDocument(String jobID, String recordID) throws IOException, URISyntaxException, TransformerException { ImportJobConfiguration jobConfig = configuration.getJobs().get(jobID); String sourceConfigId = jobConfig.getSourceConfigId(); @@ -184,6 +323,12 @@ public void importSingleDocument(String jobID, String recordID) { Importer importer = context.getBean(jobConfig.getImporter(), Importer.class); importer.setConfig(jobConfig.getImporterConfig()); importer.importRecord(target, testRecord); + + try { + myCoReSynchronizeService.synchronize(target); + } catch (IOException | URISyntaxException e) { + throw new RuntimeException(e); + } } public void updateSingleDocument(String jobID, String recordID) { diff --git a/src/main/java/de/vzg/oai_importer/OaiImporterCLIApplication.java b/src/main/java/de/vzg/oai_importer/OaiImporterCLIApplication.java index ad80aa6..7f48436 100644 --- a/src/main/java/de/vzg/oai_importer/OaiImporterCLIApplication.java +++ b/src/main/java/de/vzg/oai_importer/OaiImporterCLIApplication.java @@ -1,10 +1,14 @@ package de.vzg.oai_importer; import java.io.IOException; +import java.util.Date; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.jdom2.JDOMException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.WebApplicationType; import org.springframework.boot.autoconfigure.AutoConfiguration; @@ -14,6 +18,8 @@ import 
org.springframework.context.ApplicationContext; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.FilterType; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; import org.springframework.shell.standard.ShellComponent; import org.springframework.shell.standard.ShellMethod; import org.springframework.shell.standard.ShellOption; @@ -21,6 +27,7 @@ import de.vzg.oai_importer.foreign.Configuration; import de.vzg.oai_importer.foreign.Harvester; import de.vzg.oai_importer.foreign.jpa.ForeignEntity; +import de.vzg.oai_importer.importer.PPNLIST2MyCoReImporter; import de.vzg.oai_importer.mycore.MyCoReSynchronizeService; import de.vzg.oai_importer.mycore.MyCoReTargetConfiguration; import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfo; @@ -39,6 +46,8 @@ public class OaiImporterCLIApplication { @Autowired MyCoReSynchronizeService myCoReSynchronizeService; @Autowired + ApplicationContext context; + @Autowired private ImporterConfiguration configuration; @Autowired private JobService jobService; @@ -71,9 +80,12 @@ public void updateSource(@ShellOption() String job, @ShellOption(defaultValue = = (Harvester) applicationContext.getBean(source.getHarvester()); List updatedRecords = bean.update(sourceConfigId, source, onlyMissing); updatedRecords.forEach(record -> LOGGER.info("Updated record {}", record.getForeignId())); + } catch (RuntimeException e) { + LOGGER.error("Error while updating source of job {}", job, e.getCause()); } catch (Exception e) { LOGGER.error("Error while updating source of job {}", job, e); } + } @ShellMethod(key = "update-target", value = "Updates the target of a job") @@ -117,6 +129,99 @@ public void runImporter(@ShellOption() String job) { } } + @ShellMethod(key = "run-importer-file-check", value = "Runs the importer file check") + public void runImporterFileCheck(@ShellOption() String job) { + if (checkJobPresent(job)) { + return; + } + + 
LOGGER.info("Running job {}", job); + try { + + jobService.runJobFileCheck(job, Pageable.unpaged()) + .stream().filter(record -> !record.getValue().isEmpty()) + .forEach(record -> LOGGER.info("Record {} is missing {} files", record.getKey().getForeignId(), + record.getValue().stream().collect(Collectors.joining(", ")))); + } catch (Exception e) { + LOGGER.error("Error while running job {}", job, e); + } + } + + @ShellMethod(key = "run-importer-file-check-for-record", value = "Runs the importer file check for a record") + public void runImporterFileCheckForRecord(@ShellOption() String job, @ShellOption() String recordId) { + if (checkJobPresent(job)) { + return; + } + + LOGGER.info("Running job {} for record {}", job, recordId); + try { + Map.Entry> foreignEntityListEntry + = jobService.runJobFileCheckFor(job, recordId); + if (foreignEntityListEntry != null) { + LOGGER.info("Record {} is missing {} files", foreignEntityListEntry.getKey().getForeignId(), + foreignEntityListEntry.getValue().size()); + } else { + LOGGER.info("Record {} not found", recordId); + } + } catch (Exception e) { + LOGGER.error("Error while running job {}", job, e); + } + } + + @ShellMethod(key = "run-importer-file-import", value = "Runs the importer which imports files to existing records") + public void runImporterFileImport(@ShellOption() String job) { + if (checkJobPresent(job)) { + return; + } + + LOGGER.info("Running job {}", job); + try { + jobService.runJobFileImport(job, Pageable.unpaged()).stream().filter(record -> !record.getValue().isEmpty()) + .forEach(record -> { + LOGGER.info("Added missing files to {}:{}", record.getKey().getForeignId(), + record.getValue().stream().collect(Collectors.joining(", "))); + }); + } catch (Exception e) { + LOGGER.error("Error while running job {}", job, e); + } + } + + @ShellMethod(key = "check-never-should-have-been-imported", + value = "Checks if records have been imported that should not have been imported") + public void 
checkNeverShouldHaveBeenImported(@ShellOption() String job) { + if (checkJobPresent(job)) { + return; + } + ImportJobConfiguration importJobConfiguration = jobService.configuration.getJobs().get(job); + PPNLIST2MyCoReImporter importer + = context.getBean(importJobConfiguration.getImporter(), PPNLIST2MyCoReImporter.class); + importer.setConfig(importJobConfiguration.getImporterConfig()); + + LOGGER.info("Running job {}", job); + Pageable pageable = Pageable.ofSize(10000); + Page> pairs; + try { + do { + Date date = new Date(); + pairs = jobService.listUpdateableRecords(job, pageable); + LOGGER.info("Retrieved {} records in {}", pairs.getNumberOfElements(), new Date().getTime() - date.getTime()); + pairs.stream().filter(pair -> { + try { + return importer.shouldNotBeImported(pair.first(), pair.second()); + } catch (IOException | JDOMException e) { + return true; + } + }).forEach(pair -> { + LOGGER.info("Record {} with id {} should not have been imported", pair.first().getForeignId(), + pair.second().getMycoreId()); + }); + pageable = pageable.next(); + } while(pairs.hasNext()); + } catch (Exception e) { + LOGGER.error("Error while running job {}", job, e); + } + } + @ShellMethod(key = "run-update", value = "Runs the update") public void runUpdate(@ShellOption() String job) { if (checkJobPresent(job)) { diff --git a/src/main/java/de/vzg/oai_importer/PicaUtils.java b/src/main/java/de/vzg/oai_importer/PicaUtils.java index dd716f4..103138d 100644 --- a/src/main/java/de/vzg/oai_importer/PicaUtils.java +++ b/src/main/java/de/vzg/oai_importer/PicaUtils.java @@ -18,49 +18,142 @@ package de.vzg.oai_importer; -import org.jdom2.Document; -import org.jdom2.Element; - import java.time.LocalDateTime; import java.time.OffsetDateTime; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.Optional; +import java.util.function.Predicate; import java.util.stream.Stream; +import org.jdom2.Attribute; +import org.jdom2.Document; 
+import org.jdom2.Element; + public class PicaUtils { public static Stream getPicaField(Document root, String tag, String code) { return root.getRootElement().getChildren("datafield", Namespaces.PICA_NAMESPACE) - .stream() - .filter(e -> e.getAttributeValue("tag").equals(tag)) - .map(element -> element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals(code)) - .findFirst()) - .filter(Optional::isPresent) - .map(Optional::get) - .map(Element::getText); + .stream() + .filter(e -> e.getAttributeValue("tag").equals(tag)) + .map(element -> element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals(code)) + .findFirst()) + .filter(Optional::isPresent) + .map(Optional::get) + .map(Element::getText); } + // TODO: Datum wann der gesamte Datensatz zuletzt geändert wurde, eventuell wäre es besser das Datum aus dem + // richtigen exemplar zu nehmen public static List getModifiedDate(Element rootElement) { return rootElement.getChildren("datafield", Namespaces.PICA_NAMESPACE) - .stream() - .filter(e -> e.getAttributeValue("tag").equals("001B")) - .map(element -> { - String p0 - = element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals("0")) - .findFirst().get().getText(); - - String time - = element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals("t")) - .findFirst().get().getText(); - - String date = p0.split(":")[1]; - - DateTimeFormatter formatter = DateTimeFormatter.ofPattern("dd-MM-yy HH:mm:ss.SSS"); - OffsetDateTime dateTime = LocalDateTime.parse(date + " " + time, formatter) - .atOffset(OffsetDateTime.now().getOffset()); - return dateTime; - }).toList(); + .stream() + .filter(e -> e.getAttributeValue("tag").equals("001B")) + .map(element -> { + String p0 + = element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals("0")) + .findFirst().get().getText(); + + String time + = element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals("t")) + 
.findFirst().get().getText(); + + String date = p0.split(":")[1]; + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("dd-MM-yy HH:mm:ss.SSS"); + OffsetDateTime dateTime = LocalDateTime.parse(date + " " + time, formatter) + .atOffset(OffsetDateTime.now().getOffset()); + return dateTime; + }).toList(); } + public static List getCreatedDate(Element rootElement) { + return rootElement.getChildren("datafield", Namespaces.PICA_NAMESPACE) + .stream() + .filter(e -> e.getAttributeValue("tag").equals("001A")) + .map(element -> { + String p0 + = element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals("0")) + .findFirst().get().getText(); + + String time + = element.getChildren().stream().filter(e -> e.getAttributeValue("code").equals("t")) + .findFirst().get().getText(); + + String date = p0.split(":")[1]; + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("dd-MM-yy HH:mm:ss.SSS"); + OffsetDateTime dateTime = LocalDateTime.parse(date + " " + time, formatter) + .atOffset(OffsetDateTime.now().getOffset()); + return dateTime; + }).toList(); + } + + /** + * Returns a list of subrecords. A subrecord is a list of elements that belong together, starting with a datafield + * with tag 101@ and ending just before the next datafield with tag 101@. 
+ * + * @param rootElement the record to get the subrecords from + * @return a list of subrecords each containing a list of elements + */ + public static List> getSubRecords(Element rootElement) { + List> subrecords = new ArrayList<>(); + + ArrayList currentSubRecord = null; + for (Element datafield : rootElement.getChildren("datafield", Namespaces.PICA_NAMESPACE)) { + // this means a new subrecord starts + Attribute attribute = datafield.getAttribute("tag"); + if (attribute != null) { + String tag = attribute.getValue(); + if ("101@".equals(tag)) { + if (currentSubRecord != null) { + // add the previous subrecord + subrecords.add(currentSubRecord); + } + currentSubRecord = new ArrayList<>(); + } + } + + if (currentSubRecord != null) { + currentSubRecord.add(datafield); + } + } + if (currentSubRecord != null) { + subrecords.add(currentSubRecord); + } + return subrecords; + } + + + /** + * Checks if the given subrecord contains all required tag code values. + * @param subRecordChildren the subrecord to check + * @param required the required tag code values + * @return true if the subrecord contains all required tag code values + */ + public static boolean matchingSubRecord(List subRecordChildren, List required) { + Predicate tagCodeMatching = currentRequiredTagCode -> { + for (Element subRecordChild : subRecordChildren) { + String tag = subRecordChild.getAttributeValue("tag"); + if (tag == null) { + continue; + } + + for (Element subfield : subRecordChild.getChildren("subfield", Namespaces.PICA_NAMESPACE)) { + String code = subfield.getAttributeValue("code"); + String value = subfield.getText(); + if (currentRequiredTagCode.code.equals(code) && currentRequiredTagCode.value.equalsIgnoreCase(value)) { + return true; + } + } + } + return false; + }; + + return required.stream().allMatch(tagCodeMatching); + } + + public record TagCodeValue(String tag, String code, String value) { + } } diff --git a/src/main/java/de/vzg/oai_importer/controller/JobsController.java 
b/src/main/java/de/vzg/oai_importer/controller/JobsController.java index 3896c8c..e18abe4 100644 --- a/src/main/java/de/vzg/oai_importer/controller/JobsController.java +++ b/src/main/java/de/vzg/oai_importer/controller/JobsController.java @@ -6,6 +6,8 @@ import java.util.Map; import java.util.stream.IntStream; +import javax.xml.transform.TransformerException; + import org.mycore.oai.pmh.OAIException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; @@ -25,7 +27,6 @@ import de.vzg.oai_importer.mapping.jpa.Mapping; import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfo; - @Controller @RequestMapping("/jobs") @PreAuthorize("hasAnyAuthority('job')") @@ -46,42 +47,68 @@ public String listJobs(Model model) { @RequestMapping("/{jobID}/") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String showJob(@PathVariable("jobID") String jobID, - @RequestParam(defaultValue = "0") int page, - @RequestParam(defaultValue = "100") int size, - Model model) { + @RequestParam(defaultValue = "0") int page, + @RequestParam(defaultValue = "100") int size, + Model model) { Page records = jobService.listImportableRecords(jobID, Pageable.ofSize(size).withPage(page)); model.addAttribute("records", records); model.addAttribute("jobID", jobID); model.addAttribute("pages", IntStream.rangeClosed(1, records.getTotalPages()) - .boxed()); + .boxed()); return "job_records"; } + @RequestMapping("/{jobID}/fileCheck") + public String runFileCheckJob(@PathVariable("jobID") String jobID, + @RequestParam(defaultValue = "0") int page, + @RequestParam(defaultValue = "100") int size, Model model) { + + Page>> records + = jobService.listImportableFiles(jobID, Pageable.ofSize(size).withPage(page)); + model.addAttribute("records", records); + model.addAttribute("jobID", jobID); + model.addAttribute("pages", IntStream.rangeClosed(1, records.getTotalPages()).boxed()); + + return "job_records_file_check"; + } + + 
@RequestMapping("/{jobID}/update/fileCheck") + public String runFileCheckUpdateJob(@PathVariable("jobID") String jobID, + @RequestParam(defaultValue = "0") int page, + @RequestParam(defaultValue = "100") int size, Model model) { + Page>> records + = jobService.runJobFileCheck(jobID, Pageable.ofSize(size).withPage(page)); + model.addAttribute("records", records); + model.addAttribute("jobID", jobID); + model.addAttribute("pages", IntStream.rangeClosed(1, records.getTotalPages()).boxed()); + + return "job_records_file_check_update"; + } + @RequestMapping("/{jobID}/update/") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String showUpdateJob(@PathVariable("jobID") String jobID, - @RequestParam(defaultValue = "0") int page, - @RequestParam(defaultValue = "100") int size, - Model model, - @RequestParam(value = "success", required = false) String success) { - Page> records = - jobService.listUpdateableRecords(jobID, Pageable.ofSize(size).withPage(page)); + @RequestParam(defaultValue = "0") int page, + @RequestParam(defaultValue = "100") int size, + Model model, + @RequestParam(value = "success", required = false) String success) { + Page> records + = jobService.listUpdateableRecords(jobID, Pageable.ofSize(size).withPage(page)); model.addAttribute("records", records); model.addAttribute("jobID", jobID); model.addAttribute("pages", IntStream.rangeClosed(1, records.getTotalPages()) - .boxed()); - if(success != null && (success.equals("true") || success.equals("false"))) { + .boxed()); + if (success != null && (success.equals("true") || success.equals("false"))) { model.addAttribute("success", success); } return "job_update"; } - @RequestMapping("/{jobID}/testMapping") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String runTestJob(@PathVariable("jobID") String jobID, Model model, - @RequestParam(value = "update", required = false, defaultValue = "false") boolean update) { + @RequestParam(value = "update", required = false, defaultValue = "false") 
boolean update) { Map> records = jobService.testMapping(jobID, update); model.addAttribute("records", records); @@ -92,7 +119,7 @@ public String runTestJob(@PathVariable("jobID") String jobID, Model model, @RequestMapping("/{jobID}/test/{recordID}") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String runTestJob(@PathVariable("jobID") String jobID, - @PathVariable("recordID") String recordID, Model model) { + @PathVariable("recordID") String recordID, Model model) { model.addAttribute("jobID", jobID); model.addAttribute("recordID", recordID); model.addAttribute("result", jobService.test(jobID, recordID)); @@ -103,7 +130,7 @@ public String runTestJob(@PathVariable("jobID") String jobID, @RequestMapping("/{jobID}/import/{recordID}") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String runJob(@PathVariable("jobID") String jobID, @PathVariable("recordID") String recordID, - RedirectAttributes redirectAttributes) { + RedirectAttributes redirectAttributes) throws IOException, URISyntaxException, TransformerException { jobService.importSingleDocument(jobID, recordID); redirectAttributes.addAttribute("success", "true"); @@ -114,7 +141,7 @@ public String runJob(@PathVariable("jobID") String jobID, @PathVariable("recordI @RequestMapping("/{jobID}/update/update") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String updateJob(@PathVariable("jobID") String jobID, - RedirectAttributes redirectAttributes) { + RedirectAttributes redirectAttributes) { jobService.runUpdateJob(jobID); redirectAttributes.addAttribute("success", "true"); @@ -124,7 +151,7 @@ public String updateJob(@PathVariable("jobID") String jobID, @RequestMapping("/{jobID}/update/{recordID}") @PreAuthorize("hasAnyAuthority('job-' + #jobID)") public String updateJob(@PathVariable("jobID") String jobID, @PathVariable("recordID") String recordID, - RedirectAttributes redirectAttributes) { + RedirectAttributes redirectAttributes) { jobService.updateSingleDocument(jobID, recordID); 
redirectAttributes.addAttribute("success", "true"); @@ -133,7 +160,8 @@ public String updateJob(@PathVariable("jobID") String jobID, @PathVariable("reco } @RequestMapping("/{jobID}/import") - public String runJob(@PathVariable("jobID") String jobID, Model model) throws IOException, URISyntaxException, OAIException { + public String runJob(@PathVariable("jobID") String jobID, Model model) + throws IOException, URISyntaxException, OAIException { model.addAttribute("jobID", jobID); jobService.runJob(jobID); return "job_test"; diff --git a/src/main/java/de/vzg/oai_importer/dfi/DFIConditions.java b/src/main/java/de/vzg/oai_importer/dfi/DFIConditions.java new file mode 100644 index 0000000..009c853 --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/dfi/DFIConditions.java @@ -0,0 +1,84 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. + * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see . 
+ */
+
+package de.vzg.oai_importer.dfi;
+
+import de.vzg.oai_importer.PicaUtils;
+
+/**
+ * Tag/code/value triples that {@code DFISRURecordFilter} and {@code DFIFileRightsDetector} pass to
+ * {@link PicaUtils#matchingSubRecord} to recognise DFI holdings in a PICA record.
+ * <p>
+ * The subfield text is compared ignoring case by {@code matchingSubRecord}, so the spelling of the
+ * values below is not case-sensitive.
+ * <p>
+ * NOTE(review): two constant names contain {@code VOLLLTEXT} (three L). They are imported
+ * statically by other classes, so a rename has to be done in all of them at once.
+ */
+public class DFIConditions {
+    // holding library markers (tag 209A, code B)
+    public static final PicaUtils.TagCodeValue SIEGEL_615 = new PicaUtils.TagCodeValue("209A", "B", "615");
+    public static final PicaUtils.TagCodeValue SIEGEL_LG3 = new PicaUtils.TagCodeValue("209A", "B", "Lg 3");
+
+    // comment values (tag 237A, code a) checked for subrecords of SIEGEL_615
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_DFI_AKTUELL
+        = new PicaUtils.TagCodeValue("237A", "a", "dfi aktuell");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_AFA
+        = new PicaUtils.TagCodeValue("237A", "a", "afa");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_DFI_COMPACT
+        = new PicaUtils.TagCodeValue("237A", "a", "dfi compact");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_TONDOKUMENT
+        = new PicaUtils.TagCodeValue("237A", "a", "tondokument");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_VIDEO
+        = new PicaUtils.TagCodeValue("237A", "a", "video");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_ABSCHLUSSARBEIT
+        = new PicaUtils.TagCodeValue("237A", "a", "abschlussarbeit");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_ZEITSCHRIFTENAUFSATZ
+        = new PicaUtils.TagCodeValue("237A", "a", "Zeitschriftenaufsatz");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_SICHERHEITSKOPIE
+        = new PicaUtils.TagCodeValue("237A", "a", "Sicherheitskopie");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_PRESSEARTIKEL
+        = new PicaUtils.TagCodeValue("237A", "a", "Presseartikel");
+
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_KARIKATUR
+        = new PicaUtils.TagCodeValue("237A", "a", "Karikatur");
+
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_PA_VOLLLTEXT
+        = new PicaUtils.TagCodeValue("237A", "a", "PA-Volltext");
+
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_GFFK
+        = new PicaUtils.TagCodeValue("237A", "a", "GFfK");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_615_KOMMENTAR_PRESSEMAPPE
+        = new PicaUtils.TagCodeValue("237A", "a", "Pressemappe");
+
+
+    // values checked for subrecords of SIEGEL_LG3
+    // NOTE(review): these use tag 209A / code a, while the 615 comments use 237A / a - confirm
+    // against real "Lg 3" records that this is the intended field
+    public static final PicaUtils.TagCodeValue SIEGEL_LG3_SICHERHEITSKOPIE
+        = new PicaUtils.TagCodeValue("209A", "a", "Sicherheitskopie");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_LG3_PRESSEARTIKEL
+        = new PicaUtils.TagCodeValue("209A", "a", "Presseartikel");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_LG3_KARIKATUR
+        = new PicaUtils.TagCodeValue("209A", "a", "Karikatur");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_LG3_PA_VOLLLTEXT
+        = new PicaUtils.TagCodeValue("209A", "a", "PA-Volltext");
+
+    public static final PicaUtils.TagCodeValue SIEGEL_LG3_PRESSEMAPPE
+        = new PicaUtils.TagCodeValue("209A", "a", "Pressemappe");
+}
diff --git a/src/main/java/de/vzg/oai_importer/dfi/DFIFileRightsDetector.java b/src/main/java/de/vzg/oai_importer/dfi/DFIFileRightsDetector.java new file mode 100644 index 0000000..1b125c3 --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/dfi/DFIFileRightsDetector.java @@ -0,0 +1,165 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. + * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see .
+ */
+
+package de.vzg.oai_importer.dfi;
+
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_ABSCHLUSSARBEIT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_AFA;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_DFI_AKTUELL;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_DFI_COMPACT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_GFFK;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_KARIKATUR;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_PA_VOLLLTEXT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_PRESSEARTIKEL;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_PRESSEMAPPE;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_SICHERHEITSKOPIE;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_TONDOKUMENT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_VIDEO;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_ZEITSCHRIFTENAUFSATZ;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_KARIKATUR;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_PA_VOLLLTEXT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_PRESSEARTIKEL;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_PRESSEMAPPE;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_SICHERHEITSKOPIE;
+
+import java.util.List;
+import java.util.Optional;
+
+import org.jdom2.Element;
+import org.springframework.stereotype.Service;
+
+import de.vzg.oai_importer.PicaUtils;
+import de.vzg.oai_importer.importer.FileRightsDetector;
+import lombok.extern.log4j.Log4j2;
+
+/**
+ * Decides from the holdings of a PICA record whether {@link #isPublic(Element)} reports the
+ * record as public.
+ * <p>
+ * Each subrecord is checked against an ordered rule table; the first rule whose condition is
+ * found in the subrecord decides and ends the search.
+ */
+@Log4j2
+@Service("DFIFileRightsDetector")
+public class DFIFileRightsDetector implements FileRightsDetector {
+
+    /** Rules for subrecords containing {@code SIEGEL_615}; the order decides on multiple matches. */
+    private static final List<Rule> RULES_615 = List.of(
+        new Rule(SIEGEL_615_KOMMENTAR_DFI_AKTUELL, "DFI Aktuell found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_AFA, "AFA found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_DFI_COMPACT, "DFI Compact found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_TONDOKUMENT, "Tondokument found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_VIDEO, "Video found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_ABSCHLUSSARBEIT, "Abschlussarbeit found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_ZEITSCHRIFTENAUFSATZ, "Zeitschriftenaufsatz found", false),
+        new Rule(SIEGEL_615_KOMMENTAR_SICHERHEITSKOPIE, "Sicherheitskopie found", false),
+        new Rule(SIEGEL_615_KOMMENTAR_PRESSEARTIKEL, "Presseartikel found", false),
+        new Rule(SIEGEL_615_KOMMENTAR_KARIKATUR, "Karikatur found", false),
+        new Rule(SIEGEL_615_KOMMENTAR_PA_VOLLLTEXT, "PA-Volltext found", false),
+        new Rule(SIEGEL_615_KOMMENTAR_GFFK, "GFfK found", true),
+        new Rule(SIEGEL_615_KOMMENTAR_PRESSEMAPPE, "Pressemappe found", false));
+
+    /** Rules for subrecords containing {@code SIEGEL_LG3}; none of them makes a record public. */
+    private static final List<Rule> RULES_LG3 = List.of(
+        new Rule(SIEGEL_LG3_SICHERHEITSKOPIE, "Sicherheitskopie found", false),
+        new Rule(SIEGEL_LG3_PRESSEARTIKEL, "Presseartikel found", false),
+        new Rule(SIEGEL_LG3_KARIKATUR, "Karikatur found", false),
+        new Rule(SIEGEL_LG3_PA_VOLLLTEXT, "PA-Volltext found", false),
+        new Rule(SIEGEL_LG3_PRESSEMAPPE, "Pressemappe found", false));
+
+    /**
+     * Walks the subrecords in document order and returns the verdict of the first matching rule.
+     *
+     * @param record the root element of the PICA record
+     * @return the verdict of the first matching rule, or {@code false} if no subrecord matches
+     *         any rule
+     */
+    @Override
+    public boolean isPublic(Element record) {
+        for (List<Element> subRecord : PicaUtils.getSubRecords(record)) {
+            // the LG3 rules are only consulted when the 615 rules gave no verdict
+            Optional<Rule> decision = firstMatch(subRecord, SIEGEL_615, RULES_615)
+                .or(() -> firstMatch(subRecord, SIEGEL_LG3, RULES_LG3));
+            if (decision.isPresent()) {
+                log.info(decision.get().logMessage());
+                return decision.get().filesArePublic();
+            }
+        }
+        return false;
+    }
+
+    /** Returns the first rule found in the subrecord, or empty if the siegel or all rules are absent. */
+    private static Optional<Rule> firstMatch(List<Element> subRecord, PicaUtils.TagCodeValue siegel,
+        List<Rule> rules) {
+        if (!PicaUtils.matchingSubRecord(subRecord, List.of(siegel))) {
+            return Optional.empty();
+        }
+        return rules.stream()
+            .filter(rule -> PicaUtils.matchingSubRecord(subRecord, List.of(rule.condition())))
+            .findFirst();
+    }
+
+    /** A condition, the message logged when it is found and the verdict it stands for. */
+    private record Rule(PicaUtils.TagCodeValue condition, String logMessage, boolean filesArePublic) {
+    }
+}
diff --git a/src/main/java/de/vzg/oai_importer/dfi/DFISRURecordFilter.java b/src/main/java/de/vzg/oai_importer/dfi/DFISRURecordFilter.java new file mode 100644 index 0000000..51f4d0a --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/dfi/DFISRURecordFilter.java @@ -0,0 +1,167 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. + * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see .
+ */
+
+package de.vzg.oai_importer.dfi;
+
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_ABSCHLUSSARBEIT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_AFA;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_DFI_AKTUELL;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_DFI_COMPACT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_GFFK;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_KARIKATUR;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_PA_VOLLLTEXT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_PRESSEARTIKEL;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_PRESSEMAPPE;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_SICHERHEITSKOPIE;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_TONDOKUMENT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_VIDEO;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_615_KOMMENTAR_ZEITSCHRIFTENAUFSATZ;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_KARIKATUR;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_PA_VOLLLTEXT;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_PRESSEARTIKEL;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_PRESSEMAPPE;
+import static de.vzg.oai_importer.dfi.DFIConditions.SIEGEL_LG3_SICHERHEITSKOPIE;
+
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jdom2.Document;
+import org.jdom2.Element;
+import org.springframework.stereotype.Service;
+
+import de.vzg.oai_importer.PicaUtils;
+import de.vzg.oai_importer.foreign.sru.SRURecordFilter;
+import lombok.extern.java.Log;
+
+/**
+ * Accepts a PICA record if one of its subrecords belongs to {@code SIEGEL_615} or
+ * {@code SIEGEL_LG3} and carries one of the comment values listed for that siegel.
+ */
+@Service("DFISRURecordFilter")
+@Log
+public class DFISRURecordFilter implements SRURecordFilter {
+
+    /** Format in which the day is looked up in tag 201B, code 0. */
+    private static final DateTimeFormatter DAY_FORMAT = DateTimeFormatter.ofPattern("dd-MM-yy");
+
+    private static final List<PicaUtils.TagCodeValue> KOMMENTARE_615 = List.of(
+        SIEGEL_615_KOMMENTAR_DFI_AKTUELL,
+        SIEGEL_615_KOMMENTAR_AFA,
+        SIEGEL_615_KOMMENTAR_DFI_COMPACT,
+        SIEGEL_615_KOMMENTAR_TONDOKUMENT,
+        SIEGEL_615_KOMMENTAR_VIDEO,
+        SIEGEL_615_KOMMENTAR_ABSCHLUSSARBEIT,
+        SIEGEL_615_KOMMENTAR_ZEITSCHRIFTENAUFSATZ,
+        SIEGEL_615_KOMMENTAR_SICHERHEITSKOPIE,
+        SIEGEL_615_KOMMENTAR_PRESSEARTIKEL,
+        SIEGEL_615_KOMMENTAR_KARIKATUR,
+        SIEGEL_615_KOMMENTAR_PA_VOLLLTEXT,
+        SIEGEL_615_KOMMENTAR_GFFK,
+        SIEGEL_615_KOMMENTAR_PRESSEMAPPE);
+
+    private static final List<PicaUtils.TagCodeValue> KOMMENTARE_LG3 = List.of(
+        SIEGEL_LG3_SICHERHEITSKOPIE,
+        SIEGEL_LG3_PRESSEARTIKEL,
+        SIEGEL_LG3_KARIKATUR,
+        SIEGEL_LG3_PA_VOLLLTEXT,
+        SIEGEL_LG3_PRESSEMAPPE);
+
+    /**
+     * Checks every subrecord of the record against the 615 and the LG3 conditions.
+     *
+     * @param record the PICA record to check
+     * @param day if not {@code null}, a subrecord only counts when it also contains this day,
+     *            formatted as {@code dd-MM-yy}, as value of tag 201B, code 0
+     * @return true if at least one subrecord fulfils the conditions
+     */
+    @Override
+    public boolean filter(Document record, LocalDate day) {
+        for (List<Element> subRecord : PicaUtils.getSubRecords(record.getRootElement())) {
+            if (isWanted(subRecord, SIEGEL_615, day, KOMMENTARE_615)
+                || isWanted(subRecord, SIEGEL_LG3, day, KOMMENTARE_LG3)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** True if the subrecord has the siegel (and the day, if given) plus any of the comments. */
+    private static boolean isWanted(List<Element> subRecord, PicaUtils.TagCodeValue siegel, LocalDate day,
+        List<PicaUtils.TagCodeValue> comments) {
+        List<PicaUtils.TagCodeValue> required = new ArrayList<>();
+        required.add(siegel);
+        if (day != null) {
+            required.add(new PicaUtils.TagCodeValue("201B", "0", day.format(DAY_FORMAT)));
+        }
+        if (!PicaUtils.matchingSubRecord(subRecord, required)) {
+            return false;
+        }
+        return comments.stream()
+            .anyMatch(comment -> PicaUtils.matchingSubRecord(subRecord, List.of(comment)));
+    }
+}
diff --git a/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntity.java b/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntity.java index ffa4f47..63fdb8a 100644 --- a/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntity.java +++ b/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntity.java @@ -33,7 +33,7 @@ public class ForeignEntity { @Column(name = "foreign_id", length = 100, nullable = false) private String foreignId; - @Column(length = 512000, nullable = false) + @Column(columnDefinition = "TEXT", nullable = false) private String metadata; @Column(nullable = false) diff --git a/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntityRepository.java b/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntityRepository.java index 782310f..adae857 100644 --- a/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntityRepository.java +++ b/src/main/java/de/vzg/oai_importer/foreign/jpa/ForeignEntityRepository.java @@ -21,7 +21,9 @@ public interface ForeignEntityRepository extends
ListPagingAndSortingRepository< "(SELECT m.importID FROM MyCoReObjectInfo m where m.importURL = ?2 AND m.repository = ?3)") Page findImportableEntities(String oaiConfig, String oaiSource, String targetRepository, Pageable pageable); - @Query("SELECT fe, oi FROM ForeignEntity fe, MyCoReObjectInfo oi WHERE fe.configId = ?1 AND fe.isDeleted = false AND fe.foreignId = oi.importID AND oi.importURL = ?2 AND oi.repository = ?3 order by fe.datestamp desc") + //@Query("SELECT fe, oi FROM ForeignEntity fe, MyCoReObjectInfo oi WHERE fe.configId = ?1 AND fe.isDeleted = false AND fe.foreignId = oi.importID AND oi.importURL = ?2 AND oi.repository = ?3 order by fe.datestamp desc") + @SuppressWarnings("checkstyle:LineLength") + @Query("SELECT fe, oi FROM ForeignEntity fe JOIN MyCoReObjectInfo oi ON fe.foreignId = oi.importID WHERE fe.configId = ?1 AND fe.isDeleted = false AND oi.importURL = ?2 AND oi.repository = ?3") Page findUpdateableEntities(String oaiConfig, String oaiSource, String targetRepository, Pageable pageable); @Query("SELECT MAX(r.datestamp) FROM ForeignEntity r WHERE r.configId = ?1") diff --git a/src/main/java/de/vzg/oai_importer/foreign/ppnlist/PPNListHarvester.java b/src/main/java/de/vzg/oai_importer/foreign/ppnlist/PPNListHarvester.java index 65f5238..3c9682b 100644 --- a/src/main/java/de/vzg/oai_importer/foreign/ppnlist/PPNListHarvester.java +++ b/src/main/java/de/vzg/oai_importer/foreign/ppnlist/PPNListHarvester.java @@ -55,61 +55,59 @@ public List update(String configID, PPNListConfiguration source, } }); - var al = new ArrayList<>(ppns); + List al = new ArrayList<>(ppns); var count1 = new AtomicInteger(al.size()); - List missing = al.stream() + al = al.stream() .filter(ppn -> { if(!onlyMissing){ return true; } - log.info("Checking PPN " + ppn + " (" + count1.decrementAndGet() + " remaining)"); + //log.info("Checking PPN " + ppn + " (" + count1.decrementAndGet() + " remaining)"); return recordRepository.findFirstByConfigIdAndForeignId(configID, ppn) == null; 
- }).toList(); - - var count = new AtomicInteger(missing.size()); - missing.stream() - .parallel() - .forEach(ppn -> { - log.info("Processing PPN " + ppn + " (" + count.decrementAndGet() + " remaining)"); - ForeignEntity record + }).collect(Collectors.toList()); + + var count = new AtomicInteger(al.size()); + for (String ppn : al) { + log.info("Processing PPN " + ppn + " (" + count.decrementAndGet() + " remaining)"); + ForeignEntity record = Optional.ofNullable(recordRepository.findFirstByConfigIdAndForeignId(configID, ppn)) - .orElseGet(ForeignEntity::new); - record.setConfigId(configID); - record.setForeignId(ppn); - record.setDeleted(false); - record.setDatestamp(OffsetDateTime.now()); - - try { - URL url = new URL("https://unapi.k10plus.de/?id=gvk:ppn:" + ppn + "&format=picaxml"); - try (var is = url.openStream(); var isr = new InputStreamReader(is); - var br = new BufferedReader(isr)) { - String metadata = br.lines().collect(Collectors.joining("\n")); - record.setMetadata(metadata); - } catch (IOException e) { - throw new RuntimeException(e); - } - } catch (MalformedURLException e) { - throw new RuntimeException(e); + .orElseGet(ForeignEntity::new); + record.setConfigId(configID); + record.setForeignId(ppn); + record.setDeleted(false); + record.setDatestamp(OffsetDateTime.now()); + + try { + URL url = new URL("https://unapi.k10plus.de/?id=gvk:ppn:" + ppn + "&format=picaxml"); + try (var is = url.openStream(); var isr = new InputStreamReader(is); + var br = new BufferedReader(isr)) { + String metadata = br.lines().collect(Collectors.joining("\n")); + record.setMetadata(metadata); + } catch (IOException e) { + log.error("Error while fetching PPN " + ppn, e); + continue; } + } catch (MalformedURLException e) { + log.error("Error while fetching PPN " + ppn, e); + continue; + } - try (var sr = new StringReader(record.getMetadata())) { - Document doc = new SAXBuilder().build(sr); - Element rootElement = doc.getRootElement(); - List modifiedList = 
PicaUtils.getModifiedDate(rootElement); - modifiedList.stream().findFirst().ifPresent(record::setDatestamp); - } catch (IOException | JDOMException e) { - throw new RuntimeException(e); - } + try (var sr = new StringReader(record.getMetadata())) { + Document doc = new SAXBuilder().build(sr); + Element rootElement = doc.getRootElement(); + List modifiedList = PicaUtils.getModifiedDate(rootElement); + modifiedList.stream().findFirst().ifPresent(record::setDatestamp); + } catch (IOException | JDOMException e) { + log.error("Error while parsing PPN " + ppn, e); + continue; + } - if (record.getMetadata().length() > 512000) { - log.warn("Metadata too long for PPN " + ppn); - return; - } + recordRepository.save(record); + result.add(record); + } - recordRepository.save(record); - result.add(record); - }); + log.info("Completed processing PPNs"); return result; } diff --git a/src/main/java/de/vzg/oai_importer/foreign/sru/SRUConfiguration.java b/src/main/java/de/vzg/oai_importer/foreign/sru/SRUConfiguration.java index 82f3b5c..955dd18 100644 --- a/src/main/java/de/vzg/oai_importer/foreign/sru/SRUConfiguration.java +++ b/src/main/java/de/vzg/oai_importer/foreign/sru/SRUConfiguration.java @@ -34,6 +34,10 @@ public class SRUConfiguration implements Configuration { private LocalDate dateOverwrite; + private LocalDate newestDate; + + private String recordFilterService; + @Override public String getName() { return "SRU: " + queryPattern; diff --git a/src/main/java/de/vzg/oai_importer/foreign/sru/SRUHarvester.java b/src/main/java/de/vzg/oai_importer/foreign/sru/SRUHarvester.java index d6b0c19..7c54a13 100644 --- a/src/main/java/de/vzg/oai_importer/foreign/sru/SRUHarvester.java +++ b/src/main/java/de/vzg/oai_importer/foreign/sru/SRUHarvester.java @@ -44,6 +44,7 @@ import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.ApplicationContext; import 
org.springframework.stereotype.Service; import de.vzg.oai_importer.PicaUtils; @@ -56,9 +57,13 @@ @Log4j2 public class SRUHarvester implements Harvester { + @Autowired + ApplicationContext context; + public static final String RECORD_DATA_XPATH = "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData"; public static final String SRU_HARVESTER = "SRUHarvester"; private static final String NUMBER_OF_RECORDS_XPATH = "/zs:searchRetrieveResponse/zs:numberOfRecords"; + @Autowired private ForeignEntityRepository recordRepository; @@ -72,8 +77,8 @@ public String buildLink(SRUConfiguration source, LocalDate day, int startRecord, "&recordSchema=picaxml&startRecord=" + startRecord; } - public List getDaysSince(LocalDate since) { - LocalDate now = LocalDate.now(); + public List getDaysSince(LocalDate since, LocalDate until) { + LocalDate now = until == null ? LocalDate.now() : until.plusDays(1); return since.datesUntil(now).toList(); } @@ -129,7 +134,7 @@ public List update(String configID, SRUConfiguration source, bool List result = new ArrayList<>(); - List days = getDaysSince(oldestDate); + List days = getDaysSince(oldestDate, source.getNewestDate()); for (LocalDate day : days) { SRUResponse resp = null; String link = null; @@ -140,7 +145,9 @@ public List update(String configID, SRUConfiguration source, bool log.info("Harvesting from {}", link); resp = harvest(link); for (Document picaRecord : resp.records()) { - processRecord(configID, picaRecord, result); + if(filterRecord(source, picaRecord, day)) { + processRecord(configID, picaRecord, result, day); + } } startRecord += 100; } while (resp.numberOfRecords() > startRecord); @@ -153,12 +160,33 @@ public List update(String configID, SRUConfiguration source, bool return result; } - private boolean processRecord(String configID, Document picaRecord, List result) { + private boolean filterRecord(SRUConfiguration config, Document picaRecord, LocalDate day) { + Optional ppnField = PicaUtils.getPicaField(picaRecord, 
"003@", "0").findFirst(); + if (ppnField.isEmpty()) { + log.warn("No PPN found in record {}", new XMLOutputter().outputString(picaRecord)); + return false; + } + + if (config.getRecordFilterService() == null || config.getRecordFilterService().isBlank()) { + return true; + } + + SRURecordFilter filter = context.getBean(config.getRecordFilterService(), SRURecordFilter.class); + String recordName = ppnField.get(); + log.info("Checking record {}", recordName); + boolean filterResult = filter.filter(picaRecord, day); + if (!filterResult) { + log.info("Record {} filtered out", recordName); + } + return filterResult; + } + + private boolean processRecord(String configID, Document picaRecord, List result, LocalDate day) { String metadata = new XMLOutputter().outputString(picaRecord); Optional ppnField = PicaUtils.getPicaField(picaRecord, "003@", "0").findFirst(); if (ppnField.isEmpty()) { - log.warn("No PPN found in record {}", metadata); + log.warn("No PPN found in record {}", new XMLOutputter().outputString(picaRecord)); return false; } String ppn = ppnField.get(); @@ -175,11 +203,6 @@ private boolean processRecord(String configID, Document picaRecord, List modifiedList = PicaUtils.getModifiedDate(picaRecord.getRootElement()); modifiedList.stream().findFirst().ifPresent(foreignEntity::setDatestamp); - if (foreignEntity.getMetadata().length() > 512000) { - log.warn("Metadata too long for PPN " + ppn); - return false; - } - foreignEntity.setDeleted(false); recordRepository.save(foreignEntity); diff --git a/src/main/java/de/vzg/oai_importer/foreign/sru/SRURecordFilter.java b/src/main/java/de/vzg/oai_importer/foreign/sru/SRURecordFilter.java new file mode 100644 index 0000000..c8002aa --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/foreign/sru/SRURecordFilter.java @@ -0,0 +1,37 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. 
+ *
+ * MyCoRe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MyCoRe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with MyCoRe. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package de.vzg.oai_importer.foreign.sru;
+
+import org.jdom2.Document;
+import org.springframework.stereotype.Service;
+
+import java.time.LocalDate;
+
+@Service
+public interface SRURecordFilter {
+
+    /**
+     * Check if the record should be imported. The SRU harvester looks the implementation up by
+     * its configured bean name and skips every record for which this returns false.
+     *
+     * @param record the record to check
+     * @param day the day the harvester is currently querying; the DFI implementation also
+     *            accepts {@code null} and then does not check the date
+     * @return true if the record should be imported
+     */
+    boolean filter(Document record, LocalDate day);
+}
diff --git a/src/main/java/de/vzg/oai_importer/importer/FileBased.java b/src/main/java/de/vzg/oai_importer/importer/FileBased.java new file mode 100644 index 0000000..cdd2863 --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/importer/FileBased.java @@ -0,0 +1,40 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. + * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe.
If not, see . + */ + +package de.vzg.oai_importer.importer; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.List; + +import de.vzg.oai_importer.foreign.jpa.ForeignEntity; +import de.vzg.oai_importer.mycore.MyCoReTargetConfiguration; +import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfo; + +public interface FileBased { + + List listImportableFiles(MyCoReTargetConfiguration target, ForeignEntity record) + throws IOException, URISyntaxException; + + List listMissingFiles(MyCoReTargetConfiguration target, MyCoReObjectInfo info, ForeignEntity record) + throws IOException, URISyntaxException; + + List fixMissingFiles(MyCoReTargetConfiguration target, MyCoReObjectInfo info, ForeignEntity record) + throws IOException, URISyntaxException; + +} diff --git a/src/main/java/de/vzg/oai_importer/importer/FileRightsDetector.java b/src/main/java/de/vzg/oai_importer/importer/FileRightsDetector.java new file mode 100644 index 0000000..36e8544 --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/importer/FileRightsDetector.java @@ -0,0 +1,28 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. + * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see . 
+ */ + +package de.vzg.oai_importer.importer; + +import org.jdom2.Element; +import org.springframework.stereotype.Service; + +@Service +public interface FileRightsDetector { + + boolean isPublic(Element record); +} diff --git a/src/main/java/de/vzg/oai_importer/importer/Importer.java b/src/main/java/de/vzg/oai_importer/importer/Importer.java index eaf8422..ddc0c2c 100644 --- a/src/main/java/de/vzg/oai_importer/importer/Importer.java +++ b/src/main/java/de/vzg/oai_importer/importer/Importer.java @@ -1,5 +1,7 @@ package de.vzg.oai_importer.importer; +import java.io.IOException; +import java.net.URISyntaxException; import java.util.List; import java.util.Map; @@ -10,10 +12,12 @@ import de.vzg.oai_importer.mycore.MyCoReTargetConfiguration; import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfo; +import javax.xml.transform.TransformerException; + @Service public interface Importer { - boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity record); + boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity record) throws TransformerException, IOException, URISyntaxException; boolean updateRecord(MyCoReTargetConfiguration target, ForeignEntity record, MyCoReObjectInfo object); diff --git a/src/main/java/de/vzg/oai_importer/importer/PPNLIST2MyCoReImporter.java b/src/main/java/de/vzg/oai_importer/importer/PPNLIST2MyCoReImporter.java index e43a879..9312ab5 100644 --- a/src/main/java/de/vzg/oai_importer/importer/PPNLIST2MyCoReImporter.java +++ b/src/main/java/de/vzg/oai_importer/importer/PPNLIST2MyCoReImporter.java @@ -6,6 +6,7 @@ import java.io.InputStream; import java.io.StringReader; import java.net.HttpURLConnection; +import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -14,6 +15,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -28,6 
+30,7 @@ import javax.xml.transform.stream.StreamSource; import org.jdom2.Document; +import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.Namespace; import org.jdom2.input.SAXBuilder; @@ -38,28 +41,38 @@ import org.mycore.pica2mods.xsl.Pica2ModsXSLTURIResolver; import org.mycore.pica2mods.xsl.model.Pica2ModsConfig; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.ApplicationContext; import org.springframework.stereotype.Service; import de.vzg.oai_importer.ImporterService; import de.vzg.oai_importer.PicaUtils; +import de.vzg.oai_importer.dfi.DFISRURecordFilter; import de.vzg.oai_importer.foreign.jpa.ForeignEntity; import de.vzg.oai_importer.mapping.jpa.Mapping; import de.vzg.oai_importer.mycore.MODSUtil; import de.vzg.oai_importer.mycore.MyCoReRestAPIService; import de.vzg.oai_importer.mycore.MyCoReTargetConfiguration; +import de.vzg.oai_importer.mycore.MyCoReUtil; +import de.vzg.oai_importer.mycore.api.model.MyCoReFileListDirectory; +import de.vzg.oai_importer.mycore.api.model.MyCoReFileListFile; import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfo; import lombok.SneakyThrows; import lombok.extern.log4j.Log4j2; @Service("PPNLIST2MyCoReImporter") @Log4j2 -public class PPNLIST2MyCoReImporter implements Importer { +public class PPNLIST2MyCoReImporter implements Importer, FileBased { + @Autowired MyCoReRestAPIService restAPIService; - private Map config; + + @Autowired + ApplicationContext context; Namespace picaxml = Namespace.getNamespace("info:srw/schema/5/picaXML-v1.0"); + private Map config; + private static Document getForeignEntityDocument(ForeignEntity record) { Document doc1; try (var sr = new StringReader(record.getMetadata())) { @@ -91,25 +104,42 @@ private static List resolveFiles(ForeignEntity record, List filePP } } // in the new file path we check for the direct file name, because there is no subfolder - int count = 1; - Path currentFilePath; - while (true) { - currentFilePath = 
newPath.resolve(s + "-" + count + ".pdf"); - if (!Files.exists(currentFilePath)) { - break; - } - files.add(currentFilePath); - count++; + try (var newFileStream = Files.list(newPath)) { /* Files.list holds an open directory handle until the stream is closed */ + newFileStream + .filter(p -> { + String fileNameStr = p.getFileName().toString(); + return fileNameStr.startsWith(s + "-") || fileNameStr.startsWith(s + " -"); + }) + .forEach(files::add); + } catch (IOException e) { + log.error("Error while listing new files for record {}", record.getId(), e); } } return files; } - @SneakyThrows + public boolean isPublic(String ppn, Element record) { + String fileRightsDetector = this.config.get("file-rights-detector-service"); + if (fileRightsDetector == null) { + return true; + } + boolean aPublic = this.context.getBean(fileRightsDetector, FileRightsDetector.class).isPublic(record); + log.info("The record {} is {}", ppn, aPublic ? "public" : "not public"); + + return aPublic; + } + + @Override - public boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity record) { + public boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity record) throws TransformerException, IOException, URISyntaxException { + // this should be the ppn from the record in the field 003S@0 or 007G@0 + Document picaXML = getForeignEntityDocument(record); // Convert to Mods - var mods = convertToMods(target, record); + List fileURLs = PicaUtils.getPicaField(picaXML, "017C", "u") + .toList(); + boolean free = !fileURLs.isEmpty() && isPublic(record.getForeignId(), picaXML.getRootElement()); + + var mods = convertToMods(target, record, free, picaXML); var object = MODSUtil.wrapInMyCoReFrame(mods, config.get("base-id"), config.get("status")); MODSUtil.setRecordInfo(object, record.getForeignId(), record.getConfigId()); @@ -120,9 +150,7 @@ public boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity reco var newFilesPath = config.get("new-file-path"); var newPath = Paths.get(newFilesPath); - // this should be the ppn from the record in the field 003S@0 or 007G@0 - Document 
picaXML = getForeignEntityDocument(record); - var filePPN = Stream.of(PicaUtils.getPicaField(picaXML, "003@0", "0"), + var filePPN = Stream.of(PicaUtils.getPicaField(picaXML, "003@", "0"), PicaUtils.getPicaField(picaXML, "007G", "0")) .flatMap(s -> s) .distinct() @@ -133,12 +161,10 @@ public boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity reco log.info("Found {} files for record {} in filesystem with ppn {}", files.size(), record.getForeignId(), filePPN); - List fileURLs = PicaUtils.getPicaField(picaXML, "017C", "u") - .toList(); - List filteredFileURLs = fileURLs.stream() .filter(url -> (url.startsWith("http") || url.startsWith("https")) && - (url.toLowerCase(Locale.ROOT).endsWith(".pdf") || url.contains("//www.dfi.de/"))) + (url.toLowerCase(Locale.ROOT).endsWith(".pdf") || url.contains("//www.dfi.de/")) + && !url.contains("bnpparibas")) .toList(); // transfer everything @@ -173,17 +199,7 @@ public boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity reco .map(PPNLIST2MyCoReImporter::extractFileName).findFirst().get() : files.get(0).getFileName().toString(); - List classifications; - if (fileURLs.isEmpty()) { - classifications = List.of("derivate_types:content", "mir_access:ipAddressRange"); - } else { - classifications = List.of("derivate_types:content"); - } - - String derivateURL = restAPIService.postDerivate(target, mycoreID, "0", maindoc, classifications, - Collections.emptyList()); - log.info("Created derivate {} for record {}", derivateURL, record.getId()); - String derivateID = derivateURL.substring(derivateURL.lastIndexOf("/") + 1); + String derivateID = createDerivate(target, record, free, mycoreID, maindoc); // prefer files over URL if (!files.isEmpty()) { @@ -218,8 +234,24 @@ public boolean importRecord(MyCoReTargetConfiguration target, ForeignEntity reco return true; } + private String createDerivate(MyCoReTargetConfiguration target, ForeignEntity record, boolean free, String mycoreID, + String maindoc) throws 
IOException, URISyntaxException { + List classifications; + if (!free) { + classifications = List.of("derivate_types:content", "mir_access:ipAddressRange"); + } else { + classifications = List.of("derivate_types:content"); + } + + String derivateURL = restAPIService.postDerivate(target, mycoreID, "0", maindoc, classifications, + Collections.emptyList()); + log.info("Created derivate {} for record {}", derivateURL, record.getId()); + String derivateID = derivateURL.substring(derivateURL.lastIndexOf("/") + 1); + return derivateID; + } + public ImporterService.Pair downloadFile(String url, int redirectCount) throws IOException { - if(redirectCount > 5) { + if (redirectCount > 5) { throw new IOException("Too many redirects"); } HttpURLConnection con = (HttpURLConnection) new URL(url).openConnection(); @@ -236,7 +268,7 @@ public ImporterService.Pair downloadFile(String url, int redirec if (real.startsWith("/") && url.contains("//")) { real = url.substring(0, url.indexOf('/', url.indexOf("//") + 2)) + real.substring(1); } - return downloadFile(real, redirectCount+1); + return downloadFile(real, redirectCount + 1); } else { try (InputStream is = con.getInputStream()) { byte[] bytes = is.readAllBytes(); @@ -247,13 +279,16 @@ public ImporterService.Pair downloadFile(String url, int redirec } } - - @SneakyThrows @Override public String testRecord(MyCoReTargetConfiguration target, ForeignEntity record) { - - String mods = convertToMods(target, record); + // this should be the ppn from the record in the field 003S@0 or 007G@0 + Document picaXML = getForeignEntityDocument(record); + // Convert to Mods + List fileURLs = PicaUtils.getPicaField(picaXML, "017C", "u") + .toList(); + boolean free = !fileURLs.isEmpty() && isPublic(record.getForeignId(), picaXML.getRootElement()); + String mods = convertToMods(target, record, free, picaXML); Document document = MODSUtil.wrapInMyCoReFrame(mods, config.get("base-id"), config.get("status")); MODSUtil.setRecordInfo(document, 
record.getForeignId(), record.getConfigId()); @@ -264,7 +299,13 @@ public String testRecord(MyCoReTargetConfiguration target, ForeignEntity record) @SneakyThrows @Override public boolean updateRecord(MyCoReTargetConfiguration target, ForeignEntity record, MyCoReObjectInfo objectInfo) { - var mods = convertToMods(target, record); + // this should be the ppn from the record in the field 003S@0 or 007G@0 + Document picaXML = getForeignEntityDocument(record); + // Convert to Mods + List fileURLs = PicaUtils.getPicaField(picaXML, "017C", "u") + .toList(); + boolean free = !fileURLs.isEmpty() && isPublic(record.getForeignId(), picaXML.getRootElement()); + var mods = convertToMods(target, record, free, picaXML); var object = MODSUtil.wrapInMyCoReFrame(mods, config.get("base-id"), config.get("status")); MODSUtil.setRecordInfo(object, record.getForeignId(), record.getConfigId()); @@ -274,11 +315,9 @@ public boolean updateRecord(MyCoReTargetConfiguration target, ForeignEntity reco return true; } - private String convertToMods(MyCoReTargetConfiguration target, ForeignEntity record) throws TransformerException { + private String convertToMods(MyCoReTargetConfiguration target, ForeignEntity record, boolean free, Document picaXml) + throws TransformerException { String resultStr; - Document doc1 = getForeignEntityDocument(record); - List fileURLs = PicaUtils.getPicaField(doc1, "017C", "u") - .toList(); Pica2ModsConfig pica2ModsConfig = new Pica2ModsConfig(); pica2ModsConfig.setUnapiUrl("https://unapi.k10plus.de/"); @@ -302,11 +341,11 @@ private String convertToMods(MyCoReTargetConfiguration target, ForeignEntity rec transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); transformer.setParameter("WebApplicationBaseURL", pica2ModsConfig.getMycoreUrl()); - transformer.setParameter("RestrictedAccess", fileURLs.isEmpty() ? "true" : "false"); + transformer.setParameter("RestrictedAccess", !free ? 
"true" : "false"); ByteArrayOutputStream baos = new ByteArrayOutputStream(); javax.xml.transform.Result result = new javax.xml.transform.stream.StreamResult(baos); - transformer.transform(new JDOMSource(doc1), result); + transformer.transform(new JDOMSource(picaXml), result); resultStr = baos.toString(StandardCharsets.UTF_8); return resultStr; } @@ -316,6 +355,144 @@ public List checkMapping(MyCoReTargetConfiguration target, ForeignEntit return Collections.emptyList(); } + @Override + public List listMissingFiles(MyCoReTargetConfiguration target, MyCoReObjectInfo info, + ForeignEntity record) throws IOException, URISyntaxException { + // check files + var davPath = config.get("file-path"); + var path = Paths.get(davPath); + + var newFilesPath = config.get("new-file-path"); + var newPath = Paths.get(newFilesPath); + + // this should be the ppn from the record in the field 003S@0 or 007G@0 + Document picaXML = getForeignEntityDocument(record); + var filePPN = Stream.of(PicaUtils.getPicaField(picaXML, "003@", "0"), + PicaUtils.getPicaField(picaXML, "007G", "0")) + .flatMap(s -> s) + .distinct() + .collect(Collectors.toList()); + + List files = resolveFiles(record, filePPN, path, newPath); + + Document object = restAPIService.getObject(target, info.getMycoreId()); + List derivateIDs = MyCoReUtil.getDerivateIDs(object); + HashSet notExistingFiles = new HashSet<>(files); + for (String derivateID : derivateIDs) { + MyCoReFileListDirectory fileListRepository + = restAPIService.getFiles(target, info.getMycoreId(), derivateID); + if (fileListRepository == null || fileListRepository.getFiles() == null) { + log.error("No files found for record {} in derivate {}", record.getId(), derivateID); + continue; + } + fileListRepository.getFiles().stream() + .map(MyCoReFileListFile::getName) + .forEach(ftrm -> { + notExistingFiles.removeIf(p -> p.getFileName().toString().equals(ftrm)); + }); + } + if (!derivateIDs.isEmpty() && !notExistingFiles.isEmpty()) { + log.info("Missing files 
but derivate present for record {} in derivate {} (skip em)", record.getId(), + derivateIDs); + return List.of(); + } + return new ArrayList<>(notExistingFiles).stream().map(p -> p.getFileName().toString()).toList(); + } + + public List fixMissingFiles(MyCoReTargetConfiguration target, MyCoReObjectInfo info, ForeignEntity record) + throws IOException, URISyntaxException { + // check files + var davPath = config.get("file-path"); + var path = Paths.get(davPath); + + var newFilesPath = config.get("new-file-path"); + var newPath = Paths.get(newFilesPath); + + // this should be the ppn from the record in the field 003S@0 or 007G@0 + Document picaXML = getForeignEntityDocument(record); + var filePPN = Stream.of(PicaUtils.getPicaField(picaXML, "003@", "0"), + PicaUtils.getPicaField(picaXML, "007G", "0")) + .flatMap(s -> s) + .distinct() + .collect(Collectors.toList()); + + List files = resolveFiles(record, filePPN, path, newPath); + + Document object = restAPIService.getObject(target, info.getMycoreId()); + List derivateIDs = MyCoReUtil.getDerivateIDs(object); + HashSet notExistingFiles = new HashSet<>(files); + for (String derivateID : derivateIDs) { + MyCoReFileListDirectory fileListRepository + = restAPIService.getFiles(target, info.getMycoreId(), derivateID); + if (fileListRepository == null || fileListRepository.getFiles() == null) { + log.error("No files found for record {} in derivate {}", record.getId(), derivateID); + continue; + } + fileListRepository.getFiles().stream() + .map(MyCoReFileListFile::getName) + .forEach(ftrm -> { + notExistingFiles.removeIf(p -> p.getFileName().toString().equals(ftrm)); + }); + } + + if (notExistingFiles.isEmpty()) { + log.info("No missing files for record {}", record.getId()); + return List.of(); + } + + String derivateID = derivateIDs.stream().findFirst().orElse(null); + if (derivateID == null) { + List fileURLs = PicaUtils.getPicaField(picaXML, "017C", "u") + .toList(); + + boolean free = !fileURLs.isEmpty() && 
isPublic(record.getForeignId(), picaXML.getRootElement()); + derivateID + = createDerivate(target, record, free, info.getMycoreId(), files.get(0).getFileName().toString()); + } + + for (Path p : files) { + log.info("Import file {} to {}", p.getFileName(), info.getMycoreId()); + restAPIService.putFiles(target, info.getMycoreId(), derivateID, p.getFileName().toString(), + Files.newInputStream(p)); + log.info("Imported file {} to {}", p.getFileName(), info.getMycoreId()); + } + + return files.stream().map(p -> p.getFileName().toString()).toList(); + } + + @Override + public List listImportableFiles(MyCoReTargetConfiguration target, ForeignEntity record) + throws IOException, URISyntaxException { + // check files + var davPath = config.get("file-path"); + var path = Paths.get(davPath); + + var newFilesPath = config.get("new-file-path"); + var newPath = Paths.get(newFilesPath); + + // this should be the ppn from the record in the field 003S@0 or 007G@0 + Document picaXML = getForeignEntityDocument(record); + var filePPN = Stream.of(PicaUtils.getPicaField(picaXML, "003@", "0"), + PicaUtils.getPicaField(picaXML, "007G", "0")) + .flatMap(s -> s) + .distinct() + .collect(Collectors.toList()); + + List files = resolveFiles(record, filePPN, path, newPath); + return files.stream().map(p -> p.getFileName().toString()).toList(); + } + + public boolean shouldNotBeImported(ForeignEntity record, MyCoReObjectInfo info) throws IOException, JDOMException { + String metadata = record.getMetadata(); + SAXBuilder saxBuilder = new SAXBuilder(); + + Document doc = saxBuilder.build(new StringReader(metadata)); + DFISRURecordFilter dfisruRecordFilter = new DFISRURecordFilter(); + boolean filter = dfisruRecordFilter.filter(doc, null); + //log.info("Record {} with mycore id {} should not have been imported", record.getId(), info.getMycoreId()); + return !filter; + } + @Override public void setConfig(Map importerConfig) { this.config = importerConfig; diff --git 
a/src/main/java/de/vzg/oai_importer/mycore/MyCoReRestAPIService.java b/src/main/java/de/vzg/oai_importer/mycore/MyCoReRestAPIService.java index a662483..1b704e4 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/MyCoReRestAPIService.java +++ b/src/main/java/de/vzg/oai_importer/mycore/MyCoReRestAPIService.java @@ -25,6 +25,7 @@ import de.vzg.oai_importer.mycore.api.MyCoReObjectQuery; import de.vzg.oai_importer.mycore.api.impl.ApacheHttpClientTransferLayer; import de.vzg.oai_importer.mycore.api.impl.MyCoReV2JDOMClient; +import de.vzg.oai_importer.mycore.api.model.MyCoReFileListDirectory; import de.vzg.oai_importer.mycore.api.model.MyCoReObjectList; @Service @@ -213,6 +214,18 @@ public void putFiles(MyCoReTargetConfiguration target, String objectID, String d client.putFile(url, objectID, derivativeID, authenticate, "/"+filename, is); } + public MyCoReFileListDirectory getFiles(MyCoReTargetConfiguration target, String objectID, String derivativeID) + throws IOException, URISyntaxException { + String url = target.getUrl(); + MyCoReV2JDOMClient client = new MyCoReV2JDOMClient(new ApacheHttpClientTransferLayer()); + + String authenticate = authenticate(target); + if (authenticate == null) { + throw new IOException("Could not authenticate"); + } + return client.getFiles(url, objectID, derivativeID, authenticate); + } + public List getClassificationCategories(MyCoReTargetConfiguration target, String classId) throws IOException, URISyntaxException { String url = target.getUrl(); diff --git a/src/main/java/de/vzg/oai_importer/mycore/MyCoReSynchronizeService.java b/src/main/java/de/vzg/oai_importer/mycore/MyCoReSynchronizeService.java index 6d226aa..82ee4f5 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/MyCoReSynchronizeService.java +++ b/src/main/java/de/vzg/oai_importer/mycore/MyCoReSynchronizeService.java @@ -12,7 +12,6 @@ import de.vzg.oai_importer.mycore.api.MyCoReObjectQuery; import de.vzg.oai_importer.mycore.api.model.MyCoReObjectList; -import 
de.vzg.oai_importer.mycore.api.model.MyCoReObjectListEntry; import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfo; import de.vzg.oai_importer.mycore.jpa.MyCoReObjectInfoRepository; import lombok.extern.log4j.Log4j2; @@ -46,50 +45,60 @@ public List synchronize(MyCoReTargetConfiguration target) thro while (hasMore) { MyCoReObjectList objects = mycoreRest.getObjects(target, query); - for (MyCoReObjectListEntry entry : objects.getEntries()) { - MyCoReObjectInfo info = mycoreRepo.findByMycoreIdAndRepository(entry.getObjectID(), target.getUrl()); - if (info == null) { - info = new MyCoReObjectInfo(); - } - info.setMycoreId(entry.getObjectID()); - info.setRepository(target.getUrl()); - - Document object = mycoreRest.getObject(target, entry.getObjectID()); - - String parent = MODSUtil.getParent(object); - info.setParentMycoreId(parent); - - OffsetDateTime createDate = MODSUtil.getCreateDate(object); - info.setCreated(createDate); - - OffsetDateTime lastModified = MODSUtil.getLastModified(object); - info.setLastModified(lastModified); - - MODSUtil.MODSRecordInfo recordInfo = MODSUtil.getRecordInfo(object); - if (recordInfo != null) { - String id = recordInfo.id(); - if (id != null) { - info.setImportID(id); - } - String source = recordInfo.url(); - if (source != null) { - info.setImportURL(source); - } - } - - String createdBy = MODSUtil.getCreatedBy(object); - if (createdBy == null) { - log.error("Could not extract createdBy from " + entry.getObjectID()); - continue; - } - info.setCreatedBy(createdBy); - - String state = MODSUtil.getState(object); - info.setState(state); - - infos.add(info); - mycoreRepo.save(info); - } + objects.getEntries().stream() + .parallel() + .forEach(entry -> { + MyCoReObjectInfo info = mycoreRepo + .findByMycoreIdAndRepository(entry.getObjectID(), target.getUrl()); + if (info == null) { + info = new MyCoReObjectInfo(); + } + info.setMycoreId(entry.getObjectID()); + info.setRepository(target.getUrl()); + + Document object = null; + try { + object 
= mycoreRest.getObject(target, entry.getObjectID()); + } catch (IOException|URISyntaxException e) { + log.error("Could not fetch object " + entry.getObjectID(), e); + return; + } + + String parent = MODSUtil.getParent(object); + info.setParentMycoreId(parent); + + OffsetDateTime createDate = MODSUtil.getCreateDate(object); + info.setCreated(createDate); + + OffsetDateTime lastModified = MODSUtil.getLastModified(object); + info.setLastModified(lastModified); + + MODSUtil.MODSRecordInfo recordInfo = MODSUtil.getRecordInfo(object); + if (recordInfo != null) { + String id = recordInfo.id(); + if (id != null) { + info.setImportID(id); + } + String source = recordInfo.url(); + if (source != null) { + info.setImportURL(source); + } + } + + String createdBy = MODSUtil.getCreatedBy(object); + if (createdBy == null) { + log.error("Could not extract createdBy from " + entry.getObjectID()); + return; + } + info.setCreatedBy(createdBy); + + String state = MODSUtil.getState(object); + info.setState(state); + + infos.add(info); + mycoreRepo.save(info); + }); + hasMore = objects.getEntries().size() == 1000; query.setOffset(query.getOffset() + 1000); } diff --git a/src/main/java/de/vzg/oai_importer/mycore/MyCoReUtil.java b/src/main/java/de/vzg/oai_importer/mycore/MyCoReUtil.java index 47c4f59..f95c740 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/MyCoReUtil.java +++ b/src/main/java/de/vzg/oai_importer/mycore/MyCoReUtil.java @@ -1,17 +1,20 @@ package de.vzg.oai_importer.mycore; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + import org.jdom2.Document; import org.jdom2.Element; -import java.util.Optional; - public class MyCoReUtil { - public static Document createDerivate(String base_id, String parent, String mainFile) { + public static Document createDerivate(String baseId, String parent, String mainFile) { Element mycorederivateElement = new Element("mycorederivate"); Document document = new 
Document(mycorederivateElement); - mycorederivateElement.setAttribute("ID", base_id + "_00000000"); + mycorederivateElement.setAttribute("ID", baseId + "_00000000"); Element derivateElement = new Element("derivate"); mycorederivateElement.addContent(derivateElement); @@ -55,6 +58,20 @@ public static Document createDerivate(String base_id, String parent, String main return document; } + public static List getDerivateIDs(Document insertedDerivate) { + return Optional.ofNullable(insertedDerivate.getRootElement()) + .map(e -> e.getChild("structure")) + .stream() + .map(e -> e.getChild("derobjects")) + .filter(Objects::nonNull) + .flatMap(e -> e.getChildren("derobject").stream()) + .filter(Objects::nonNull) + .map(e -> e.getAttributeValue("href", MODSUtil.XLINK_NAMESPACE)) + .filter(Objects::nonNull) + + .collect(Collectors.toList()); + } + public static void setMainFile(Document insertedDerivate, String mainFile) { Optional.ofNullable(insertedDerivate.getRootElement()) .map(e -> e.getChild("derivate")) diff --git a/src/main/java/de/vzg/oai_importer/mycore/api/MCRV2RestClient.java b/src/main/java/de/vzg/oai_importer/mycore/api/MCRV2RestClient.java index fa1627d..4505b11 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/api/MCRV2RestClient.java +++ b/src/main/java/de/vzg/oai_importer/mycore/api/MCRV2RestClient.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import de.vzg.oai_importer.mycore.api.model.MyCoReFileListDirectory; import de.vzg.oai_importer.mycore.api.model.MyCoReObjectList; import de.vzg.oai_importer.mycore.api.transfer.RequestParameterAdapter; import de.vzg.oai_importer.mycore.api.transfer.ResultMapper; @@ -307,6 +308,29 @@ public String putFile(String repositoryURL, } + public MyCoReFileListDirectory getFiles(String url, String objectID, String derivativeID, String authenticate) + throws IOException, URISyntaxException { + HashMap> parameters = new HashMap<>(); + HashMap headers 
= new HashMap<>(); + + applyAuth(authenticate, headers); + adaptRequestParameters(parameters, headers); + + TransferResult result = transferLayer.get( + url + API_V_2_OBJECTS + "/" + objectID + "/derivates/" + derivativeID + "/contents", headers, parameters); + int i = result.statusCode(); + + if (i != 200) { + throw new RuntimeException("Error while getting files: " + result.statusMessage()); + } + + try (InputStream inputStream = result.inputStream()) { + return resultMapper.mapFileList(inputStream); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + private void applyQueryParameters(MyCoReObjectQuery query, HashMap> parameters) { DateTimeFormatter formatter = DateTimeFormatter.ofPattern("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH) .withZone(ZoneId.of("GMT")); diff --git a/src/main/java/de/vzg/oai_importer/mycore/api/impl/JDOM2ResultMapper.java b/src/main/java/de/vzg/oai_importer/mycore/api/impl/JDOM2ResultMapper.java index af90d7d..c4d760f 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/api/impl/JDOM2ResultMapper.java +++ b/src/main/java/de/vzg/oai_importer/mycore/api/impl/JDOM2ResultMapper.java @@ -1,15 +1,17 @@ package de.vzg.oai_importer.mycore.api.impl; -import de.vzg.oai_importer.mycore.api.transfer.ResultMapper; -import de.vzg.oai_importer.mycore.api.model.MyCoReObjectList; -import jakarta.xml.bind.JAXBContext; -import jakarta.xml.bind.JAXBException; +import java.io.IOException; +import java.io.InputStream; + import org.jdom2.Document; import org.jdom2.JDOMException; import org.jdom2.input.SAXBuilder; -import java.io.IOException; -import java.io.InputStream; +import de.vzg.oai_importer.mycore.api.model.MyCoReFileListDirectory; +import de.vzg.oai_importer.mycore.api.model.MyCoReObjectList; +import de.vzg.oai_importer.mycore.api.transfer.ResultMapper; +import jakarta.xml.bind.JAXBContext; +import jakarta.xml.bind.JAXBException; public class JDOM2ResultMapper implements ResultMapper { @@ -31,4 +33,14 @@ public MyCoReObjectList 
mapObjectList(InputStream is) throws IOException { throw new IOException(e); } } + + @Override + public MyCoReFileListDirectory mapFileList(InputStream is) throws IOException { + try { + JAXBContext context = JAXBContext.newInstance(MyCoReFileListDirectory.class); + return (MyCoReFileListDirectory) context.createUnmarshaller().unmarshal(is); + } catch (JAXBException e) { + throw new IOException(e); + } + } } diff --git a/src/main/java/de/vzg/oai_importer/mycore/api/model/MyCoReFileListDirectory.java b/src/main/java/de/vzg/oai_importer/mycore/api/model/MyCoReFileListDirectory.java new file mode 100644 index 0000000..5fc183c --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/mycore/api/model/MyCoReFileListDirectory.java @@ -0,0 +1,81 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. + * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see . 
+ */ + +package de.vzg.oai_importer.mycore.api.model; + +import java.time.Instant; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonFormat; + +import jakarta.xml.bind.annotation.XmlAttribute; +import jakarta.xml.bind.annotation.XmlElement; +import jakarta.xml.bind.annotation.XmlRootElement; +import jakarta.xml.bind.annotation.adapters.XmlJavaTypeAdapter; + +@XmlRootElement(name = "directory") +public class MyCoReFileListDirectory { + + private Instant lastModified; + + private String name; + + private List directories; + + private List files; + + @XmlElement(name = "directory", required = false) + public List getDirectories() { + return directories; + } + + public void setDirectories(List directories) { + this.directories = directories; + } + + @XmlElement(name = "file", required = false) + public List getFiles() { + return files; + } + + public void setFiles(List files) { + this.files = files; + } + + @XmlAttribute(name = "lastModified", required = true) + @XmlJavaTypeAdapter(value = MyCoReInstantXMLAdapter.class) + @JsonFormat(shape = JsonFormat.Shape.STRING) + public Instant getLastModified() { + return lastModified; + } + + public void setLastModified(Instant lastModified) { + this.lastModified = lastModified; + } + + @XmlAttribute(name = "name", required = false) + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + +} diff --git a/src/main/java/de/vzg/oai_importer/mycore/api/model/MyCoReFileListFile.java b/src/main/java/de/vzg/oai_importer/mycore/api/model/MyCoReFileListFile.java new file mode 100644 index 0000000..efb1475 --- /dev/null +++ b/src/main/java/de/vzg/oai_importer/mycore/api/model/MyCoReFileListFile.java @@ -0,0 +1,88 @@ +/* + * This file is part of *** M y C o R e *** + * See http://www.mycore.de/ for details. 
+ * + * MyCoRe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MyCoRe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with MyCoRe. If not, see . + */ + +package de.vzg.oai_importer.mycore.api.model; + +import java.time.Instant; + +import com.fasterxml.jackson.annotation.JsonFormat; + +import jakarta.xml.bind.annotation.XmlAttribute; +import jakarta.xml.bind.annotation.XmlRootElement; +import jakarta.xml.bind.annotation.adapters.XmlJavaTypeAdapter; + +/** JAXB model of a {@code file} element of a file listing; its properties are bound as required XML attributes. */ @XmlRootElement(name = "file") +public class MyCoReFileListFile { + + private Instant lastModified; + + private String name; + + private String md5; + + private String mimeType; + + private long size; + + /** @return the {@code lastModified} attribute, converted by {@code MyCoReInstantXMLAdapter} for XML and written as a string shape for JSON */ @XmlAttribute(name = "lastModified", required = true) + @XmlJavaTypeAdapter(value = MyCoReInstantXMLAdapter.class) + @JsonFormat(shape = JsonFormat.Shape.STRING) + public Instant getLastModified() { + return lastModified; + } + + public void setLastModified(Instant lastModified) { + this.lastModified = lastModified; + } + + /** @return the {@code name} attribute */ @XmlAttribute(name = "name", required = true) + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + /** @return the {@code md5} attribute */ /* bound to its own "md5" attribute; it must not reuse the "lastModified" name already taken by getLastModified() */ @XmlAttribute(name = "md5", required = true) + public String getMd5() { + return md5; + } + + public void setMd5(String md5) { + this.md5 = md5; + } + + /** @return the {@code mimeType} attribute */ @XmlAttribute(name = "mimeType", required = true) + public String getMimeType() { + return mimeType; + } + + public void setMimeType(String mimeType) { + this.mimeType = mimeType; + } + + @XmlAttribute(name = "size", 
required = true) + public long getSize() { + return size; + } + + public void setSize(long size) { + this.size = size; + } +} diff --git a/src/main/java/de/vzg/oai_importer/mycore/api/transfer/ResultMapper.java b/src/main/java/de/vzg/oai_importer/mycore/api/transfer/ResultMapper.java index f852c5b..d243fa2 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/api/transfer/ResultMapper.java +++ b/src/main/java/de/vzg/oai_importer/mycore/api/transfer/ResultMapper.java @@ -1,5 +1,6 @@ package de.vzg.oai_importer.mycore.api.transfer; +import de.vzg.oai_importer.mycore.api.model.MyCoReFileListDirectory; import de.vzg.oai_importer.mycore.api.model.MyCoReObjectList; import java.io.IOException; @@ -10,4 +11,5 @@ public interface ResultMapper { MyCoReObjectList mapObjectList(InputStream is) throws IOException; + MyCoReFileListDirectory mapFileList(InputStream is) throws IOException; } diff --git a/src/main/java/de/vzg/oai_importer/mycore/jpa/MyCoReObjectInfo.java b/src/main/java/de/vzg/oai_importer/mycore/jpa/MyCoReObjectInfo.java index 53e8462..524c63c 100644 --- a/src/main/java/de/vzg/oai_importer/mycore/jpa/MyCoReObjectInfo.java +++ b/src/main/java/de/vzg/oai_importer/mycore/jpa/MyCoReObjectInfo.java @@ -17,8 +17,20 @@ @Entity @Table(name = "mycore_object_info", - uniqueConstraints = { @UniqueConstraint(columnNames = { "mycore_id", "repository" }) - }) + uniqueConstraints = { @UniqueConstraint(columnNames = { "mycore_id", "repository" }), + }, + indexes = { + @jakarta.persistence.Index(name = "mycore_id_idx", columnList = "mycore_id"), + @jakarta.persistence.Index(name = "repository_idx", columnList = "repository"), + @jakarta.persistence.Index(name = "parent_mycore_id_idx", columnList = "parent_mycore_id"), + @jakarta.persistence.Index(name = "import_id_idx", columnList = "import_id"), + @jakarta.persistence.Index(name = "import_source_idx", columnList = "import_source"), + @jakarta.persistence.Index(name = "last_modified_idx", columnList = "last_modified"), + 
@jakarta.persistence.Index(name = "created_idx", columnList = "created"), + @jakarta.persistence.Index(name = "createdBy_idx", columnList = "createdBy"), + @jakarta.persistence.Index(name = "state_idx", columnList = "state") + } +) @Getter @Setter @NoArgsConstructor diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index e2e3045..7e35b95 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,7 +1,30 @@ -spring.datasource.url=jdbc:h2:file:./data/database;AUTO_SERVER=TRUE +spring.datasource.url=jdbc:h2:file:../data/database;AUTO_SERVER=TRUE spring.datasource.driverClassName=org.h2.Driver +spring.jpa.database-platform=org.hibernate.dialect.H2Dialect spring.datasource.username=sa spring.datasource.password=password -spring.jpa.database-platform=org.hibernate.dialect.H2Dialect spring.jpa.generate-ddl=true +#importer.sru-sources.gvk.query-pattern=pica.sxn={date} and ((pica.sge="615" and (pica.exk="pressemappe" or pica.exk="dfi aktuell" or pica.exk="afa" or pica.exk="dfi compact" or pica.exk="tondokument" or pica.exk="video" or pica.exk="abschlussarbeit" or pica.exk="Zeitschriftenaufsatz" or pica.exk="Sicherheitskopie" or pica.exk="GFfK" or pica.exk="PA-Volltext")) or (pica.sge="lg 3" and (pica.exk="pressemappe" or pica.exk="Karikatur" or pica.exk="PA-Volltext" or pica.exk="Presseartikel" or pica.exk="Sicherheitskopie"))) +#importer.sru-sources.gvk.url=https://sru.k10plus.de/opac-de-627 +#importer.sru-sources.gvk.date-overwrite= +#importer.sru-sources.gvk.newest-date= +#importer.sru-sources.gvk.record-filter-service=DFISRURecordFilter +# +##importer.ppn-lists.gvk.file-paths=/Users/paschty/ppnlist.txt +# +#importer.targets.dfi.url=https://repositorium.dfi.de/ +#importer.targets.dfi.user= +#importer.targets.dfi.password= +# +#importer.jobs.import-dfi.source-config-id=gvk +#importer.jobs.import-dfi.target-config-id=dfi 
+#importer.jobs.import-dfi.importer=PPNLIST2MyCoReImporter +#importer.jobs.import-dfi.importer-config.base-id=dfi_mods +#importer.jobs.import-dfi.importer-config.status=imported +#importer.jobs.import-dfi.importer-config.file-path=/Users/paschty/Downloads/dfi/dfiexport/ +#importer.jobs.import-dfi.importer-config.new-file-path=/Users/paschty/Downloads/dfi/new/ +#importer.jobs.import-dfi.importer-config.stylesheet=xsl/pica2mods_dfi.xsl +#importer.jobs.import-dfi.importer-config.file-rights-detector-service=DFIFileRightsDetector +#importer.jobs.import-dfi.auto=false + diff --git a/src/main/resources/templates/job_records.html b/src/main/resources/templates/job_records.html index 2dd31c6..2f120b3 100644 --- a/src/main/resources/templates/job_records.html +++ b/src/main/resources/templates/job_records.html @@ -20,6 +20,7 @@

Zu importierende Dokumente

diff --git a/src/main/resources/templates/job_records_file_check.html b/src/main/resources/templates/job_records_file_check.html new file mode 100644 index 0000000..1b2483e --- /dev/null +++ b/src/main/resources/templates/job_records_file_check.html @@ -0,0 +1,138 @@ + + + + + + +
+
+
+
+
+
+
+
+
+

Zu importierende Dokumente

+
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + +
IdMetadatenDatumGelöschtAktion
+ + + + + + + + + + + + + + + Test + Import +
+
+
+
+
+ +
+
+
+
+ + + \ No newline at end of file diff --git a/src/main/resources/templates/job_update.html b/src/main/resources/templates/job_update.html index b2da77b..debe332 100644 --- a/src/main/resources/templates/job_update.html +++ b/src/main/resources/templates/job_update.html @@ -30,6 +30,7 @@

Bereits importierte Dokumente

@@ -106,7 +107,7 @@ th:href="@{'/jobs/' + ${jobID} + '/update/?page=' + ${page -1}}" th:text="${page}">1
  • Nächste + th:href="@{'/jobs/' + ${jobID} + '/update/?page=' +${records.getNumber()+1}}">Nächste
  • diff --git a/src/main/resources/xsl/dfi/pica2mods-dfi-relatedItem.xsl b/src/main/resources/xsl/dfi/pica2mods-dfi-relatedItem.xsl index cee228d..71ad992 100644 --- a/src/main/resources/xsl/dfi/pica2mods-dfi-relatedItem.xsl +++ b/src/main/resources/xsl/dfi/pica2mods-dfi-relatedItem.xsl @@ -246,6 +246,28 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/resources/xsl/pica2mods_dfi.xsl b/src/main/resources/xsl/pica2mods_dfi.xsl index f4d363b..62868e1 100644 --- a/src/main/resources/xsl/pica2mods_dfi.xsl +++ b/src/main/resources/xsl/pica2mods_dfi.xsl @@ -90,26 +90,26 @@ - + - + - + - + - + - +