Skip to content

Commit

Permalink
[ALLI-7732] Add rules for manipulating fields in datasources.ini. (#128)
Browse files Browse the repository at this point in the history
Rules allow copying, moving and deleting of fields.
  • Loading branch information
EreMaijala authored Jan 10, 2023
1 parent 0c4ab6b commit 9498ed1
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 19 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Anything marked with [**BC**] is known to affect backward compatibility with pre
- Colorized output by message type.
- Added support for hierarchical categories based on [HILCC](https://www1.columbia.edu/sec/cu/libraries/bts/hilcc/). See [useHILCC driver param](https://github.com/NatLibFi/RecordManager/wiki/Data-Source-Configuration#possible-settings-for-driverparams) for more information.
- [**BC**] Added support for UNICODE folding of key fields. Enabled by default and replaces the internal folding table, but can be disabled or configured with the `Site/key_folding_rules` setting in recordmanager.ini. It is recommended that `./console records:renormalize` is run to update all keys in the database to use the rules.
- Added support for specifying rules (fieldRules[] in datasources.ini) for copying, moving and deleting fields before they are sent to Solr.

### Changed

Expand Down
12 changes: 11 additions & 1 deletion conf/datasources.ini.sample
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
; solrIdPrefix Prefix to be used with the record ID's in Solr (optional, overrides idPrefix but does not modify the prefix used in the Database).
; indexMergedParts Whether to index merged component parts also separately with hidden_component_boolean field set to true. Defaults to true.
; indexUnprefixedIds Whether to index record ID's without the source prefix. Default is false.
; {field}_mapping A mapping file (.ini-style) to be used to map values of {field} when updating Solr index. Useful for e.g. mapping multiple location codes to one.
; {field}_mapping A mapping file (.ini-style) to be used to map values of a field when updating Solr index. Useful for e.g. mapping multiple location codes to one.
; institutionInBuilding If building hierarchy is in use, what to add as institution on the top level:
; institution Add the institution code (default)
; source Add the source id
Expand All @@ -74,6 +74,16 @@
; extraFields[] An array of extra fields added to each record from this data source when sending records to Solr.
; Use format fieldname:contents, e.g.
; extraFields[] = sector_str_mv:library
; fieldRules[] An array of rules for changing fields before they're sent to Solr. Allows deleting, copying and moving of fields.
; Rules are processed in order before processing mappings and converting hierarchical facets.
; The following rules are available:
; delete <field> Removes a field
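; copy <from> <to> [default] Copies field <from> to <to>, appending to any values <to> already has. A default value (a single word without spaces) can be specified for use when <from> is empty or doesn't exist.
; move <from> <to> [default] Moves field <from> to <to>, appending to any values <to> already has. A default value (a single word without spaces) can be specified for use when <from> is empty or doesn't exist.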
; Examples:
; fieldRules[] = "delete collection"
; fieldRules[] = "copy building building2_str_mv MAIN"
; fieldRules[] = "move author author2"
; driverParams[] An array of extra parameters that can be used to customize record driver behavior
; enrichments[] An array of enrichment class names that can be used to enrich records. ",final" can be appended to indicate that enrichment is executed to the
; final record after mappings etc. have been processed. By default it is executed before mappings, normalization etc.
Expand Down
102 changes: 94 additions & 8 deletions src/RecordManager/Base/Solr/SolrUpdater.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*
* PHP version 7
*
* Copyright (C) The National Library of Finland 2012-2022.
* Copyright (C) The National Library of Finland 2012-2023.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
Expand Down Expand Up @@ -61,6 +61,38 @@ class SolrUpdater
{
use \RecordManager\Base\Record\CreateRecordTrait;

/**
* Field processing rule for copy: the source field's value is added to the
* destination field and the source field is left in place.
*
* @var int
*/
public const RULE_COPY = 1;

/**
* Field processing rule for delete: the source field is removed. This rule
* has no destination field.
*
* @var int
*/
public const RULE_DELETE = 2;

/**
* Field processing rule for move: the source field's value is added to the
* destination field and the source field is then removed.
*
* @var int
*/
public const RULE_MOVE = 3;

/**
* Mappings from keywords to field processing rules
*
* The key is the first space-delimited word of a fieldRules[] entry in the
* data source configuration; the value is the matching RULE_* constant.
*
* @var array
*/
protected $ruleMap = [
'copy' => self::RULE_COPY,
'delete' => self::RULE_DELETE,
'move' => self::RULE_MOVE,
];

/**
* Database
*
Expand Down Expand Up @@ -1718,6 +1750,23 @@ protected function initDatasources($dataSourceConfig = null)
if (isset($settings['index']) && !$settings['index']) {
$this->nonIndexedSources[] = $source;
}

foreach ($settings['fieldRules'] ?? [] as $ruleStr) {
$ruleParts = explode(' ', $ruleStr);
$rule['op'] = $this->ruleMap[$ruleParts[0]] ?? null;
if (null === $rule['op']
|| (self::RULE_DELETE === $rule['op'] && empty($ruleParts[1]))
|| (self::RULE_DELETE !== $rule['op'] && empty($ruleParts[2]))
) {
throw new \Exception(
"Invalid field rule for $source: '$ruleStr'"
);
}
$rule['src'] = $ruleParts[1];
$rule['dst'] = $ruleParts[2] ?? null;
$rule['extra'] = $ruleParts[3] ?? null;
$this->settings[$source]['fieldProcessingRules'][] = $rule;
}
}
}

Expand Down Expand Up @@ -2132,9 +2181,12 @@ protected function augmentAndProcessFields(
$this->addInstitutionToBuilding($data, $source, $settings);
}

// Process any field rules:
$this->processFieldRules($source, $data);

// Map field values according to any mapping files
if (!$this->disableMappings) {
$data = $this->fieldMapper->mapValues($source, $data);
$this->fieldMapper->mapValues($source, $data);
}

// Special case: Special values for building (institution/location).
Expand Down Expand Up @@ -2278,12 +2330,48 @@ protected function normalizeFields(array &$data)
}
}

/**
 * Process any field rules
 *
 * Applies the rules stored under 'fieldProcessingRules' in the settings of the
 * given source to the field array, in order:
 *  - delete removes the source field.
 *  - copy adds the source field's value to the destination field, appending
 *    if the destination already has a value.
 *  - move does the same as copy and then removes the source field.
 *
 * For copy and move, the rule's default value ('extra') is used when the
 * source field is missing or empty (null, '' or []). If there is no usable
 * value at all, the rule is skipped and the data is left untouched.
 *
 * @param string                                   $source Source ID
 * @param array<string, string|array<int, string>> $data   Field array
 *
 * @return void
 *
 * @psalm-suppress DuplicateArrayKey
 */
protected function processFieldRules(string $source, array &$data): void
{
    // Explicit emptiness check: a plain truthiness test would also discard the
    // legitimate string value '0'.
    $isEmpty = static function ($value): bool {
        return null === $value || '' === $value || [] === $value;
    };

    foreach ($this->settings[$source]['fieldProcessingRules'] ?? [] as $rule) {
        $op = $rule['op'];
        $src = $rule['src'];

        if (self::RULE_DELETE === $op) {
            // Delete regardless of the current value so that fields holding
            // '', '0' or [] are removed as well. unset() is a no-op for a
            // missing key.
            unset($data[$src]);
            continue;
        }
        if (self::RULE_COPY !== $op && self::RULE_MOVE !== $op) {
            // Unknown operation: nothing to do.
            continue;
        }

        $fieldValue = $data[$src] ?? null;
        if ($isEmpty($fieldValue)) {
            // Fall back to the default given in the rule, if any.
            $fieldValue = $rule['extra'] ?? null;
        }
        if ($isEmpty($fieldValue)) {
            continue;
        }

        $dst = $rule['dst'];
        if (!isset($data[$dst])) {
            $data[$dst] = $fieldValue;
        } else {
            // Destination already has content: append, normalizing both
            // sides to arrays.
            $data[$dst] = [
                ...(array)$data[$dst],
                ...(array)$fieldValue
            ];
        }

        if (self::RULE_MOVE === $op) {
            unset($data[$src]);
        }
    }
}

/**
* Prefix building with institution code according to the settings
*
* @param array $data Record data
* @param string $source Source ID
* @param array $settings Data source settings
* @param array<string, string|array<int, string>> $data Record data
* @param string $source Source ID
* @param array $settings Data source settings
*
* @return void
*/
Expand Down Expand Up @@ -2318,9 +2406,7 @@ protected function addInstitutionToBuilding(&$data, $source, $settings)
// mapping tables
if (is_array($building)) {
// Predefined hierarchy, prepend to it
if (!empty($building)) {
array_unshift($building, $institutionCode);
}
array_unshift($building, $institutionCode);
} elseif ($building !== '') {
$building = "$institutionCode/$building";
} elseif ('building' === $field) {
Expand Down
8 changes: 4 additions & 4 deletions src/RecordManager/Base/Utils/FieldMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*
* PHP version 7
*
* Copyright (C) The National Library of Finland 2012-2017.
* Copyright (C) The National Library of Finland 2012-2023.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
Expand Down Expand Up @@ -159,12 +159,12 @@ public function mapFormat(string $source, array $format): array
/**
* Map all fields in an array
*
* @param string $source Source ID
* @param array $data Fields to process
* @param string $source Source ID
* @param array<string, string|array<int, string>> $data Fields to process
*
* @return array
*/
public function mapValues($source, $data)
public function mapValues($source, &$data)
{
$settings = $this->settings[$source];
foreach ($settings['mappingFiles'] as $field => $mappingFile) {
Expand Down
134 changes: 128 additions & 6 deletions tests/RecordManagerTest/Base/Solr/SolrUpdaterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*
* PHP version 7
*
* Copyright (C) The National Library of Finland 2020-2021.
* Copyright (C) The National Library of Finland 2020-2023.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
Expand Down Expand Up @@ -98,7 +98,7 @@ public function testProcessSingleRecord()
);

$date = strtotime('2020-10-20 13:01:00');
$mongoRecord = [
$dbRecord = [
'_id' => $record->getID(),
'oai_id' => '',
'linking_id' => $record->getLinkingIDs(),
Expand All @@ -111,7 +111,7 @@ public function testProcessSingleRecord()
'original_data' => $record->serialize(),
'normalized_data' => null,
];
$result = $solrUpdater->processSingleRecord($mongoRecord);
$result = $solrUpdater->processSingleRecord($dbRecord);

$maxlen = function ($array) {
return max(
Expand Down Expand Up @@ -147,13 +147,135 @@ function ($s) {
$this->assertEquals(40, mb_strlen($record['title_sort'], 'UTF-8'));
}

/**
 * Data provider for testFieldProcessingRules
 *
 * Each data set is a pair: the fieldRules[] strings to configure for the test
 * source, and a map of field name => expected value in the processed record.
 * An expected value of null means the field must be absent (or null), since
 * the test reads missing fields as null.
 *
 * @return array
 */
public function processSingleRecordProvider(): array
{
    return [
        'copy of missing field without default' => [
            [
                'copy foo newfield',
            ],
            // Expect the destination explicitly so the test actually verifies
            // that nothing was created (an empty expectation asserts nothing).
            [
                'newfield' => null,
            ],
        ],
        'copy of missing field with default' => [
            [
                'copy foo newfield DEFAULT',
            ],
            [
                'newfield' => 'DEFAULT',
            ],
        ],
        'copy of existing field' => [
            [
                'copy institution newfield',
            ],
            [
                'newfield' => 'Test',
            ],
        ],
        'delete of existing field' => [
            [
                'delete institution',
            ],
            [
                'institution' => null,
            ],
        ],
        'two copies into same field followed by delete' => [
            [
                'copy institution newfield',
                'copy record_format newfield',
                'delete institution',
            ],
            [
                'newfield' => [
                    'Test',
                    'marc',
                ],
                'institution' => null,
            ],
        ],
        'second move falls back to its default' => [
            [
                'move institution newfield DEFAULT',
                'move institution newfield DEFAULT2',
            ],
            [
                'newfield' => [
                    'Test',
                    'DEFAULT2',
                ],
                'institution' => null,
            ],
        ],
    ];
}

/**
 * Test field processing rules
 *
 * Configures the given rules as fieldRules of the 'test' source, runs the
 * marc-broken.xml fixture through processSingleRecord() and compares every
 * expected field with the first resulting record. Fields missing from the
 * result are compared as null.
 *
 * @param array $rules    Field processing rules
 * @param array $expected Expected results
 *
 * @dataProvider processSingleRecordProvider
 *
 * @return void
 */
public function testFieldProcessingRules(array $rules, array $expected): void
{
    $sourceOverrides = ['test' => ['fieldRules' => $rules]];
    $updater = $this->getSolrUpdater($sourceOverrides);

    $marc = $this->createMarcRecord(
        \RecordManager\Base\Record\Marc::class,
        'marc-broken.xml'
    );

    // Fixed timestamp shared by all date fields of the stored record.
    $timestamp = strtotime('2020-10-20 13:01:00');
    $processed = $updater->processSingleRecord(
        [
            '_id' => $marc->getID(),
            'oai_id' => '',
            'linking_id' => $marc->getLinkingIDs(),
            'source_id' => 'test',
            'deleted' => false,
            'created' => $timestamp,
            'updated' => $timestamp,
            'date' => $timestamp,
            'format' => 'marc',
            'original_data' => $marc->serialize(),
            'normalized_data' => null,
        ]
    );

    $solrRecord = $processed['records'][0];
    $this->assertIsArray($solrRecord);

    foreach ($expected as $fieldName => $expectedValue) {
        $actualValue = $solrRecord[$fieldName] ?? null;
        $this->assertEquals($expectedValue, $actualValue, $fieldName);
    }
}

/**
* Create SolrUpdater
*
* @param array $dsConfigOverrides Data source config overrides
*
* @return SolrUpdater
*/
protected function getSolrUpdater()
protected function getSolrUpdater(array $dsConfigOverrides = []): SolrUpdater
{
$dsConfig = array_merge_recursive(
$this->dataSourceConfig,
$dsConfigOverrides
);
$logger = $this->createMock(Logger::class);
$metadataUtils = new \RecordManager\Base\Utils\MetadataUtils(
RECMAN_BASE_PATH,
Expand All @@ -162,7 +284,7 @@ protected function getSolrUpdater()
);
$record = new \RecordManager\Base\Record\Marc(
[],
$this->dataSourceConfig,
$dsConfig,
$logger,
$metadataUtils,
function ($data) {
Expand All @@ -181,7 +303,7 @@ function ($data) {
);
$solrUpdater = new SolrUpdater(
$this->config,
$this->dataSourceConfig,
$dsConfig,
null,
$logger,
$recordPM,
Expand Down
1 change: 1 addition & 0 deletions tests/RecordManagerTest/Base/Utils/FieldMapperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public function testMapValues()
$mapped = $fieldMapper->mapValues('test', $record);

$this->assertIsArray($mapped);
$this->assertEquals($expected, $record);
$this->assertEquals($expected, $mapped);
}

Expand Down

0 comments on commit 9498ed1

Please sign in to comment.