-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28 from JCSDA-internal/feature/add_preprocess
Add a pre-process method
- Loading branch information
Showing
7 changed files
with
307 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
Data Pre-Processor | ||
================== | ||
|
||
The preprocessor looks for the ``DIRECT_INCLUDE=`` keyword in the input YAML | ||
and inserts the contents of the referenced file at that point in the input | ||
file. The result is written to the output file, or to standard output if | ||
``-`` is specified. | ||
|
||
It is expected that the keyword in the input yaml file will take the following | ||
format: | ||
|
||
.. code-block:: yaml | ||
DIRECT_INCLUDE=/path/to/file/to/be/included | ||
Command line usage | ||
------------------- | ||
|
||
.. code-block:: bash | ||
yp-preprocessor input-file-name -o output-file-name [options] | ||
Type ``yp-preprocessor --help`` for a list of options. See :doc:`cli` for | ||
detail. | ||
|
||
Python usage | ||
------------- | ||
|
||
.. code-block:: python | ||
from yamlprocessor.datapreprocessor import DataPreProcessor | ||
preprocessor = DataPreProcessor() | ||
preprocessor.add_replacements_map(keymap) # optional line | ||
preprocessor.process_yaml(input_file, output_file) | ||
Examples | ||
-------- | ||
|
||
Consider an input YAML file containing the following data: | ||
|
||
.. code-block:: yaml | ||
DIRECT_INCLUDE=a.yaml | ||
hello: | ||
- location: *planet | ||
targets: | ||
- human | ||
- cat | ||
- dog | ||
If ``a.yaml`` contains: | ||
|
||
.. code-block:: yaml | ||
_: | ||
- &planet earth | ||
Running the preprocessor on the input YAML file will yield the following | ||
output: | ||
|
||
.. code-block:: yaml | ||
_: | ||
- &planet earth | ||
hello: | ||
- location: *planet | ||
targets: | ||
- human | ||
- cat | ||
- dog | ||
The preprocessor simply concatenates the contents of ``a.yaml`` at the correct | ||
place in the input YAML file. This file can then be passed to ``yp-data`` for | ||
parsing. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ User Guide And Reference | |
|
||
install | ||
basic-usage | ||
data-preprocessor | ||
data-process | ||
schema-process | ||
cli | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#!/usr/bin/env python3 | ||
"""The datapreprocessor looks for the DIRECT_INCLUDE= keyword in the input | ||
yaml and concatenates the associated file at this point in the input | ||
file. The result is written to the output file or standard out if - is | ||
specified. | ||
Example usage: | ||
python datapreprocessor.py <input file> -o <output file> \ | ||
--define JOPA_AUX=/path/to/my/file | ||
python datapreprocessor.py <input file> -o- \ | ||
--define JOPA_AUX=/path/to/my/file | ||
""" | ||
|
||
import argparse | ||
import os | ||
import re | ||
import sys | ||
|
||
|
||
class DataPreProcessor:
    """Expand ``DIRECT_INCLUDE=`` directives found in a YAML text file.

    Each input line containing ``DIRECT_INCLUDE=<path>`` is replaced by the
    contents of the file at ``<path>``. ``$VAR`` and ``${VAR}`` placeholders
    in the path are substituted from ``self.replacements``, which starts as
    a copy of the process environment and can be extended with
    :meth:`add_replacements_map`. All other lines are copied unchanged.
    """

    # Keyword that marks a line as an include directive.
    _INCLUDE_KEY = 'DIRECT_INCLUDE='
    # Matches $VAR or ${VAR}; compiled once instead of on every call.
    _PLACEHOLDER = re.compile(r'\$\{(\w+)\}|\$(\w+)')

    def __init__(self):
        # Work on a copy so later updates never touch os.environ itself.
        self.replacements = os.environ.copy()

    def __replace_placeholders(self, text):
        """Return *text* with known ``$VAR`` / ``${VAR}`` names substituted.

        Placeholders whose name is not in ``self.replacements`` are left
        exactly as written.
        """
        def replacer(match):
            var_name = match.group(1) or match.group(2)
            return self.replacements.get(var_name, match.group(0))

        return self._PLACEHOLDER.sub(replacer, text)

    def add_replacements_map(self, replacements):
        """Merge *replacements* (name -> value) into the substitution map.

        Existing entries with the same name are overwritten.
        """
        self.replacements.update(replacements)

    def process_yaml(self, in_yaml, out_yaml):
        """Write *in_yaml* to *out_yaml* with include directives expanded.

        Args:
            in_yaml: Path of the input file.
            out_yaml: Path of the output file, or ``'-'`` to write to
                standard output.

        Raises:
            OSError: If the input file, an included file, or the output
                file cannot be opened.
        """
        with open(in_yaml, 'r') as src_file:
            lines = src_file.readlines()

        new_lines = []
        for iline in lines:
            if self._INCLUDE_KEY not in iline:
                new_lines.append(iline)
                continue
            # Take everything after the keyword, so that '=' characters
            # elsewhere on the line or inside the path do not truncate it.
            include_path = iline.split(self._INCLUDE_KEY, 1)[1].rstrip()
            include_path = self.__replace_placeholders(include_path)
            with open(include_path, 'r') as aux_file:
                aux_data = aux_file.read()
            # The directive line's own terminator is replaced along with
            # it; restore one if the included text lacks it, otherwise the
            # next input line would be glued onto the include's last line.
            if iline.endswith('\n') and aux_data \
                    and not aux_data.endswith('\n'):
                aux_data += '\n'
            new_lines.append(aux_data)

        if out_yaml == '-':
            # stdout is owned by the interpreter; write but never close it.
            sys.stdout.writelines(new_lines)
        else:
            with open(out_yaml, 'w') as out_file:
                out_file.writelines(new_lines)
|
||
|
||
def main():
    """Command-line entry point for the data pre-processor.

    Parses the input file, the output file (``-`` for standard output),
    any ``--define key=value`` pairs and the ``--no-environment`` switch
    from ``sys.argv``, then runs ``DataPreProcessor.process_yaml``.

    Exits through ``argparse`` with a usage error when the output file is
    missing or a ``--define`` item is not of the form ``key=value``.
    """
    parser = argparse.ArgumentParser(
        description="Process input and output "
                    "files with multiple --define options."
    )

    # Positional argument for input
    parser.add_argument('input_file', type=str, help='Input file')

    # Output file specified. Required: without it the processor would be
    # handed None and fail with a TypeError when opening the output.
    parser.add_argument(
        '--output-file', '-o',
        metavar='FILENAME',
        action="store",
        required=True,
        help='Name of output file, "-" for STDOUT'
    )

    # Optional
    parser.add_argument(
        '--define', '-D',
        action='append',
        help='Key-value pairs in the format key=value', default=[]
    )
    parser.add_argument(
        '--no-environment', '-i',
        action='store_true',
        default=False,
        help='Do not use environment variables in variable substitutions')

    # Parse arguments and print for sanity checking. Diagnostics go to
    # stderr so they never mix with YAML written to stdout.
    args = parser.parse_args()
    print(f"Input file: {args.input_file}", file=sys.stderr)
    print(f"Output file: {args.output_file}", file=sys.stderr)
    print(f"Defines: {args.define}", file=sys.stderr)

    # Process define arguments into a dictionary for adding to the
    # environment variable dictionary. Split on the first '=' only so
    # that values may themselves contain '='.
    key_value_pairs = {}
    for item in args.define:
        key, separator, value = item.partition('=')
        if not separator or not key:
            parser.error(
                f"--define expects key=value, got {item!r}")
        key_value_pairs[key] = value

    # Run preprocessor
    preprocessor = DataPreProcessor()
    if args.no_environment:
        # Drop the inherited environment; only --define values remain.
        preprocessor.replacements.clear()
    preprocessor.add_replacements_map(key_value_pairs)
    preprocessor.process_yaml(args.input_file, args.output_file)
|
||
|
||
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import pytest | ||
from ruamel.yaml import YAML | ||
|
||
from ..datapreprocessor import ( | ||
DataPreProcessor) | ||
|
||
|
||
@pytest.fixture
def yaml():
    """Provide a ruamel ``YAML`` instance built with typ='safe', pure=True."""
    loader = YAML(typ='safe', pure=True)
    return loader
|
||
|
||
def test_main_0(tmp_path, yaml):
    """Test main, basic.

    Checks that both the ``$VAR`` and ``${VAR}`` placeholder styles in a
    ``DIRECT_INCLUDE=`` path resolve to the same auxiliary file, and that
    the expanded output parses to the same data as the reference document.
    """
    # NOTE(review): the nesting indentation inside these YAML literals is
    # not visible in this view; confirm it against the repository copy.
    yaml_0 = """
DIRECT_INCLUDE=$FILE_PATH/aux.yaml
data:
brain: *banana
tel: *groups
"""
    yaml_1 = """
DIRECT_INCLUDE=${FILE_PATH}/aux.yaml
data:
brain: *banana
tel: *groups
"""
    yaml_2 = """
_:
- &banana 1
- &groups [4, 5, 6]
"""
    reference = """
_:
- &banana 1
- &groups [4, 5, 6]
data:
brain: *banana
tel: *groups
"""
    # write_text opens, writes and closes each fixture file in one step.
    (tmp_path / 'in_0.yaml').write_text(yaml_0)
    (tmp_path / 'in_1.yaml').write_text(yaml_1)
    (tmp_path / 'aux.yaml').write_text(yaml_2)

    # Run preprocessor
    preprocessor = DataPreProcessor()
    preprocessor.add_replacements_map({"FILE_PATH": str(tmp_path)})
    # Setup reference
    ref_yaml = yaml.load(reference)
    # Test both placeholder styles: in_0 uses $VAR, in_1 uses ${VAR}.
    for index in (0, 1):
        outfilename = tmp_path / f'test_{index}.yaml'
        preprocessor.process_yaml(tmp_path / f'in_{index}.yaml', outfilename)
        # read_text closes the file, unlike passing an open() handle.
        assert yaml.load(outfilename.read_text()) == ref_yaml, index