diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index f67f869d15..e1130dd4ee 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -12,10 +12,10 @@ jobs: # Needed to post comments and issues issues: write steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: submodules: recursive - - uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d + - uses: actions/setup-node@8f152de45cc393bb48ce5d89d36b731f54556e65 with: node-version-file: "build/.nvmrc" cache: "npm" @@ -26,7 +26,7 @@ jobs: working-directory: build - name: Create an issue or comment if bad links are detected if: failure() - uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea with: script: | // Read the markdown linkcheck report diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c4628fee2a..a4f844acc0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,7 +8,7 @@ jobs: name: Package Release runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: submodules: recursive - uses: actions/setup-java@v3 diff --git a/.github/workflows/status.yml b/.github/workflows/status.yml index 31ac78ebca..cf461b828a 100644 --- a/.github/workflows/status.yml +++ b/.github/workflows/status.yml @@ -18,14 +18,14 @@ jobs: name: Status Checks runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: submodules: recursive - uses: actions/setup-java@v3 with: distribution: "temurin" java-version: "17" - - uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d + - uses: 
actions/setup-node@8f152de45cc393bb48ce5d89d36b731f54556e65 with: node-version-file: "build/.nvmrc" cache: "npm" diff --git a/build/metaschema-xslt b/build/metaschema-xslt index bd4359a035..7d9fbfa84e 160000 --- a/build/metaschema-xslt +++ b/build/metaschema-xslt @@ -1 +1 @@ -Subproject commit bd4359a0354d3a9452633a8ed915ec9e915d5431 +Subproject commit 7d9fbfa84e78e4ba4dd950ad39c65738b7b66697 diff --git a/build/pom.xml b/build/pom.xml index 2f04d26516..3532b87e4d 100644 --- a/build/pom.xml +++ b/build/pom.xml @@ -48,7 +48,7 @@ org.apache.maven.plugins maven-dependency-plugin - 3.6.0 + 3.6.1 copy-dependencies diff --git a/decisions/0007-implementation-agnostic-tests.md b/decisions/0007-implementation-agnostic-tests.md new file mode 100644 index 0000000000..c56793b4bd --- /dev/null +++ b/decisions/0007-implementation-agnostic-tests.md @@ -0,0 +1,80 @@ +# Implementation-agnostic Testing and Test Harness + +Date: 10/06/2023 + +## Status + +Proposed + +## Context + +In order to support the development of OSCAL tooling, it was decided to prototype a unified tool responsible for validating OSCAL implementations against specification requirements. + +Currently, only profile resolution has been [formalized into a draft specification](../src/specifications/profile-resolution/profile-resolution-specml.xml). + +### Existing Infrastructure + +The profile resolver specification currently leverages an in-house XML format known as SpecML, which breaks down a specification into a collection of **sections**, which contain in turn a collection of **requirements**. +Each `<section>` and `<req>` has a unique `@id` attribute. + +The sections and requirements are mirrored in the XSLT implementation's profile resolution unit tests. +Although crucial to the XSLT implementation, these tests are not portable and it would not be simple to use the tests in their current state to validate other implementations. + +### Specification Tests + +Some specifications such as [CommonMark](https://commonmark.org/) include a [test suite and testing harness](https://github.com/commonmark/commonmark-spec/tree/master/test) to make it possible for implementors to "score" their implementation's conformance to the specification. + +## Decision + +### SpecML + +The specification format will remain unchanged for now. +There is an argument for the format to be replaced or simplified in the future, but the use of `@id` attributes for sections and requirements makes linking a test to an example simple. + +### Test Suite Data Format + +The test suite will be described using a JSON file with a simple data format. + +This file will contain a collection of objects that map to a given spec requirement via `section_id` and `requirement_id` fields. +These objects will further contain a collection of "scenario" objects, each of which contains a `description`, `source_profile_path`, `expected_catalog_path`, and a collection of `selection_expressions`. + +For a given scenario, a test runner would be expected to perform profile resolution with the `source_profile_path` and compare selections of the resulting document with the `expected_catalog_path`. +The `selection_expressions` are XPath expressions, though the [test harness](#test-harness) may further constrain the XPath expression's capabilities. 
+ +Here is an example test suite made up of one requirement: + +```json +[ + { + "section_id": "import", + "requirement_id": "req-uri-resolve", + "scenarios": [ + { + "description": "Check that group and control titles match, signalling that URIs have been resolved", + "source_profile_path": "requirement-tests/req-include-all-asis.xml", + "expected_catalog_path": "requirement-tests/output-expected/req-include-all-asis_RESOLVED.xml", + "selection_expressions": [ + "./oscal:group/oscal:title", + "./oscal:group/oscal:control/oscal:title" + ] + } + ] + } +] +``` + +The development of a JSON schema for this format is left as future work. + +### Test Harness + +A prototype testing harness has been developed, with the capability to report a given profile resolver's compliance to a specification given a [test suite JSON file](#test-suite-data-format). + +The prototype harness is built to be as simple as possible, avoiding external libraries. +Python's native XPath capabilities are limited, further constraining the capabilities of the test suite. + +## Consequences + +Writing specification tests for profile resolution will require significant resources, but will make profile resolution more approachable for implementors and will make changes to the specification more maintainable. + +Due to the "requirement based" approach of the specification test suite, new tests can be added gradually. +Test coverage can be determined by determining which requirements do not have tests. diff --git a/decisions/0008-oscal-content-management.md b/decisions/0008-oscal-content-management.md new file mode 100644 index 0000000000..25bd12c95e --- /dev/null +++ b/decisions/0008-oscal-content-management.md @@ -0,0 +1,65 @@ +# OSCAL Content Data Governance and Release Management + +Date: 11/09/2023 + +## Status + +Approved + +## Context + +Since 2016, the OSCAL project has iterated on methods and locations for managing example content and published catalogs. 
It is time that we decide whether to continue as-is or make a meaningful change to how the project's OSCAL content is developed, published, and maintained. + +### Key Takeaways + +1. Almost all changes to OSCAL representations of the published SP 800-53A and 800-53B catalogs do not diverge from the official publication. Clear data management and governance guidelines are needed to identify which changes are acceptable for OSCAL Team leadership to approve for release, and which specific changes need review and approval by the Security Engineering and Risk Management maintainers of the official SP 800-53 content. +1. By the nature of OSCAL models and relationships of document instances, the team must continue to manage published catalogs and examples that cite them together. +1. It is important that final integration testing of all OSCAL content occur with the latest pending release of OSCAL, even if the content is backwards compatible with an older minor or patch release for the same version. + +### Background + +[In September 2020](https://github.com/usnistgov/OSCAL/commit/01c0aa9b45667b25e8105160119da011471c77cb), the NIST OSCAL Team migrated SP 800-53 Revision 4, SP 800-53 Revision 5, and example content from [the core OSCAL repository](https://github.com/usnistgov/OSCAL) to the [new oscal-content repository](https://github.com/usnistgov/oscal-content). Presumably, this migration allowed the development team to manage published catalog content, and to a lesser extent examples, in a more flexible way, independent from the established release process and practices for the core OSCAL models, schemas, and supporting tooling. (However, even by that time examples reflected the real-world cross-document relationships OSCAL models support. Examples inherently reference adjacent published catalogs of the NIST SP 800-53B controls.) 
The NIST OSCAL Team coordinated with the [Security Engineering and Risk Management Team](https://csrc.nist.gov/Groups/Computer-Security-Division/Security-Engineering-and-Risk-Management), maintainers of NIST SP 800-53A and SP 800-53B, to publish representations of the assessment methods and respective controls in conjunction with their official publication (in PDF and alternate formats). Team members employed semi-automatic techniques for content-generation and data quality checks to coordinate finalized release of the 5.0 and 5.1 versions of the official documents with the OSCAL representation (e.g. releasing to the `main` branch in the GitHub repository). Their publication schedule is less frequent than the OSCAL development cycle. This is an important takeaway that led to data governance, testing, and release challenges. + +### Data Management and Governance Challenges + +As enhancements and bug fixes for OSCAL increased, separately from the content, in between official upstream releases of SP 800-53A and SP 800-53B, staff and community members [identified bugs and enhancements to the OSCAL representations](https://github.com/usnistgov/oscal-content/issues). In most cases, these work items would not or did not diverge from the content in the official publication version. These data quality and OSCAL-specific enhancements would or did improve the ability of technical staff using the OSCAL representation to create or improve catalog and profile automation. There has been no clear guidance on how to accept these changes, publish them, and how to identify their versions upon release. These governance questions led to an accumulation of work items that delayed publication (at this time, read: merged into the `main` branch). + +### OSCAL Dependency Upgrades, Integration, and Regression Test Challenges + +Due to infrequent publications of the catalogs, managed together with examples, the OSCAL submodule that provides models, generated schema, and tooling support is rarely updated. 
At the time of writing this ADR draft, [the oscal-content main branch at `a53f261`](https://github.com/usnistgov/oscal-content/tree/a53f261a946c52811c507deb4d8385d9e4794a6f) uses a version of the OSCAL models and tooling that is ostensibly from December 2022, [`51d5de2`](https://github.com/usnistgov/OSCAL/commit/51d5de22c181477e3f9cf08789c4399fff013f14), a stable commit between v1.0.4 and v1.0.5. Several attempts to smoothly upgrade this with subsequent releases of OSCAL models and supporting tooling were rolled back or never completed. Automated content conversion and schema validation failed. The team confirmed bugs in dependencies of OSCAL. Fixing these issues required months of development work. Below is a non-exhaustive list with two examples. + +- [usnistgov/metaschema#235](https://github.com/usnistgov/metaschema/issues/235) +- [usnistgov/metaschema#240](https://github.com/usnistgov/metaschema/issues/240) + +These bugs, and those like them, impacted conversion and validation of the examples, the published catalogs, or in some cases both. So in all cases, they stopped final publication into the oscal-content `main` branch, even as new OSCAL models were released. Specifically, fixes for issues in an implementation of the Metaschema Information Modeling Framework used by OSCAL for schema generation, validation, and conversion need to be not only tested in their upstream projects, but also frequently regression tested across models with complex content present in the oscal-content repo. This manual follow-on work was a necessity to test all edge cases. It was exacerbated by the lack of frequent releases; with more frequent releases, such problems would be caught sooner and fixed more frequently. This is a key takeaway that Metaschema and OSCAL Team's developers acknowledge, but have not yet put into practice. This last line of defense is important to minimizing toil for the team. + +## Decision + +Moving forward, the team must commit to the following. + +1. 
A data management and governance procedure will be added [to the OSCAL Team Wiki](https://github.com/usnistgov/OSCAL/wiki/NIST-SP-800%E2%80%9053-OSCAL-Content-Data-Governance). +1. The oscal-content repository will move to a `Makefile`-based approach for [usnistgov/oscal-content#116](https://github.com/usnistgov/oscal-content/issues/116) when [usnistgov/oscal-content#204](https://github.com/usnistgov/oscal-content/pull/204) is merged to match the same approach for the core repository enacted in [ADR 5](./0005-repository-reorganization.md). For consistency and simplicity of this new workflow, all examples, profiles, and catalogs will be developed in the [src directory](https://github.com/usnistgov/oscal-content/tree/7a079afed39b1a36a091c8d4ac939d096d42c76b/src) in OSCAL XML format only and converted later. This approach will simplify the architecture and improve efficiency of development cycles. +1. Every OSCAL model release must coincide with an oscal-content release. At a minimum, even if examples or catalogs to be published do not change any content, the team must do the following. + - Update the OSCAL submodule to the latest tagged release. + - All source catalogs and profiles must have their `oscal-version` and `version` incremented. Their `last-modified` and `published` timestamps must be updated, even if the updated content in that release is backwards compatible with previous major, minor, and/or patch versions. + - All `xml-model` instructions at the top of every example, profile, and catalog instance must be updated to the complete OSCAL XML schema artifact for the release that matches the `oscal-version`. +1. The team will tag the commit with generated artifacts and mimic [the core repository's versioning, branching, and release guidelines](https://github.com/usnistgov/OSCAL/blob/f159b28948cb0034370fb819a45bfdaeaef5192a/versioning-and-branching.md), following [SemVer requirements](https://semver.org/). +1. 
Releases of content will be created alongside the core OSCAL repository. +1. In ADR 5, the team cited risk with the ongoing use of auto-commit automation with GitHub Actions for core OSCAL models and generated artifacts. To evaluate the best option and allow time for coordination with the community, the team will continue with auto-committing content to `main` as a publication mechanism only for the near-term future. The team will revisit this decision and potentially propose an alternative method that is more suitable in a subsequent spike and approved ADR. +1. OSCAL Team leadership will review resources and the feasibility of ongoing maintenance of the catalogs and alternative courses of action for long-term publication of NIST SP 800-53 Revision 5 catalogs. + +## Consequences + +Below are the consequences of the different approaches. + +### Do Nothing + +In the short-term, doing nothing would mean to stop publication of the content immediately. This solution would be detrimental to the community without effective analysis for alternative courses of action and approaches for usage of existing content. + +### Change Nothing + +If the team continues as-is by publishing content to `main` after bugs and build tooling improvements are complete, the challenges above will still sustain unnecessary risk without changes to process and tooling to support the team and its goals. Development of example content, not just publication of catalogs, will stall due to edge cases and accumulated changes in tooling that lead to many minor changes in content that must be reviewed and analyzed. + +### Clarify Governance and Require Upgrades for Testing + +Clear governance and frequent updates will require more periodic work for the NIST OSCAL Team, but ensure the challenges above will be less frequent and less significant. 
diff --git a/decisions/0009-tutorials-system-lifecycle.md b/decisions/0009-tutorials-system-lifecycle.md new file mode 100644 index 0000000000..e881617f9c --- /dev/null +++ b/decisions/0009-tutorials-system-lifecycle.md @@ -0,0 +1,46 @@ +# Design simplified system lifecycle for example system in tutorials + +Date: 10/31/2023 + +## Status + +Proposed + +## Context + +We wish to reduce friction encountered by community members learning security automation with OSCAL through tutorials produced by the OSCAL team. +A series of OSCAL security automation tutorials would need to be centered around and driven by some system lifecycle, such as the implied lifecycle in NIST 800-37 Risk Management Framework or ISO/IEC 27005. +However, adopting a complex real-world lifecycle in the tutorials would have several disadvantages: + +- Complex system lifecycles add overhead that may not be relevant to the tutorial at hand. +- Endorsing a particular lifecycle may incorrectly signal to the reader that OSCAL can only be used with that lifecycle. +- The use of a real-world lifecycle could invite disagreement over the particulars of the lifecycle that are not relevant to the tutorials. + +In summary, the lifecycle should serve the tutorials and not the other way around. + +*Note: this ADR was created as part of a work item for [OSCAL#1893](https://github.com/usnistgov/OSCAL/issues/1893).* + +## Decision + +The NIST OSCAL team should use a simplified lifecycle in its tutorials. +The lifecycle will focus on security automation. + +This document will only contain minimally-viable details of the lifecycle. + +### Proposed Lifecycle + +The proposed lifecycle will be evocative of a stripped-down RMF or ISO 27005 SDLC, discarding and simplifying steps that are not immediately relevant to a tutorial. + +The individual tutorials may include asides on how a given process maps to other processes such as RMF. 
+ +|RISK MGMT | Select | Implement | Assess | +| --- | --- | --- | --- | +| DEVELOPMENT | Design | Develop | Test | + +The proposed lifecycle collapses "prepare", "categorize", and "select" into ***design***, renames "implement" into ***develop***, collapses "assess" and "authorize" into ***test***, and removes "monitor". + +The steps of the RMF are all important and deserve individual consideration, but are not the subject of the tutorials. + +## Consequences + +This decision will affect tutorials written in the future, particularly tutorials surrounding the fictional [example system](https://github.com/usnistgov/OSCAL/issues/1892). diff --git a/src/specifications/profile-resolution/metaschema-datatypes.xsd b/src/specifications/profile-resolution/metaschema-datatypes.xsd deleted file mode 100644 index a1f8e099ae..0000000000 --- a/src/specifications/profile-resolution/metaschema-datatypes.xsd +++ /dev/null @@ -1,241 +0,0 @@ - - - - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - - - - - - - The xs:date with a required timezone. - - - - - - - - - - - - - - - The xs:dateTime with a required timezone. - - - - - - - - - - - - - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - An email address - - - - - Need a better pattern. - - - - - - - - A host name - - - - - - - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - The ip-v4-address type specifies an IPv4 address in - dot decimal notation. - - - - - - - - - The ip-v6-address type specifies an IPv6 address - represented in 8 hextets separated by colons. - This is based on the pattern provided here: - https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses - with some customizations. 
- - - - - - - - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - A string, but not empty and not whitespace-only - (whitespace is U+9, U+10, U+32 or [ \n\t]+ ) - - - - The OSCAL 'string' datatype restricts the XSD type by prohibiting leading - and trailing whitespace, and something (not only whitespace) is required. - - - - - A trimmed string, at least one character with no - leading or trailing whitespace. - - - - - - - - - A string token following the rules of XML "no - colon" names, with no whitespace. (XML names are single alphabetic - characters followed by alphanumeric characters, periods, underscores or dashes.) - - - - - - - - A single token may not contain whitespace. - - - - - - - - - A URI - - - - - Requires a scheme with colon per RFC 3986. - - - - - - - - A URI reference, such as a relative URL - - - - - - A trimmed URI, at least one character with no - leading or trailing whitespace. - - - - - - - - A type 4 ('random' or 'pseudorandom') or type 5 UUID per RFC - 4122. - - - - - A sequence of 8-4-4-4-12 hex digits, with extra - constraints in the 13th and 17-18th places for version 4 and 5 - - - - - - - diff --git a/src/specifications/profile-resolution/profile-resolution-specml-requirements.xspec b/src/specifications/profile-resolution/profile-resolution-specml-requirements.xspec index 379efd8c03..af0474aaa3 100644 --- a/src/specifications/profile-resolution/profile-resolution-specml-requirements.xspec +++ b/src/specifications/profile-resolution/profile-resolution-specml-requirements.xspec @@ -132,7 +132,7 @@ + pending="chained profiles"> Multiple imports

Each import directive is processed to produce a set of controls. Note that this occurs even if the same catalog is imported + id="rq-multiple-imports" level="must">Note that this occurs even if the same catalog is imported multiple times: each distinct import collects controls into a separate selection:

@@ -445,7 +445,7 @@ intermediate: - ac-3 - ac-4 -

The control inclusions are combined and collapsed in the next +

The control inclusions are combined and collapsed in the next phase of processing, merge(see ) .

Multiple imports against the same resource are allowed, and would most commonly occur when the profile author is using to create very specific output. Multiple imports may result in outputs with clashing control IDs if mapping or the merge directive is not set correctly.

diff --git a/src/specifications/profile-resolution/profile-resolution-unit-tests.xml b/src/specifications/profile-resolution/profile-resolution-unit-tests.xml index e8161ff96c..8ccf9b4e5c 100644 --- a/src/specifications/profile-resolution/profile-resolution-unit-tests.xml +++ b/src/specifications/profile-resolution/profile-resolution-unit-tests.xml @@ -87,7 +87,7 @@ When a profile imports a profile, the subordinate profile SHOULD be resolved first into a catalog using this specification, before it is imported. - Note that this occurs even if the same catalog is imported + Note that this occurs even if the same catalog is imported multiple times: each distinct import collects controls into a separate selection The control inclusions are combined and collapsed in the next diff --git a/src/specifications/profile-resolution/resolution-testing.xml b/src/specifications/profile-resolution/resolution-testing.xml index 72a7013638..2f67d31ac5 100644 --- a/src/specifications/profile-resolution/resolution-testing.xml +++ b/src/specifications/profile-resolution/resolution-testing.xml @@ -67,7 +67,7 @@ If a processor encounters a circular import as described above (self-imports are inherently circular), the processor MUST cease processing and generate an error. PENDING circular import detection
- + Note that this occurs even if the same catalog is imported multiple times: each distinct import collects controls into a separate selection PENDING chained profiles diff --git a/src/specifications/profile-resolution/spec-tester.py b/src/specifications/profile-resolution/spec-tester.py new file mode 100755 index 0000000000..7e9c2616c8 --- /dev/null +++ b/src/specifications/profile-resolution/spec-tester.py @@ -0,0 +1,472 @@ +#!/usr/bin/env python3 + +""" +A simple CLI application that tests profile resolver implementations against the adjacent +specification. + +Caveats: +- XPath functionality will depend on the version of Python being used (newer is better). +- On some versions of Python, absolute selections (/root/item) are broken and will result in a + warning, use relative selections instead (./item). +- Comparisons of multiple elements are not "smart". Unlike the OSCAL Deep Diff, this tool does not + attempt to match items together. Selections should be written with this in mind (e.g. select a + specific oscal:param instead of comparing all of them when order is not explicitly specified). + +Future Improvements: +- TODO: Cache results of profile resolution in Driver class for commonly re-used sources +- TODO: Make failure condition more granular (e.g. 
add parameter to prevent failure on "should" levels) +""" + +import argparse +import sys +import os.path +import subprocess +import tempfile +import shutil +import json +import logging +import time +from itertools import zip_longest +from xml.etree import ElementTree as ET + +from typing import TypedDict, List, Dict, Set, Tuple, Optional + + +class Colors: + """ + ANSI color codes + + Via https://gist.github.com/rene-d/9e584a7dd2935d0f461904b9f2950007 + """ + BLACK = "\033[0;30m" + RED = "\033[0;31m" + GREEN = "\033[0;32m" + BROWN = "\033[0;33m" + BLUE = "\033[0;34m" + PURPLE = "\033[0;35m" + CYAN = "\033[0;36m" + LIGHT_GRAY = "\033[0;37m" + DARK_GRAY = "\033[1;30m" + LIGHT_RED = "\033[1;31m" + LIGHT_GREEN = "\033[1;32m" + YELLOW = "\033[1;33m" + LIGHT_BLUE = "\033[1;34m" + LIGHT_PURPLE = "\033[1;35m" + LIGHT_CYAN = "\033[1;36m" + LIGHT_WHITE = "\033[1;37m" + BOLD = "\033[1m" + FAINT = "\033[2m" + ITALIC = "\033[3m" + UNDERLINE = "\033[4m" + BLINK = "\033[5m" + NEGATIVE = "\033[7m" + CROSSED = "\033[9m" + END = "\033[0m" + # cancel SGR codes if we don't write to a terminal + if not __import__("sys").stdout.isatty(): + for _ in dir(): + if isinstance(_, str) and _[0] != "_": + locals()[_] = "" + else: + # set Windows console in VT mode + if __import__("platform").system() == "Windows": + kernel32 = __import__("ctypes").windll.kernel32 + kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7) + del kernel32 + + +class TestScenario(TypedDict): + """A source profile along with the expected resulting profile and match expressions""" + description: str + source_profile_path: str + expected_catalog_path: str + selection_expressions: List[str] + + +class TestRequirement(TypedDict): + """A single requirement composed of multiple test scenarios""" + section_id: str + requirement_id: str + scenarios: List[TestScenario] + + +DRIVER_SOURCE_TOKEN = "{src}" +DRIVER_DESTINATION_TOKEN = "{dest}" + + +INDENT_TEXT = " " + + +class Driver(object): + """Handles running the profile 
resolver given a source file and destination path""" + + def __init__(self, command: str, workdir: Optional[str] = None, + logger: Optional[logging.Logger] = None) -> None: + """ + Note: Creates a temporary directory as a side effect, consumer must call .cleanup() to remove + """ + if not DRIVER_SOURCE_TOKEN in command: + raise Exception( + f"Command `{command}` does not contain source token '{DRIVER_SOURCE_TOKEN}'") + if not DRIVER_DESTINATION_TOKEN in command: + raise Exception( + f"Command `{command}` does not contain source token '{DRIVER_DESTINATION_TOKEN}'") + + self.logger = logger if logger is not None else logging.getLogger( + __name__) + self.command = command + self.workdir = workdir + self.out_directory = tempfile.mkdtemp("oscal-pr-test-out") + + self.logger.debug( + f"Created temporary output directory '{self.out_directory}'") + + def run(self, src_path, indent=0) -> ET.ElementTree: + """ + Run the command specified by `self.command`, substituting `DRIVER_SOURCE_TOKEN` and + `DRIVER_DESTINATION_TOKEN` with `src_path` and a generated output path respectively. + + Note: Places output files in a temporary directory, consumer must call .cleanup() to remove + """ + src_name = os.path.basename(src_path) + # some-profile.xml => some-profile_RESOLVED_$TIMESTAMP.xml + dest_name = os.path.splitext( + src_name)[0] + f"_RESOLVED_{time.strftime('%Y%m%d-%H%M%S')}.xml" + dest_path = os.path.join(self.out_directory, dest_name) + + command = self.command\ + .replace(DRIVER_SOURCE_TOKEN, f"'{src_path}'")\ + .replace(DRIVER_DESTINATION_TOKEN, f"'{dest_path}'") + + self.logger.debug(f"{INDENT_TEXT*indent}Running command `{command}`") + + # Notice: this code does not protect against shell injection of any kind, + # `self.command` and `src_path` must be trusted. 
+ ret = subprocess.run(command, shell=True, + capture_output=True, cwd=self.workdir) + # TODO handle command failure + + if ret.returncode != 0: + raise Exception( + f"Process returned non-zero exit code, stderr:\n\n{ret.stderr}") + + return ET.parse(dest_path) + + def cleanup(self): + """Delete temporary directory""" + self.logger.debug( + f"Removing temporary output directory '{self.out_directory}'") + shutil.rmtree(self.out_directory) + + +def compare_elements(e1: Optional[ET.ElementTree], e2: Optional[ET.ElementTree], path=".", + e1Name="left", e2Name="right") -> Tuple[bool, List[str]]: + """ + Compare two element trees returning if they are the same, and a list of changes in the form of + XPath-like selections. + + Warning: This comparison function will likely fail on mixed content (e.g. markup) and in cases + where the order of child elements is different. + + Note: comments added to some difference paths using XPath 2.0 (: comment syntax :) + """ + + differences: List[str] = [] + + if e1 is None: + differences.append( + f"{path}/ (: tag mismatch: {e1Name}=None {e2Name}='{e2.tag}' :)") + elif e2 is None: + differences.append( + f"{path}/ (: tag mismatch: {e1Name}='{e1.tag}' {e2Name}=None :)") + else: + if e1.tag != e2.tag: + # Fail early if tags are mismatched, no point in comparing tag contents + differences.append( + f"{path}/ (: tag mismatch: {e1Name}='{e1.tag}', {e2Name}='{e2.tag}' :)") + else: + e1Text = (e1.text if e1.text is not None else "").strip() + e2Text = (e2.text if e2.text is not None else "").strip() + + # TODO compare on mixed content? 
+ if e1Text != e2Text: + differences.append(path + "/text()") + + e1AttribSet = set(e1.attrib.keys()) + e2AttribSet = set(e2.attrib.keys()) + + for key in e1AttribSet.intersection(e2AttribSet): + if e1.attrib[key] != e2.attrib[key]: + # Attribute value mismatch + differences.append( + f"{path}/@{key} (: attribute value mismatch: {e1Name}='{e1.attrib[key]}', {e2Name}='{e2.attrib[key]}' :)") + + # Attribute not present in one or the other + for key in e1AttribSet.difference(e2AttribSet): + differences.append( + f"{path}/@{key} (: attribute value mismatch: {e1Name}='{e1.attrib[key]}', {e2Name}=None :") + for key in e2AttribSet.difference(e1AttribSet): + differences.append( + f"{path}/@{key} (: attribute value mismatch: in {e1Name}=None, {e2Name}='{e2.attrib[key]}' :") + + for i, (c1, c2) in enumerate(zip_longest(e1, e2)): + # zip_longest returns None for extra items of the shorter iterator + # XPath starts lists with 1 + _, child_differences = compare_elements( + c1, c2, path=f"{path}/*[{i + 1}]", e1Name=e1Name, e2Name=e2Name) + differences += child_differences + + return len(differences) == 0, differences + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_TESTS_PATH = os.path.join(SCRIPT_DIR, "spec-tests.json") +DEFAULT_SPEC_PATH = os.path.join(SCRIPT_DIR, "profile-resolution-specml.xml") + +QUERY_NS = { + "specml": "http://csrc.nist.gov/ns/oscal/specml", + "oscal": "http://csrc.nist.gov/ns/oscal/1.0" +} + + +class RequirementTests(object): + def __init__(self, spec_path=DEFAULT_SPEC_PATH, tests_path=DEFAULT_TESTS_PATH, + logger: Optional[logging.Logger] = None) -> None: + self.spec_path = spec_path + self.tests_path = tests_path + + self.spec = ET.parse(self.spec_path) + + self.logger = logger if logger is not None else logging.getLogger( + __name__) + + with open(self.tests_path) as tests_file: + tests_json = json.loads(tests_file.read()) + # TODO any sort of input validation, this is currently at best a type hint + self.tests: 
List[TestRequirement] = tests_json + + # used to resolve files relative to the spec file + self.tests_workdir = os.path.dirname(self.tests_path) + + # K,V of section ids -> section titles + self.section_heads: Dict[str, str] = {} + # K,V of section ids -> requirement id -> requirement level + # TODO parse out requirement text and store alongside level? + self.section_requirements: Dict[str, Dict[str, str]] = {} + + # process spec file + for section in self.spec.findall("specml:section", QUERY_NS): + section_id = section.attrib['id'] + section_head = section.find("specml:head", QUERY_NS).text + + self.section_heads[section_id] = section_head + self.section_requirements[section_id] = {} + + for requirement in section.findall(".//specml:req", QUERY_NS): + requirement_id = requirement.attrib['id'] + requirement_level = requirement.attrib['level'] + + self.section_requirements[section_id][requirement_id] = requirement_level + + def print_coverage(self): + """ + Utility method that prints the test coverage against the spec + """ + covered_tests: Dict[str, Set[str]] = {} + + for test in self.tests: + if test["section_id"] not in covered_tests: + covered_tests[test["section_id"]] = set() + covered_tests[test["section_id"]].add(test["requirement_id"]) + + for section_id, section_head in self.section_heads.items(): + requirements = set(self.section_requirements[section_id].keys()) + tested_requirements = covered_tests.get(section_id, set()) + covered_requirements = tested_requirements.intersection( + requirements) + uncovered_requirements = requirements.difference( + tested_requirements) + unknown_requirements = tested_requirements.difference(requirements) + + section_color = Colors.GREEN + if len(requirements) == 0: + section_color = Colors.DARK_GRAY + elif len(tested_requirements) == 0: + section_color = Colors.RED + elif len(uncovered_requirements) > 0: + section_color = Colors.YELLOW + + # Provide the user with information about extraneous requirements + extra_warning = 
f"{Colors.RED}+{len(unknown_requirements)}" if len( + unknown_requirements) > 0 else "" + + self.logger.info( + f"{Colors.BOLD}{section_color}{section_head} ({section_id}): {len(covered_requirements)}/{len(requirements)} {extra_warning}{Colors.END}") + + for requirement_id, level in self.section_requirements[section_id].items(): + requirement_color = Colors.GREEN if requirement_id in tested_requirements else Colors.RED + self.logger.info( + f"{INDENT_TEXT}{requirement_color}{section_id}/{requirement_id} - {level}{Colors.END}") + + # Warn the user of extraneous requirements in the section + for requirement_id in unknown_requirements: + self.logger.warning( + f"{INDENT_TEXT}{Colors.YELLOW}Unknown requirement id {requirement_id}{Colors.END}") + + # Warn the user of extraneous sections in the tests + for section_id in set(covered_tests.keys()).difference(set(self.section_heads.keys())): + self.logger.warning( + f"{Colors.YELLOW}Unknown section id {section_id} containing {len(covered_tests[section_id])} requirements{Colors.END}") + + def run(self, command, do_cleanup=True) -> bool: + driver = Driver(command, self.tests_workdir, logger=self.logger) + + suite_pass = True + + try: + for test in self.tests: + test_info = f"requirement({test['section_id']}/{test['requirement_id']})" + self.logger.info(f"{Colors.BOLD}{test_info}{Colors.END}") + if self._run_test(driver, test, indent=1): + self.logger.info( + f"{Colors.BOLD}{Colors.GREEN}{test_info}... PASS{Colors.END}") + else: + self.logger.error( + f"{Colors.BOLD}{Colors.RED}{test_info}... FAIL{Colors.END}") + suite_pass = False + finally: + if do_cleanup: + driver.cleanup() + + if suite_pass: + self.logger.info( + f"{Colors.GREEN}Spec suite {self.tests_path}... PASS{Colors.END}") + else: + self.logger.error( + f"{Colors.RED}Spec suite {self.tests_path}... 
FAIL{Colors.END}") + + return suite_pass + + def _run_test(self, driver: Driver, requirement: TestRequirement, indent=0) -> bool: + test_pass = True + + for scenario in requirement["scenarios"]: + scenario_info = f"{INDENT_TEXT * indent}scenario(source='{scenario['source_profile_path']}', expected='{scenario['expected_catalog_path']}')" + + self.logger.info(f"{Colors.BOLD}{scenario_info}{Colors.END}") + + scenario_pass = self._run_test_scenario( + driver, scenario, indent=indent + 1) + + if scenario_pass: + self.logger.info( + f"{Colors.BOLD}{Colors.GREEN}{scenario_info}... PASS{Colors.END}") + else: + # TODO: param to fail if the level is not "must" + self.logger.error( + f"{Colors.BOLD}{Colors.RED}{scenario_info}... FAIL{Colors.END}") + test_pass = False + + return test_pass + + def _run_test_scenario(self, driver: Driver, scenario: TestScenario, indent=0) -> bool: + """ + Runs a given test scenario, returning True if all selection expressions pass + """ + + self.logger.info( + f"{Colors.BLUE}{INDENT_TEXT * indent}Description: {scenario['description']}{Colors.END}") + + # Correct for path relative to spec tests file + expected_path = scenario["expected_catalog_path"] + if not os.path.isabs(expected_path): + expected_path = os.path.join(self.tests_workdir, expected_path) + # TODO user friendly error if catalog path cannot be found + expected = ET.parse(expected_path) + + # Driver already uses the spec tests file's parent dir as the cwd, no path correction needed + result = driver.run(scenario["source_profile_path"], indent=indent + 1) + + # if no selection expressions exist, test still successfully produced an output + scenario_pass = True + for selection_expression in scenario["selection_expressions"]: + result_selection = result.findall(selection_expression, QUERY_NS) + expected_selection = expected.findall( + selection_expression, QUERY_NS) + + for i, (result_elem, expected_elem) in enumerate(zip(result_selection, expected_selection)): + # XPath starts lists 
with 1 + selection_expression_indexed = f"{selection_expression}{f'[{i + 1}]' if len(result_selection) > 1 or len(expected_selection) > 1 else ''}" + same, differences = compare_elements(result_elem, expected_elem, + # XPath selection used for debugging. Only specify position predicate if necessary + selection_expression_indexed, e1Name="result", e2Name="expected") + if same: + self.logger.debug( + f"{Colors.GREEN}{INDENT_TEXT * (indent + 1)}selection `{selection_expression_indexed}` result matched{Colors.END}") + else: + scenario_pass = False + self.logger.error( + f"{Colors.RED}{INDENT_TEXT * indent}selection `{selection_expression_indexed}` result mismatch:{Colors.END}") + for difference in differences: + # Clean up tags in comments to use namespaces + difference = difference.replace( + f"{{{QUERY_NS['oscal']}}}", "oscal:") + + self.logger.error( + f"{Colors.RED}{INDENT_TEXT * (indent + 1)}{difference}{Colors.END}") + + if len(result_selection) != len(expected_selection): + self.logger.error( + f"{Colors.RED}{INDENT_TEXT * (indent + 1)}selection `{selection_expression}` result size mismatch (result={len(result_selection)}, expected={len(expected_selection)}){Colors.END}") + scenario_pass = False + + return scenario_pass + + +if __name__ == '__main__': + example_text = f"example: spec-tester.py run 'oscal-cli profile resolve --to=XML {DRIVER_SOURCE_TOKEN} {DRIVER_DESTINATION_TOKEN}'" + + parser = argparse.ArgumentParser( + description='OSCAL profile-resolution testing harness', epilog=example_text) + parser.add_argument( + "--tests_path", default=DEFAULT_TESTS_PATH, help="Override the tests file") + parser.add_argument( + "--spec_path", default=DEFAULT_SPEC_PATH, help="Override the spec file") + parser.add_argument("-v", "--verbose", + help="display debug information", action="store_true") + + subparsers = parser.add_subparsers( + required=True, dest="action", description="valid subcommands") + + # "run" subcommand + parser_run = subparsers.add_parser( + 'run', 
description='Run the spec tests', epilog=example_text) + parser_run.add_argument( + "command", help="The program to call, with the input profile and output path" + f" replaced with {DRIVER_SOURCE_TOKEN} and {DRIVER_DESTINATION_TOKEN} respectively") + + parser.add_argument("-k", "--keep", + help="keep output directory", action="store_true") + # "coverage" subcommand + parser_coverage = subparsers.add_parser( + 'coverage', description='Report the coverage of the given tests file against the spec') + + args = parser.parse_args() + + # truncate log levels for prettier console formatting + logging.addLevelName(logging.DEBUG, 'DEBG') + logging.addLevelName(logging.INFO, 'INFO') + logging.addLevelName(logging.WARNING, 'WARN') + logging.addLevelName(logging.ERROR, 'ERRR') + logging.addLevelName(logging.CRITICAL, 'CRIT') + logging.basicConfig(format='%(levelname)s: %(message)s', + level=logging.DEBUG if args.verbose else logging.INFO) + + harness = RequirementTests(args.spec_path, args.tests_path) + + if args.action == "run": + suite_pass = harness.run(args.command, do_cleanup=not args.keep) + if not suite_pass: + sys.exit(1) + elif args.action == "coverage": + harness.print_coverage() diff --git a/src/specifications/profile-resolution/spec-tests.json b/src/specifications/profile-resolution/spec-tests.json new file mode 100644 index 0000000000..329df9284b --- /dev/null +++ b/src/specifications/profile-resolution/spec-tests.json @@ -0,0 +1,17 @@ +[ + { + "section_id": "import", + "requirement_id": "req-uri-resolve", + "scenarios": [ + { + "description": "Check that group and control titles match, signalling that URIs have been resolved", + "source_profile_path": "requirement-tests/req-include-all-asis.xml", + "expected_catalog_path": "requirement-tests/output-expected/req-include-all-asis_RESOLVED.xml", + "selection_expressions": [ + "./oscal:group/oscal:title", + "./oscal:group/oscal:control/oscal:title" + ] + } + ] + } +] \ No newline at end of file diff --git 
a/src/specifications/profile-resolution/unit-tests.xsd b/src/specifications/profile-resolution/unit-tests.xsd index c746b134a0..8f80870a64 100644 --- a/src/specifications/profile-resolution/unit-tests.xsd +++ b/src/specifications/profile-resolution/unit-tests.xsd @@ -2,7 +2,7 @@ - +