Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TIMX 404 - Establish feature flag branching for parquet work #211

Merged
merged 3 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ help: ## Print this message
### Dependency commands ###
install: ## Install script and dependencies
pipenv install --dev
pipenv run pre-commit install

update: install ## Update all Python dependencies
pipenv clean
Expand Down
8 changes: 5 additions & 3 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@ beautifulsoup4 = "*"
click = "*"
jsonlines = "*"
lxml = "*"
lxml-stubs = "*"
pyarrow = "*"
python-dateutil = "*"
sentry-sdk = "*"
smart-open = {version = "*", extras = ["s3"]}
types-python-dateutil = "*"
lxml-stubs = "*"

[dev-packages]
black = "*"
coveralls = "*"
mypy = "*"
pytest = "*"
ipython = "*"
mypy = "*"
pre-commit = "*"
pyarrow-stubs = "*"
pytest = "*"
ruff = "*"
safety = "*"

Expand Down
712 changes: 399 additions & 313 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ STATUS_UPDATE_INTERVAL=### The transform process logs the # of records transform
WORKSPACE=### Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform.
```

### Optional

```shell
ETL_VERSION=### Version number of the TIMDEX ETL infrastructure. This can be used to align application behavior with the requirements of other applications in the TIMDEX ETL pipeline.
```

## CLI commands

### `transform`
Expand Down
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ select = ["ALL", "PT"]

ignore = [
# default
"ANN101",
"ANN102",
Comment on lines -26 to -27
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ruff was updated to v0.8, and this warning came up:

warning: The following rules have been removed and ignoring them has no effect:
    - ANN101
    - ANN102

These were removed, and no issue.

"COM812",
"COM812",
"D107",
"N812",
"PTH",
Expand All @@ -44,7 +42,7 @@ ignore = [
"PLR0913",
"PLR0915",
"S320",
"S321",
"S321",
]

# allow autofix behavior for specified rules
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
@pytest.fixture(autouse=True)
def _test_env(monkeypatch):
monkeypatch.setenv("WORKSPACE", "test")
# NOTE: FEATURE FLAG: remove after v2 work is complete
monkeypatch.setenv("ETL_VERSION", "1")


@pytest.fixture(autouse=True, scope="session")
Expand Down
92 changes: 92 additions & 0 deletions tests/test_temporary_feature_flagging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# ruff: noqa: PLR2004

from unittest.mock import patch

import pytest

from transmogrifier.cli import main
from transmogrifier.config import get_etl_version

# NOTE: FEATURE FLAG: this test file can be removed completely after v2 parquet work.


@pytest.fixture
def mocked_v1_write_method():
with patch(
"transmogrifier.cli.Transformer.transform_and_write_output_files"
) as mocked_method:
yield mocked_method


@pytest.fixture
def mocked_v2_write_method():
with patch(
"transmogrifier.cli.Transformer.write_to_parquet_dataset"
) as mocked_method:
yield mocked_method


def test_etl_version_helper_function_no_env_var_is_v1(monkeypatch):
monkeypatch.delenv("ETL_VERSION")
assert get_etl_version() == 1


def test_etl_version_helper_function_env_var_is_1_is_v1(monkeypatch):
monkeypatch.setenv("ETL_VERSION", "1")
assert get_etl_version() == 1


def test_etl_version_helper_function_env_var_is_2_is_v2(monkeypatch):
monkeypatch.setenv("ETL_VERSION", "2")
assert get_etl_version() == 2


@pytest.mark.parametrize(
"env_value",
[
"pumpkin_pie", # throws ValueError because not integer
"3", # throws ValueError because not 1 or 2
],
)
def test_etl_version_helper_function_env_var_value_is_unsupported(env_value, monkeypatch):
monkeypatch.setenv("ETL_VERSION", env_value)
with pytest.raises(ValueError): # noqa: PT011
get_etl_version()


def test_cli_etl_version_v1_invokes_v1_code(
mocked_v1_write_method, monkeypatch, runner, tmp_path
):
monkeypatch.setenv("ETL_VERSION", "1")
mocked_v1_write_method.side_effect = Exception("Do not proceed")
runner.invoke(
main,
[
"-i",
"/does/not/exist/alma-2023-01-13-full-extracted-records-to-index_01.xml",
"-o",
"/does/not/exist/libguides.json",
"-s",
"libguides",
],
)
mocked_v1_write_method.assert_called()


def test_cli_etl_version_v2_invokes_v2_code(
mocked_v2_write_method, monkeypatch, runner, tmp_path
):
monkeypatch.setenv("ETL_VERSION", "2")
mocked_v2_write_method.side_effect = Exception("Do not proceed")
runner.invoke(
main,
[
"-i",
"/does/not/exist/alma-2023-01-13-full-extracted-records-to-index_01.xml",
"-o",
"/does/not/exist/dataset",
"-s",
"libguides",
],
)
mocked_v2_write_method.assert_called()
18 changes: 16 additions & 2 deletions transmogrifier/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

import click

from transmogrifier.config import SOURCES, configure_logger, configure_sentry
from transmogrifier.config import (
SOURCES,
configure_logger,
configure_sentry,
get_etl_version,
)
from transmogrifier.sources.transformer import Transformer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -55,7 +60,15 @@ def main(
logger.info("Running transform for source %s", source)

transformer = Transformer.load(source, input_file, run_id=run_id)
transformer.transform_and_write_output_files(output_file)

# NOTE: FEATURE FLAG: branching logic will be removed after v2 work is complete
etl_version = get_etl_version()
match etl_version:
case 1:
transformer.transform_and_write_output_files(output_file)
case 2:
transformer.write_to_parquet_dataset(output_file)

logger.info(
(
"Completed transform, total records processed: %d, "
Expand All @@ -68,6 +81,7 @@ def main(
transformer.skipped_record_count,
len(transformer.deleted_records),
)

elapsed_time = perf_counter() - start_time
logger.info(
"Total time to complete transform: %s", str(timedelta(seconds=elapsed_time))
Expand Down
9 changes: 9 additions & 0 deletions transmogrifier/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,12 @@ def load_external_config(
else:
message = f"Unrecognized file_type parameter: {file_type}"
raise ValueError(message)


# NOTE: FEATURE FLAG: function will be removed after v2 work is complete
def get_etl_version() -> Literal[1, 2]:
etl_version = int(os.environ.get("ETL_VERSION", "1"))
if etl_version not in [1, 2]:
message = f"ETL_VERSION '{etl_version}' not supported"
raise ValueError(message)
return etl_version # type: ignore[return-value]
Loading