diff --git a/CHANGELOG.md b/CHANGELOG.md index cdca102c..a03a1c60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 6.19.0 - April 5, 2024 + +* Adds a new CLI tool that can be called to resolve unresolved bill-to-bill relationships in the openstates DB + ## 6.18.5 - March 29, 2024 * other_name in committee matching diff --git a/README.md b/README.md index 520ba5c4..60c69e6d 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,19 @@ See [RELASE.md](./RELEASE.md) ## Debugging openstates-core code +### Commands that do not integrate with openstates-scrapers + +* Install pyenv and the correct Python version +* Install poetry +* `poetry install` + +Example PyCharm config (for `relationships` CLI command): + +* Interpreter: the poetry env that you just set up +* Module: `openstates.cli.relationships` +* Parameters: `--log_level=DEBUG us` +* Env vars: `DATABASE_URL=postgres://USERNAME:PASSWORD@DB_HOSTNAME:PORT/openstatesorg` + ### Update command / scrapers There are @@ -27,7 +40,7 @@ within `openstates-core`. Here's a recipe using PyCharm to successfully debug the update command: -#### Requirements to run code natively (not in docker) +#### Requirements to run the scrape/update code natively (not in docker) * You need the `gdal` library installed on the host system. For me: `sudo apt install gdal-bin python3-gdal` * `openstates-core` checked out at /home/username/repo/openstates/openstates-core/ @@ -37,7 +50,7 @@ Here's a recipe using PyCharm to successfully debug the update command: * Install required python version using the `pyenv` utility * `pip install poetry` (if that python version doesn't already have it) -#### Debugging natively +#### Debugging the scrape/update code natively * If you have previously installed the `openstates` dependency (eg `openstates-core`), then you need to run `poetry remove openstates` to clear that remotely-installed (from pypi) dependency. 
Each time you make a round diff --git a/openstates/cli/relationships.py b/openstates/cli/relationships.py new file mode 100644 index 00000000..a495f3eb --- /dev/null +++ b/openstates/cli/relationships.py @@ -0,0 +1,38 @@ +import click +import logging +import logging.config +from openstates.utils import abbr_to_jid +from ..utils.django import init_django +from ..exceptions import InternalError +from .. import settings + + +@click.command(help="Resolve unresolved relationships between entities") +@click.argument("jurisdiction_abbreviation") +@click.option( + "--log_level", + help="Set the level of logging to output.", + default="INFO" +) +def main(jurisdiction_abbreviation: str, log_level: str) -> None: + # set up logging + logger = logging.getLogger("openstates") + handler_level = log_level + settings.LOGGING["handlers"]["default"]["level"] = handler_level # type: ignore + logging.config.dictConfig(settings.LOGGING) + + # set up django for DB access + # has to be done before any importer can be imported (?) 
+ init_django() + from openstates.importers import resolve_related_bills + + logger.info(f"Beginning resolution of bill relationships for {jurisdiction_abbreviation}") + jurisdiction_id = abbr_to_jid(jurisdiction_abbreviation) + try: + resolve_related_bills(jurisdiction_id, logger) + except InternalError as e: + logger.error(f"Error during bill relationship resolution for {jurisdiction_abbreviation}: {e}") + + +if __name__ == "__main__": + main() diff --git a/openstates/importers/__init__.py b/openstates/importers/__init__.py index 11c3b45c..43e17689 100644 --- a/openstates/importers/__init__.py +++ b/openstates/importers/__init__.py @@ -1,6 +1,6 @@ # flake8: noqa from .jurisdiction import JurisdictionImporter from .organizations import OrganizationImporter -from .bills import BillImporter +from .bills import BillImporter, resolve_related_bills from .vote_events import VoteEventImporter from .events import EventImporter diff --git a/openstates/importers/bills.py b/openstates/importers/bills.py index ec67dfb3..8ace52d0 100644 --- a/openstates/importers/bills.py +++ b/openstates/importers/bills.py @@ -20,6 +20,39 @@ from .organizations import OrganizationImporter +def resolve_related_bills(jurisdiction_id, logger) -> None: + # go through all RelatedBill objs that are attached to a bill in this jurisdiction and + # are currently unresolved + related_bills = RelatedBill.objects.filter( + bill__legislative_session__jurisdiction_id=jurisdiction_id, + related_bill=None, + ) + logger.info(f"Found {len(related_bills)} unresolved bill relationships") + matches_found = 0 + for rb in related_bills: + candidates = list( + Bill.objects.filter( + legislative_session__identifier=rb.legislative_session, + legislative_session__jurisdiction_id=jurisdiction_id, + identifier=rb.identifier, + ) + ) + if len(candidates) == 1: + rb.related_bill = candidates[0] + rb.save() + matches_found += 1 + logger.debug(f"Resolved {rb.legislative_session} {rb.bill.identifier}") + elif len(candidates) 
> 1: # pragma: no cover + # if we ever see this, we need to add additional fields on the relation + raise InternalError( + "multiple related_bill candidates found for {}".format(rb) + ) + else: + logger.debug(f"FAILED to resolve {rb.legislative_session} {rb.bill.identifier}") + + logger.info(f"Resolved {matches_found} bills out of {len(related_bills)} bills needing resolution") + + class BillImporter(BaseImporter): _type = "bill" model_class = Bill @@ -106,27 +139,7 @@ def prepare_for_db(self, data: _JsonDict) -> _JsonDict: return data def postimport(self) -> None: - # go through all RelatedBill objs that are attached to a bill in this jurisdiction and - # are currently unresolved - for rb in RelatedBill.objects.filter( - bill__legislative_session__jurisdiction_id=self.jurisdiction_id, - related_bill=None, - ): - candidates = list( - Bill.objects.filter( - legislative_session__identifier=rb.legislative_session, - legislative_session__jurisdiction_id=self.jurisdiction_id, - identifier=rb.identifier, - ) - ) - if len(candidates) == 1: - rb.related_bill = candidates[0] - rb.save() - elif len(candidates) > 1: # pragma: no cover - # if we ever see this, we need to add additional fields on the relation - raise InternalError( - "multiple related_bill candidates found for {}".format(rb) - ) + resolve_related_bills(self.jurisdiction_id, self.logger) def update_computed_fields(self, obj: Model) -> None: update_bill_fields(obj, save=False) diff --git a/poetry.lock b/poetry.lock index 255dcc84..9f96ab46 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1514,13 +1514,13 @@ XlsxWriter = ">=0.5.7" [[package]] name = "pytz" -version = "2023.3" +version = "2024.1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, - {file = "pytz-2023.3.tar.gz", hash = 
"sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] [[package]] @@ -1535,6 +1535,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1542,8 +1543,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = 
"PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1560,6 +1568,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1567,6 +1576,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1984,4 +1994,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" 
-content-hash = "4ca23b11ba7cf90da3df2c2edc6f885e9119e4f86bf2b9e9e87df03773ea0fd1" +content-hash = "5401631123ef5bb9ec65c12645826f395de4b61d9f1217d8bb57d5e274b61339" diff --git a/pyproject.toml b/pyproject.toml index e0b348e5..18c9748f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openstates" -version = "6.18.5" +version = "6.19.0" description = "core infrastructure for the openstates project" authors = ["James Turk "] license = "MIT" @@ -13,6 +13,7 @@ os-text-extract = 'openstates.cli.text_extract:main' os-people = 'openstates.cli.people:main' os-committees = 'openstates.cli.committees:main' os-us-to-yaml = 'openstates.cli.convert_us:main' +os-relationships = 'openstates.cli.relationships:main' os-scrape = 'openstates.cli.scrape:main' os-validate = 'openstates.cli.validate:main' @@ -34,6 +35,7 @@ PyJWT = "^2.5.0" boto3 = "^1.26.61" us = "^3.1.1" influxdb-client = "^1.37.0" +pytz = "^2024.1" [tool.poetry.dev-dependencies] pytest = "^5.4.1"