Skip to content

Commit

Permalink
Merge pull request #127 from openstates/cli-relationships-resolve
Browse files Browse the repository at this point in the history
CLI command to resolve relationships
  • Loading branch information
jessemortenson authored Apr 5, 2024
2 parents 60a51c7 + a0a15ea commit 78687fa
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 29 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 6.19.0 - April 5, 2024

* Adds a new CLI tool that can be called to resolve unresolved bill-to-bill relationships in the openstates DB

## 6.18.5 - March 29, 2024

* other_name in committee matching
Expand Down
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@ See [RELEASE.md](./RELEASE.md)

## Debugging openstates-core code

### Commands that do not integrate with openstates-scrapers

* Install pyenv and the correct Python version
* Install poetry
* `poetry install`

Example PyCharm config (for `relationships` CLI command):

* Interpreter: the poetry env that you just set up
* Module: `openstates.cli.relationships`
* Parameters: `--log_level=DEBUG us`
* Env vars: `DATABASE_URL=postgres://USERNAME:PASSWORD@DB_HOSTNAME:PORT/openstatesorg`

### Update command / scrapers

There are
Expand All @@ -27,7 +40,7 @@ within `openstates-core`.

Here's a recipe using PyCharm to successfully debug the update command:

#### Requirements to run code natively (not in docker)
#### Requirements to run the scrape/update code natively (not in docker)

* You need the `gdal` library installed on the host system. For me: `sudo apt install gdal-bin python3-gdal`
* `openstates-core` checked out at /home/username/repo/openstates/openstates-core/
Expand All @@ -37,7 +50,7 @@ Here's a recipe using PyCharm to successfully debug the update command:
* Install required python version using the `pyenv` utility
* `pip install poetry` (if that python version doesn't already have it)

#### Debugging natively
#### Debugging the scrape/update code natively

* If you have previously installed the `openstates` dependency (eg `openstates-core`), then you need
to run `poetry remove openstates` to clear that remotely-installed (from pypi) dependency. Each time you make a round
Expand Down
38 changes: 38 additions & 0 deletions openstates/cli/relationships.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import click
import logging
import logging.config
from openstates.utils import abbr_to_jid
from ..utils.django import init_django
from ..exceptions import InternalError
from .. import settings


@click.command(help="Resolve unresolved relationships between entities")
@click.argument("jurisdiction_abbreviation")
@click.option(
    "--log_level",
    help="Set the level of logging to output.",
    default="INFO",
)
def main(jurisdiction_abbreviation: str, log_level: str) -> None:
    """Resolve unresolved bill-to-bill relationships for one jurisdiction.

    Configures the "openstates" logger from ``settings.LOGGING``, initialises
    Django, converts ``jurisdiction_abbreviation`` to a jurisdiction ID with
    ``abbr_to_jid`` and hands it to ``resolve_related_bills``.

    Args:
        jurisdiction_abbreviation: abbreviation passed to ``abbr_to_jid``.
        log_level: level name applied to the "default" logging handler;
            matched case-insensitively (e.g. "debug" or "DEBUG").

    Raises:
        SystemExit: with status 1 when resolution fails with ``InternalError``,
            so that the calling shell/scheduler can detect the failure.
    """
    # set up logging
    logger = logging.getLogger("openstates")
    # the logging module only recognises upper-case level names
    handler_level = log_level.upper()
    settings.LOGGING["handlers"]["default"]["level"] = handler_level  # type: ignore
    logging.config.dictConfig(settings.LOGGING)

    # set up django for DB access
    # has to be done before any importer can be imported (?)
    init_django()
    from openstates.importers import resolve_related_bills

    logger.info(f"Beginning resolution of bill relationships for {jurisdiction_abbreviation}")
    jurisdiction_id = abbr_to_jid(jurisdiction_abbreviation)
    try:
        resolve_related_bills(jurisdiction_id, logger)
    except InternalError as e:
        logger.error(f"Error during bill relationship resolution for {jurisdiction_abbreviation}: {e}")
        # a logged-but-swallowed error would otherwise exit 0 and look like success
        raise SystemExit(1) from e


# Allow running this module directly (e.g. as the module
# `openstates.cli.relationships` from an IDE run configuration) in addition
# to invoking the click command through an installed entry point.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion openstates/importers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# flake8: noqa
from .jurisdiction import JurisdictionImporter
from .organizations import OrganizationImporter
from .bills import BillImporter
from .bills import BillImporter, resolve_related_bills
from .vote_events import VoteEventImporter
from .events import EventImporter
55 changes: 34 additions & 21 deletions openstates/importers/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,39 @@
from .organizations import OrganizationImporter


def resolve_related_bills(jurisdiction_id, logger) -> None:
    """Link unresolved RelatedBill rows in a jurisdiction to their target Bill.

    Every RelatedBill attached to a bill in ``jurisdiction_id`` whose
    ``related_bill`` is still unset is matched against Bills in the same
    jurisdiction by legislative session identifier and bill identifier.
    A single match is saved on the relationship; no match is left unresolved.

    Args:
        jurisdiction_id: jurisdiction whose bill relationships are resolved.
        logger: logger that receives progress (info) and per-row (debug) output.

    Raises:
        InternalError: if more than one Bill matches a relationship.
    """
    # go through all RelatedBill objs that are attached to a bill in this jurisdiction and
    # are currently unresolved
    # select_related("bill"): the per-row log messages below read rb.bill.identifier,
    # which would otherwise cost one extra query per relationship
    related_bills = RelatedBill.objects.filter(
        bill__legislative_session__jurisdiction_id=jurisdiction_id,
        related_bill=None,
    ).select_related("bill")
    # len() evaluates the queryset once; the loop below reuses the cached rows
    total = len(related_bills)
    logger.info(f"Found {total} unresolved bill relationships")
    matches_found = 0
    for rb in related_bills:
        candidates = list(
            Bill.objects.filter(
                legislative_session__identifier=rb.legislative_session,
                legislative_session__jurisdiction_id=jurisdiction_id,
                identifier=rb.identifier,
            )
        )
        if len(candidates) == 1:
            rb.related_bill = candidates[0]
            rb.save()
            matches_found += 1
            logger.debug(f"Resolved {rb.legislative_session} {rb.bill.identifier}")
        elif len(candidates) > 1:  # pragma: no cover
            # if we ever see this, we need to add additional fields on the relation
            raise InternalError(
                "multiple related_bill candidates found for {}".format(rb)
            )
        else:
            logger.debug(f"FAILED to resolve {rb.legislative_session} {rb.bill.identifier}")

    logger.info(f"Resolved {matches_found} bills out of {total} bills needing resolution")


class BillImporter(BaseImporter):
_type = "bill"
model_class = Bill
Expand Down Expand Up @@ -106,27 +139,7 @@ def prepare_for_db(self, data: _JsonDict) -> _JsonDict:
return data

def postimport(self) -> None:
# go through all RelatedBill objs that are attached to a bill in this jurisdiction and
# are currently unresolved
for rb in RelatedBill.objects.filter(
bill__legislative_session__jurisdiction_id=self.jurisdiction_id,
related_bill=None,
):
candidates = list(
Bill.objects.filter(
legislative_session__identifier=rb.legislative_session,
legislative_session__jurisdiction_id=self.jurisdiction_id,
identifier=rb.identifier,
)
)
if len(candidates) == 1:
rb.related_bill = candidates[0]
rb.save()
elif len(candidates) > 1: # pragma: no cover
# if we ever see this, we need to add additional fields on the relation
raise InternalError(
"multiple related_bill candidates found for {}".format(rb)
)
resolve_related_bills(self.jurisdiction_id, self.logger)

def update_computed_fields(self, obj: Model) -> None:
update_bill_fields(obj, save=False)
18 changes: 14 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "openstates"
version = "6.18.5"
version = "6.19.0"
description = "core infrastructure for the openstates project"
authors = ["James Turk <[email protected]>"]
license = "MIT"
Expand All @@ -13,6 +13,7 @@ os-text-extract = 'openstates.cli.text_extract:main'
os-people = 'openstates.cli.people:main'
os-committees = 'openstates.cli.committees:main'
os-us-to-yaml = 'openstates.cli.convert_us:main'
os-relationships = 'openstates.cli.relationships:main'
os-scrape = 'openstates.cli.scrape:main'
os-validate = 'openstates.cli.validate:main'

Expand All @@ -34,6 +35,7 @@ PyJWT = "^2.5.0"
boto3 = "^1.26.61"
us = "^3.1.1"
influxdb-client = "^1.37.0"
pytz = "^2024.1"

[tool.poetry.dev-dependencies]
pytest = "^5.4.1"
Expand Down

0 comments on commit 78687fa

Please sign in to comment.