Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Command refactor #15

Merged
merged 3 commits into from
Aug 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 92 additions & 6 deletions lgsf/commands/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@
import json

from rich.console import Console
from rich.progress import Progress, BarColumn, TimeElapsedColumn
from rich.table import Table

from lgsf.conf import settings
from lgsf.path_utils import _abs_path, load_scraper
from lgsf.path_utils import _abs_path, load_scraper, load_council_info


class CommandBase(metaclass=abc.ABCMeta):
command_name = None

def __init__(self, argv, stdout):
def __init__(self, argv, stdout, pretty=False):
self.argv = argv
self.create_parser()
self.stdout = stdout
self.console = Console(file=self.stdout)

# After all local vars are set up
self.execute()
self.pretty = pretty

def create_parser(self):
self.parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -81,8 +80,27 @@ def create_parser(self):
action="store_true",
help="Only run scrapers not run recently",
)
self.parser.add_argument(
"--check-only",
action="store_true",
help="Just check for updated pages, don't scrape anything",
)
self.parser.add_argument(
"--list-missing",
action="store_true",
help="Print missing councils",
)
self.parser.add_argument(
"--list-disabled",
action="store_true",
help="Print disabled councils",
)

self.add_default_arguments(self.parser)
self.add_arguments(self.parser)

if hasattr(self, "add_arguments"):
self.add_arguments(self.parser)

args = self.parser.parse_args(self.argv[1:])
if args.list_missing or args.list_disabled:
return args
Expand Down Expand Up @@ -167,6 +185,7 @@ def output_status(self):
)
)

@property
def councils_to_run(self):
councils = []
if self.options["all_councils"] or self.options["tags"]:
Expand All @@ -178,6 +197,57 @@ def councils_to_run(self):
councils.append(council)
return councils

def run_councils(self):
    """Run the scraper for each selected council, one after another."""
    for next_council in self.councils_to_run:
        self.run_council(next_council)

def run_councils_with_progress(self):
    """Run every selected council's scraper behind a rich progress bar.

    Behaves like ``run_councils`` but renders a single "Total" task on
    ``self.console`` showing a bar, percentage and elapsed time.
    """
    to_run = self.councils_to_run
    with Progress(
        "[progress.description]{task.description}",
        BarColumn(),
        "[progress.percentage]{task.percentage:>3.0f}%",
        TimeElapsedColumn(),
        console=self.console,
        auto_refresh=False,
    ) as progress:
        # Plain string: the old f"Total" had no placeholders (ruff F541).
        total = progress.add_task(description="Total", total=len(to_run))
        # The former `while not progress.finished:` wrapper was redundant:
        # the single task completes after exactly one pass of this loop,
        # so the while body could only ever execute once.
        for council in to_run:
            self.run_council(council)
            progress.update(total, advance=1)
            # auto_refresh is off, so redraw explicitly after each council.
            progress.refresh()

def _run_single(self, scraper):
try:
scraper.run()
except KeyboardInterrupt:
raise
except:
if self.options.get("verbose"):
raise

def run_council(self, council):
    """Load and execute the scraper for a single council.

    Skips the council when no scraper module exists, when the scraper is
    disabled, when --refresh is set and it ran recently, or when the
    --tags filter isn't fully satisfied by the scraper's tags.
    """
    self.options["council"] = council
    self.options["council_info"] = load_council_info(council)
    scraper_cls = load_scraper(council, self.command_name)
    if not scraper_cls:
        return
    with scraper_cls(self.options, self.console) as scraper:
        if scraper.disabled:
            return
        if self.options["refresh"] and scraper.run_since():
            return
        if self.options["tags"]:
            wanted = set(self.options["tags"].split(","))
            if not wanted.issubset(set(scraper.get_tags)):
                return
        self._run_single(scraper)

def normalise_codes(self):
new_codes = []
if self.options.get("council"):
Expand All @@ -187,3 +257,19 @@ def normalise_codes(self):
new_codes.append(_abs_path(settings.SCRAPER_DIR_NAME, code)[1])
self.options["council"] = ",".join(new_codes)
return self.options

def handle(self, options):
    """Entry point called with parsed CLI options.

    Prints any requested reports (missing/disabled councils and status),
    normalises council codes, then runs the selected scrapers — with a
    progress bar when the command was constructed with pretty=True.
    """
    self.options = options

    if options["list_missing"]:
        self.output_missing()
    if options["list_disabled"]:
        self.output_disabled()

    self.output_status()
    self.normalise_codes()

    runner = (
        self.run_councils_with_progress if self.pretty else self.run_councils
    )
    runner()
109 changes: 0 additions & 109 deletions lgsf/councillors/commands.py
Original file line number Diff line number Diff line change
@@ -1,114 +1,5 @@
from rich.progress import Progress

from lgsf.commands.base import PerCouncilCommandBase
from lgsf.path_utils import load_scraper, load_council_info
from retry import retry


class Command(PerCouncilCommandBase):
    """Scrape councillor information for one or more councils.

    Runs each selected council's scraper inside a rich ``Progress``
    display with four task bars: total, completed, failed and skipped.
    """

    command_name = "councillors"

    def add_arguments(self, parser):
        """Register councillor-specific command line flags."""
        parser.add_argument(
            "--check-only",
            action="store_true",
            help="Just check for updated pages, don't scrape anything",
        )
        parser.add_argument(
            "--list-missing",
            action="store_true",
            help="Print missing councils",
        )
        parser.add_argument(
            "--list-disabled",
            action="store_true",
            help="Print disabled councils",
        )

    def _run_single(self, scraper, progress, tasks):
        """Run one scraper, updating the completed/failed task bars.

        Failures are swallowed (and counted on the "failed" bar) unless
        --verbose was passed; KeyboardInterrupt always propagates.
        """
        try:
            # Imported here, not at module level, to avoid importing the
            # scrapers package before it is needed.
            from lgsf.councillors.scrapers import ModGovCouncillorScraper

            if isinstance(scraper, ModGovCouncillorScraper):
                progress.console.print(
                    "\t".join([scraper.options["council"], scraper.base_url])
                )
            else:
                progress.console.print(
                    "\t".join([scraper.options["council"], str(scraper.class_tags)])
                )
            scraper.run()
            progress.update(tasks["completed"], advance=1)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Was a bare except; Exception lets SystemExit propagate.
            if self.options.get("verbose"):
                raise
            progress.update(tasks["failed"], advance=1)
            progress.console.print(
                # Fixed message: used to read "Error running asdasd {}"
                # (debug leftover in a user-facing string).
                "Error running {}, see {} for more".format(
                    self.options["council"], scraper._error_file_name()
                ),
                style="red",
            )

    def handle(self, options):
        """Entry point: print requested reports, then run the scrapers."""
        self.options = options
        if options["list_missing"]:
            self.output_missing()

        if options["list_disabled"]:
            self.output_disabled()

        self.output_status()

        self.normalise_codes()
        to_run = self.councils_to_run()
        with Progress(
            auto_refresh=False, redirect_stderr=False, redirect_stdout=False
        ) as progress:
            # Plain strings: the old f"Total" etc. had no placeholders.
            tasks = {
                "total": progress.add_task(description="Total", total=len(to_run)),
                "completed": progress.add_task(
                    description="Completed", total=len(to_run)
                ),
                "failed": progress.add_task(description="Failed", total=len(to_run)),
                "skipped": progress.add_task(description="Skipped", total=len(to_run)),
            }

            # Single pass over the councils. The old
            # `while not progress.finished:` wrapper could never terminate
            # for a non-empty run: completed/failed/skipped each had
            # total=len(to_run) but advances were split among them, so
            # progress.finished stayed False and the councils were
            # re-scraped forever.
            for council in to_run:
                self.options["council"] = council
                self.options["council_info"] = load_council_info(council)
                scraper_cls = load_scraper(council, self.command_name)
                if not scraper_cls:
                    continue
                with scraper_cls(self.options, progress.console) as scraper:
                    should_run = True
                    if scraper.disabled:
                        should_run = False

                    if should_run and options["refresh"]:
                        if scraper.run_since():
                            should_run = False

                    if should_run and options["tags"]:
                        required_tags = set(options["tags"].split(","))
                        scraper_tags = set(scraper.get_tags)
                        if not required_tags.issubset(scraper_tags):
                            should_run = False

                    if should_run:
                        if options.get("verbose"):
                            progress.console.print(council)

                        self._run_single(scraper, progress, tasks)
                        progress.update(tasks["total"], advance=1)
                    else:
                        progress.update(tasks["skipped"], advance=1)
                        progress.update(tasks["total"], advance=1)
            progress.refresh()
2 changes: 1 addition & 1 deletion lgsf/councillors/scrapers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def report(self):
raise ValueError(
"Not many councillors found ({})".format(len(self.councillors))
)
self.console.print("Found {} councillors".format(len(self.councillors)))
self.console.log("Found {} councillors".format(len(self.councillors)))


class HTMLCouncillorScraper(BaseCouncillorScraper):
Expand Down
2 changes: 1 addition & 1 deletion lgsf/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, argv, stdout=None):
self.stdout.write(self.format_help())
else:
Command = load_command(subcommand)
Command(argv[1:], self.stdout)
Command(argv[1:], self.stdout, pretty=True).execute()

def format_help(self):
help_text = [
Expand Down
1 change: 1 addition & 0 deletions scrapers/MTY-merthyr-tydfil/councillors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@

class Scraper(ModGovCouncillorScraper):
    # Councillor scraper for Merthyr Tydfil, driven by the council's
    # ModernGov "democracy" site.
    base_url = "http://democracy.merthyr.gov.uk"
    # NOTE(review): "example" presumably marks this scraper for the
    # --tags selection filter — confirm against the command's tag handling.
    tags = ["example"]
1 change: 1 addition & 0 deletions scrapers/NAY-north-ayrshire/councillors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@

class Scraper(CMISCouncillorScraper):
    # Councillor scraper for North Ayrshire, driven by the council's
    # CMIS "CurrentCouncillors" listing page.
    base_url = "https://north-ayrshire.cmis.uk.com/north-ayrshire/Councillors/CurrentCouncillors.aspx"
    # NOTE(review): "example" presumably marks this scraper for the
    # --tags selection filter — confirm against the command's tag handling.
    tags = ["example"]