From 59bdbb9291c4e0fb2a0706339e6585a2253811bc Mon Sep 17 00:00:00 2001 From: Laurent Savaete Date: Fri, 10 Sep 2021 15:13:29 +0100 Subject: [PATCH 1/4] Update schema for issues stream --- tap_github/streams.py | 103 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/tap_github/streams.py b/tap_github/streams.py index ddf79cb2..b9ad6dbd 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -130,17 +130,116 @@ def http_headers(self) -> dict: schema = th.PropertiesList( th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("url", th.StringType), + th.Property("html_url", th.StringType), th.Property("repo", th.StringType), th.Property("org", th.StringType), - th.Property("issue_number", th.IntegerType), + th.Property("number", th.IntegerType), th.Property("updated_at", th.DateTimeType), th.Property("created_at", th.DateTimeType), - # th.Property("closed_at", th.DateTimeType), # Nulls causing parse error + th.Property("closed_at", th.DateTimeType), th.Property("state", th.StringType), th.Property("title", th.StringType), th.Property("comments", th.IntegerType), th.Property("author_association", th.StringType), th.Property("body", th.StringType), + th.Property( + "user", + th.ObjectType( + th.Property("login", th.StringType), + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("avatar_url", th.StringType), + th.Property("gravatar_id", th.StringType), + th.Property("html_url", th.StringType), + th.Property("type", th.StringType), + th.Property("site_admin", th.BooleanType), + ), + ), + th.Property( + "labels", + th.ArrayType( + th.ObjectType( + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("url", th.StringType), + th.Property("name", th.StringType), + th.Property("description", th.StringType), + th.Property("color", th.StringType), + th.Property("default", th.BooleanType), + ), + ), + ), 
+ th.Property( + "assignee", + th.ObjectType( + th.Property("login", th.StringType), + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("avatar_url", th.StringType), + th.Property("gravatar_id", th.StringType), + th.Property("html_url", th.StringType), + th.Property("type", th.StringType), + th.Property("site_admin", th.BooleanType), + ), + ), + th.Property( + "assignees", + th.ArrayType( + th.ObjectType( + th.Property("login", th.StringType), + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("avatar_url", th.StringType), + th.Property("gravatar_id", th.StringType), + th.Property("html_url", th.StringType), + th.Property("type", th.StringType), + th.Property("site_admin", th.BooleanType), + ), + ), + ), + th.Property( + "milestone", + th.ObjectType( + th.Property("html_url", th.StringType), + th.Property("node_id", th.StringType), + th.Property("id", th.IntegerType), + th.Property("number", th.IntegerType), + th.Property("state", th.StringType), + th.Property("title", th.StringType), + th.Property("description", th.StringType), + th.Property( + "creator", + th.ObjectType( + th.Property("login", th.StringType), + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("avatar_url", th.StringType), + th.Property("gravatar_id", th.StringType), + th.Property("html_url", th.StringType), + th.Property("type", th.StringType), + th.Property("site_admin", th.BooleanType), + ), + ), + th.Property("open_issues", th.IntegerType), + th.Property("closed_issues", th.IntegerType), + th.Property("created_at", th.DateTimeType), + th.Property("updated_at", th.DateTimeType), + th.Property("closed_at", th.DateTimeType), + th.Property("due_on", th.DateTimeType), + ), + ), + th.Property("locked", th.BooleanType), + th.Property( + "pull_request", + th.ArrayType( + th.ObjectType( + th.Property("html_url", th.StringType), + th.Property("url", th.StringType), + ), + ), + ), 
).to_dict() From 71b07b7ba4cdfc13f7a2c651252d163206e5c56f Mon Sep 17 00:00:00 2001 From: Laurent Savaete Date: Fri, 10 Sep 2021 15:22:46 +0100 Subject: [PATCH 2/4] Update issue comments stream to be a child of repositories and update schema --- tap_github/streams.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tap_github/streams.py b/tap_github/streams.py index b9ad6dbd..e86b6b79 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -244,13 +244,17 @@ def http_headers(self) -> dict: class IssueCommentsStream(GitHubStream): - """Defines 'Issues' stream.""" + """ + Defines 'IssueComments' stream. + Issue comments are fetched from the repository level (as opposed to per issue) + to optimize for API quota usage. + """ name = "issue_comments" - path = "/repos/{org}/{repo}/issues/{issue_number}/comments" + path = "/repos/{org}/{repo}/issues/comments" primary_keys = ["id"] replication_key = "updated_at" - parent_stream_type = IssuesStream + parent_stream_type = RepositoryStream state_partitioning_keys = ["repo", "org"] ignore_parent_replication_key = False @@ -278,11 +282,25 @@ def get_url_params( schema = th.PropertiesList( th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), th.Property("repo", th.StringType), th.Property("org", th.StringType), - th.Property("issue_number", th.IntegerType), + th.Property("issue_url", th.IntegerType), th.Property("updated_at", th.DateTimeType), th.Property("created_at", th.DateTimeType), th.Property("author_association", th.StringType), th.Property("body", th.StringType), + th.Property( + "user", + th.ObjectType( + th.Property("login", th.StringType), + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("avatar_url", th.StringType), + th.Property("gravatar_id", th.StringType), + th.Property("html_url", th.StringType), + th.Property("type", th.StringType), + th.Property("site_admin", th.BooleanType), + ), + ), ).to_dict() 
From da1257465095ce75d712fe898c811faa501ca0fd Mon Sep 17 00:00:00 2001 From: Laurent Savaete Date: Sat, 11 Sep 2021 00:05:28 +0100 Subject: [PATCH 3/4] Add issue number to issue_comments stream --- tap_github/streams.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tap_github/streams.py b/tap_github/streams.py index e86b6b79..dc04b21c 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -280,9 +280,14 @@ def get_url_params( params["since"] = since return params + def post_process(self, row: dict, context: Optional[dict] = None) -> dict: + row['issue_number'] = int(row["issue_url"].split('/')[-1]) + return row + schema = th.PropertiesList( th.Property("id", th.IntegerType), th.Property("node_id", th.StringType), + th.Property("issue_number", th.IntegerType), th.Property("repo", th.StringType), th.Property("org", th.StringType), th.Property("issue_url", th.IntegerType), From c9fcc331fdedcb067cc311cb4df622fb163cac9f Mon Sep 17 00:00:00 2001 From: Laurent Savaete Date: Sat, 11 Sep 2021 01:57:49 +0100 Subject: [PATCH 4/4] Fix black style --- tap_github/streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/streams.py b/tap_github/streams.py index dc04b21c..3577a24d 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -281,7 +281,7 @@ def get_url_params( return params def post_process(self, row: dict, context: Optional[dict] = None) -> dict: - row['issue_number'] = int(row["issue_url"].split('/')[-1]) + row["issue_number"] = int(row["issue_url"].split("/")[-1]) return row schema = th.PropertiesList(