From 49b95ad940eddfaf2f3df86be60413635516fc94 Mon Sep 17 00:00:00 2001 From: Sebastian Smiley Date: Tue, 25 Jul 2023 06:30:13 -0400 Subject: [PATCH 1/3] API to v14 and respective minor schema updates SDK to 0.29.0 Python to >=3.7.1 (required by SDK update) README clarifications Linting (mostly removing unused imports) Implement get_url_params instead of monolithic path --- README.md | 2 + pyproject.toml | 4 +- tap_googleads/client.py | 58 +++------- tap_googleads/schemas/customer_hierarchy.json | 67 ++++++++++++ tap_googleads/streams.py | 103 ++++++++---------- tap_googleads/tap.py | 21 +++- tap_googleads/tests/test_core.py | 8 -- .../tests/test_customer_not_found.py | 6 - 8 files changed, 146 insertions(+), 123 deletions(-) create mode 100644 tap_googleads/schemas/customer_hierarchy.json diff --git a/README.md b/README.md index 599ca89..c2b6448 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ THIS IS NOT READY FOR PRODUCTION. Bearer tokens sometimes slip out to logs. Use | start_date | True | 2022-03-24T00:00:00Z (Today-7d) | Date to start our search from, applies to Streams where there is a filter date. Note that Google responds to Data in buckets of 1 Day increments | | end_date | True | 2022-03-31T00:00:00Z (Today) | Date to end our search on, applies to Streams where there is a filter date. Note that the query is BETWEEN start_date AND end_date | +Note that although customer IDs are often displayed in the Google Ads UI in the format 123-456-7890, they should be provided to the tap in the format 1234567890, with no dashes. + ### Get refresh token 1. GET https://accounts.google.com/o/oauth2/v2/auth?response_type=code&client_id=client_id&redirect_uri=http://127.0.0.1&scope=https://www.googleapis.com/auth/adwords&state=autoidm&access_type=offline&prompt=select_account&include_granted_scopes=true 1. POST https://www.googleapis.com/oauth2/v4/token?code={code}&client_id={client_id}&client_secret={client_secret}&redirect_uri=http://127.0.0.1&grant_type=authorization_code diff --git a/pyproject.toml b/pyproject.toml index f7e83c8..33594d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,9 +10,9 @@ keywords = [ license = "Apache 2.0" [tool.poetry.dependencies] -python = "<3.11,>=3.6.2" +python = "<3.11,>=3.7.1" requests = "^2.25.1" -singer-sdk = "0.3.17" +singer-sdk = "0.29.0" [tool.poetry.dev-dependencies] pytest = "^6.2.5" diff --git a/tap_googleads/client.py b/tap_googleads/client.py index 0e1fcd2..798f8b6 100644 --- a/tap_googleads/client.py +++ b/tap_googleads/client.py @@ -1,14 +1,14 @@ """REST client handling, including GoogleAdsStream base class.""" -import requests +from urllib.parse import urlencode, urljoin from pathlib import Path -from typing import Any, Dict, Optional, Union, List, Iterable +from typing import Any, Dict, Optional, Iterable from memoization import cached -from singer_sdk.helpers.jsonpath import extract_jsonpath from singer_sdk.streams import RESTStream from singer_sdk.exceptions import FatalAPIError, RetriableAPIError +from singer_sdk.pagination import JSONPathPaginator from tap_googleads.auth import GoogleAdsAuthenticator from tap_googleads.utils import replicate_pk_at_root @@ -20,10 +20,10 @@ class GoogleAdsStream(RESTStream): """GoogleAds stream class.""" - url_base = "https://googleads.googleapis.com/v12" + url_base = "https://googleads.googleapis.com/v14" records_jsonpath = "$[*]" # Or override `parse_response`. - next_page_token_jsonpath = "$.nextPageToken" # Or override `get_next_page_token`. + next_page_token_jsonpath = "$.nextPageToken" primary_keys_jsonpaths = None _LOG_REQUEST_METRIC_URLS: bool = True @@ -32,11 +32,13 @@ class GoogleAdsStream(RESTStream): def authenticator(self) -> GoogleAdsAuthenticator: """Return a new authenticator object.""" base_auth_url = "https://www.googleapis.com/oauth2/v4/token" - # Silly way to do parameters but it works - auth_url = base_auth_url + f"?refresh_token={self.config['refresh_token']}" - auth_url = auth_url + f"&client_id={self.config['client_id']}" - auth_url = auth_url + f"&client_secret={self.config['client_secret']}" - auth_url = auth_url + f"&grant_type=refresh_token" + auth_params = { + "refresh_token": self.config["refresh_token"], + "client_id": self.config["client_id"], + "client_secret": self.config["client_secret"], + "grant_type": "refresh_token", + } + auth_url = urljoin(base_auth_url, "?" + urlencode(auth_params)) return GoogleAdsAuthenticator(stream=self, auth_endpoint=auth_url) @property @@ -49,23 +51,8 @@ def http_headers(self) -> dict: headers["login-customer-id"] = self.config["login_customer_id"] return headers - def get_next_page_token( - self, response: requests.Response, previous_token: Optional[Any] - ) -> Optional[Any]: - """Return a token for identifying next page or None if no more pages.""" - # TODO: If pagination is required, return a token which can be used to get the - # next page. If this is the final page, return "None" to end the - # pagination loop. - if self.next_page_token_jsonpath: - all_matches = extract_jsonpath( - self.next_page_token_jsonpath, response.json() - ) - first_match = next(iter(all_matches), None) - next_page_token = first_match - else: - next_page_token = None - - return next_page_token + def get_new_paginator(self) -> JSONPathPaginator: + return JSONPathPaginator(self.next_page_token_jsonpath) def get_url_params( self, context: Optional[dict], next_page_token: Optional[Any] @@ -79,7 +66,7 @@ def get_url_params( params["order_by"] = self.replication_key return params - def validate_response(self, response): + def validate_22response(self, response): # Still catch error status codes if response.status_code == 403: msg = ( @@ -145,21 +132,6 @@ def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: f"disabled after the API that lists customers is called. {e=}" ) - def prepare_request_payload( - self, context: Optional[dict], next_page_token: Optional[Any] - ) -> Optional[dict]: - """Prepare the data payload for the REST API request. - - By default, no payload will be sent (return None). - """ - # TODO: Delete this method if no payload is required. (Most REST APIs.) - return None - - def parse_response(self, response: requests.Response) -> Iterable[dict]: - """Parse the response and return an iterator of result rows.""" - # TODO: Parse response body and return a set of records. - yield from extract_jsonpath(self.records_jsonpath, input=response.json()) - def post_process(self, row: dict, context: Optional[dict] = None) -> Optional[dict]: """As needed, append or transform raw data to match expected structure.""" return replicate_pk_at_root(row, self.primary_keys_jsonpaths) diff --git a/tap_googleads/schemas/customer_hierarchy.json b/tap_googleads/schemas/customer_hierarchy.json new file mode 100644 index 0000000..c408aba --- /dev/null +++ b/tap_googleads/schemas/customer_hierarchy.json @@ -0,0 +1,67 @@ +{ + "type": "object", + "properties": { + "customerClient": { + "type": [ + "object", + "null" + ], + "properties": { + "resourceName": { + "type": [ + "string", + "null" + ] + }, + "clientCustomer": { + "type": [ + "string", + "null" + ] + }, + "level": { + "type": [ + "string", + "null" + ] + }, + "timeZone": { + "type": [ + "string", + "null" + ] + }, + "manager": { + "type": [ + "boolean", + "null" + ] + }, + "descriptiveName": { + "type": [ + "string", + "null" + ] + }, + "currencyCode": { + "type": [ + "string", + "null" + ] + }, + "id": { + "type": [ + "string", + "null" + ] + } + } + }, + "_sdc_primary_key": { + "type": [ + "string", + "null" + ] + } + } +} \ No newline at end of file diff --git a/tap_googleads/streams.py b/tap_googleads/streams.py index 1ed089b..7d52427 100644 --- a/tap_googleads/streams.py +++ b/tap_googleads/streams.py @@ -1,13 +1,12 @@ """Stream type classes for tap-googleads.""" from pathlib import Path -from typing import Any, Dict, Optional, Union, List, Iterable +from typing import Any, Dict, Optional, Iterable from datetime import datetime from singer_sdk import typing as th # JSON Schema typing helpers from tap_googleads.client import GoogleAdsStream -from tap_googleads.auth import GoogleAdsAuthenticator SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") @@ -42,20 +41,27 @@ class CustomerHierarchyStream(GoogleAdsStream): # TODO add a seperate stream to get the Customer information and return i rest_method = "POST" + path = "/customers/{client_id}/googleAds:search" + records_jsonpath = "$.results[*]" + name = "customer_hierarchy" + primary_keys_jsonpaths = ["customerClient.id"] + primary_keys = ["_sdc_primary_key"] + replication_key = None + parent_stream_type = AccessibleCustomers + schema_filepath = SCHEMAS_DIR / "customer_hierarchy.json" - @property - def path(self): - # Paramas - path = "/customers/{client_id}" - path = path + "/googleAds:search" - path = path + "?pageSize=10000" - path = path + f"&query={self.gaql}" - return path + def get_url_params( + self, context: Optional[dict], next_page_token: Optional[Any] + ) -> Dict[str, Any]: + params = super().get_url_params(context, next_page_token) + params["pageSize"] = "10000" + params["query"] = self.gaql + return params @property def gaql(self): return """ - SELECT customer_client.client_customer + SELECT customer_client.client_customer , customer_client.level , customer_client.manager , customer_client.descriptive_name @@ -66,29 +72,6 @@ def gaql(self): WHERE customer_client.level <= 1 """ - records_jsonpath = "$.results[*]" - name = "customer_hierarchy" - primary_keys_jsonpaths = ["customerClient.id"] - primary_keys = ["_sdc_primary_key"] - replication_key = None - parent_stream_type = AccessibleCustomers - schema = th.PropertiesList( - th.Property( - "customerClient", - th.ObjectType( - th.Property("resourceName", th.StringType), - th.Property("clientCustomer", th.StringType), - th.Property("level", th.StringType), - th.Property("timeZone", th.StringType), - th.Property("manager", th.BooleanType), - th.Property("descriptiveName", th.StringType), - th.Property("currencyCode", th.StringType), - th.Property("id", th.StringType), - ), - ), - th.Property("_sdc_primary_key", th.StringType), - ).to_dict() - # Goal of this stream is to send to children stream a dict of # login-customer-id:customer-id to query for all queries downstream def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: @@ -115,7 +98,7 @@ def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: for row in self.request_records(context): row = self.post_process(row, context) # Don't search Manager accounts as we can't query them for everything - if row["customerClient"]["manager"] == True: + if row["customerClient"]["manager"] is True: continue yield row @@ -129,14 +112,22 @@ class GeotargetsStream(GoogleAdsStream): rest_method = "POST" - @property - def path(self): - # Paramas - path = "/customers/{login_customer_id}" - path = path + "/googleAds:search" - path = path + "?pageSize=10000" - path = path + f"&query={self.gaql}" - return path + records_jsonpath = "$.results[*]" + name = "geo_target_constant" + primary_keys_jsonpaths = ["geoTargetConstant.resourceName"] + primary_keys = ["_sdc_primary_key"] + replication_key = None + schema_filepath = SCHEMAS_DIR / "geo_target_constant.json" + parent_stream_type = None # Override ReportsStream default as this is a constant + path = "/customers/{login_customer_id}/googleAds:search" + + def get_url_params( + self, context: Optional[dict], next_page_token: Optional[Any] + ) -> Dict[str, Any]: + params = super().get_url_params(context, next_page_token) + params["pageSize"] = "10000" + params["query"] = self.gaql + return params gaql = """ SELECT geo_target_constant.canonical_name @@ -147,18 +138,20 @@ def path(self): , geo_target_constant.target_type FROM geo_target_constant """ - records_jsonpath = "$.results[*]" - name = "geo_target_constant" - primary_keys_jsonpaths = ["geoTargetConstant.resourceName"] - primary_keys = ["_sdc_primary_key"] - replication_key = None - schema_filepath = SCHEMAS_DIR / "geo_target_constant.json" - parent_stream_type = None # Override ReportsStream default as this is a constant class ReportsStream(GoogleAdsStream): rest_method = "POST" parent_stream_type = CustomerHierarchyStream + path = "/customers/{client_id}/googleAds:search" + + def get_url_params( + self, context: Optional[dict], next_page_token: Optional[Any] + ) -> Dict[str, Any]: + params = super().get_url_params(context, next_page_token) + params["pageSize"] = "10000" + params["query"] = self.gaql + return params @property def gaql(self): @@ -184,15 +177,6 @@ def end_date(self): def between_filter(self): return f"BETWEEN '{self.start_date}' AND '{self.end_date}'" - @property - def path(self): - # Paramas - path = "/customers/{client_id}" - path = path + "/googleAds:search" - path = path + "?pageSize=10000" - path = path + f"&query={self.gaql}" - return path - class CampaignsStream(ReportsStream): """Define custom stream.""" @@ -234,7 +218,6 @@ def gaql(self): , ad_group.labels , ad_group.id , ad_group.final_url_suffix - , ad_group.explorer_auto_optimizer_setting.opt_in , ad_group.excluded_parent_asset_field_types , ad_group.effective_target_roas_source , ad_group.effective_target_roas diff --git a/tap_googleads/tap.py b/tap_googleads/tap.py index 57d99bb..ac77e1b 100644 --- a/tap_googleads/tap.py +++ b/tap_googleads/tap.py @@ -72,27 +72,40 @@ class TapGoogleAds(Tap): "customer_id", th.StringType, required=True, - description="Customer ID from Google Ads Console, note this should be the top level client. This tap will pull all subaccounts", + description=( + "Customer ID from Google Ads Console, note this should be the top " + "level client. This tap will pull all subaccounts", + ), ), th.Property( "login_customer_id", th.StringType, required=True, - description="Customer ID that has access to the customer_id, note that they can be the same, but they don't have to be as this could be a Manager account", + description=( + "Customer ID that has access to the customer_id, note that they can be " + "the same, but they don't have to be as this could be a Manager account" + ), ), th.Property( "start_date", th.DateTimeType, default=(date.today() - timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%SZ"), required=True, - description="Date to start our search from, applies to Streams where there is a filter date. Note that Google responds to Data in buckets of 1 Day increments", + description=( + "Date to start our search from, applies to Streams where there is a " + "filter date. Note that Google responds to Data in buckets of 1 Day " + "increments" + ), ), th.Property( "end_date", th.DateTimeType, default=date.today().strftime("%Y-%m-%dT%H:%M:%SZ"), required=True, - description="Date to end our search on, applies to Streams where there is a filter date. Note that the query is BETWEEN start_date AND end_date", + description=( + "Date to end our search on, applies to Streams where there is a filter " + "date. Note that the query is BETWEEN start_date AND end_date" + ), ), ).to_dict() diff --git a/tap_googleads/tests/test_core.py b/tap_googleads/tests/test_core.py index a7a828f..cf2051e 100644 --- a/tap_googleads/tests/test_core.py +++ b/tap_googleads/tests/test_core.py @@ -1,18 +1,10 @@ """Tests standard tap features using the built-in SDK tests library.""" import datetime -from pathlib import Path from singer_sdk.testing import get_standard_tap_tests -from singer_sdk.tap_base import Tap -from singer_sdk.streams.core import Stream -from singer_sdk.exceptions import FatalAPIError from tap_googleads.tap import TapGoogleAds -import pytest -import json -import responses -import requests SAMPLE_CONFIG = { "start_date": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d"), diff --git a/tap_googleads/tests/test_customer_not_found.py b/tap_googleads/tests/test_customer_not_found.py index 38f3821..f9d3903 100644 --- a/tap_googleads/tests/test_customer_not_found.py +++ b/tap_googleads/tests/test_customer_not_found.py @@ -1,19 +1,14 @@ """Tests standard tap features using the built-in SDK tests library.""" import datetime -from pathlib import Path -from singer_sdk.testing import get_standard_tap_tests from singer_sdk.tap_base import Tap from singer_sdk.streams.core import Stream -from singer_sdk.exceptions import FatalAPIError import tap_googleads.tap import pytest import json -import importlib import responses -import requests SAMPLE_CONFIG = { "start_date": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d"), @@ -41,7 +36,6 @@ def mocked_responses(): def test_customer_not_enabled(mocked_responses): - auth_response = { "access_token": "access_granted", "expires_in": "10000000", From d25be28f4224b22ab9a41515d3387cfabba15d6a Mon Sep 17 00:00:00 2001 From: Sebastian Smiley Date: Tue, 25 Jul 2023 07:01:46 -0400 Subject: [PATCH 2/3] Add requested fields to campaign_performance. --- tap_googleads/schemas/campaign_performance.json | 12 ++++++++++++ tap_googleads/streams.py | 4 ++++ 2 files changed, 16 insertions(+) diff --git a/tap_googleads/schemas/campaign_performance.json b/tap_googleads/schemas/campaign_performance.json index dec8e83..797a9fa 100644 --- a/tap_googleads/schemas/campaign_performance.json +++ b/tap_googleads/schemas/campaign_performance.json @@ -15,6 +15,18 @@ }, "name": { "type": "string" + }, + "startDate": { + "type": "string" + }, + "endDate": { + "type": "string" + }, + "advertisingChannelType": { + "type": "string" + }, + "AdvertisingChannelSubType": { + "type": "string" } } }, diff --git a/tap_googleads/streams.py b/tap_googleads/streams.py index 7d52427..d56cea2 100644 --- a/tap_googleads/streams.py +++ b/tap_googleads/streams.py @@ -274,6 +274,10 @@ def gaql(self) -> str: SELECT campaign.id , campaign.name , campaign.status + , campaign.start_date + , campaign.end_date + , campaign.advertising_channel_type + , campaign.advertising_channel_sub_type , segments.device , segments.date , metrics.impressions From b9a9d97536032bcbbc6f0feb0a3bf592f4d2195e Mon Sep 17 00:00:00 2001 From: Sebastian Smiley Date: Tue, 25 Jul 2023 07:23:12 -0400 Subject: [PATCH 3/3] Fix testing. Functionality for secret use in PRs. --- .github/workflows/ci.yml | 12 +++++++++--- pyproject.toml | 4 ++-- tap_googleads/client.py | 5 +++-- tap_googleads/tests/test_customer_not_found.py | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d56886..50610a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,15 +3,15 @@ on: push: branches: - main - pull_request: + pull_request_target: + types: [opened, synchronize, reopened] jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.8, 3.9, "3.10"] os: [ubuntu-latest, windows-latest, macos-latest] - steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -24,7 +24,13 @@ jobs: pip install poetry==1.1.12 - name: Build tap on Python ${{ matrix.python-version }} run: poetry build + authorize: + environment: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository && 'external' || 'internal' }} + runs-on: ubuntu-latest + steps: + - run: true test: + needs: authorize runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/pyproject.toml b/pyproject.toml index 33594d3..0133082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ keywords = [ license = "Apache 2.0" [tool.poetry.dependencies] -python = "<3.11,>=3.7.1" +python = "<3.11,>=3.8" requests = "^2.25.1" singer-sdk = "0.29.0" @@ -18,7 +18,7 @@ singer-sdk = "0.29.0" pytest = "^6.2.5" tox = "^3.24.4" flake8 = "^3.9.2" -black = "^21.9b0" +black = "^23.7.0" pydocstyle = "^6.1.1" mypy = "^0.910" types-requests = "^2.25.8" diff --git a/tap_googleads/client.py b/tap_googleads/client.py index 798f8b6..e2e46d9 100644 --- a/tap_googleads/client.py +++ b/tap_googleads/client.py @@ -2,10 +2,11 @@ from urllib.parse import urlencode, urljoin from pathlib import Path -from typing import Any, Dict, Optional, Iterable +from typing import Any, Dict, Optional, Union, List, Iterable from memoization import cached +from singer_sdk.helpers.jsonpath import extract_jsonpath from singer_sdk.streams import RESTStream from singer_sdk.exceptions import FatalAPIError, RetriableAPIError from singer_sdk.pagination import JSONPathPaginator @@ -66,7 +67,7 @@ def get_url_params( params["order_by"] = self.replication_key return params - def validate_22response(self, response): + def validate_response(self, response): # Still catch error status codes if response.status_code == 403: msg = ( diff --git a/tap_googleads/tests/test_customer_not_found.py b/tap_googleads/tests/test_customer_not_found.py index f9d3903..102f416 100644 --- a/tap_googleads/tests/test_customer_not_found.py +++ b/tap_googleads/tests/test_customer_not_found.py @@ -63,7 +63,7 @@ def test_customer_not_enabled(mocked_responses): mocked_responses.add( responses.POST, # TODO cleanup long url, googleads.googleapis.com/* would suffice - "https://googleads.googleapis.com/v12/customers/12345/googleAds:search?pageSize=10000&query=%0A%20%20%20%20%20%20%20%20%20%20%20%20SELECT%20campaign.id%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20,%20campaign.name%0A%20%20%20%20%20%20%20%20%20%20%20%20FROM%20campaign%20%0A%20%20%20%20%20%20%20%20%20%20%20%20ORDER%20BY%20campaign.id%0A%20%20%20%20%20%20%20%20", + "https://googleads.googleapis.com/v14/customers/12345/googleAds:search?pageSize=10000&query=%0A%20%20%20%20%20%20%20%20%20%20%20%20SELECT%20campaign.id%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20,%20campaign.name%0A%20%20%20%20%20%20%20%20%20%20%20%20FROM%20campaign%20%0A%20%20%20%20%20%20%20%20%20%20%20%20ORDER%20BY%20campaign.id%0A%20%20%20%20%20%20%20%20", body=json.dumps(customer_not_enabled_body).encode("utf-8"), status=403, content_type="application/json",