-
Notifications
You must be signed in to change notification settings - Fork 128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Python regressions in 1.9.0beta #857
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,8 +88,10 @@ def start(self, cluster_id: str) -> None: | |
|
||
response = self.session.post("/start", json={"cluster_id": cluster_id}) | ||
if response.status_code != 200: | ||
raise DbtRuntimeError(f"Error starting terminated cluster.\n {response.content!r}") | ||
logger.debug(f"Cluster start response={response}") | ||
if self.status(cluster_id) not in ["RUNNING", "PENDING"]: | ||
raise DbtRuntimeError(f"Error starting terminated cluster.\n {response.content!r}") | ||
else: | ||
logger.debug("Presuming race condition, waiting for cluster to start") | ||
|
||
self.wait_for_cluster(cluster_id) | ||
|
||
|
@@ -289,7 +291,7 @@ def cancel(self, command: CommandExecution) -> None: | |
raise DbtRuntimeError(f"Cancel command {command} failed.\n {response.content!r}") | ||
|
||
def poll_for_completion(self, command: CommandExecution) -> None: | ||
self._poll_api( | ||
response = self._poll_api( | ||
url="/status", | ||
params={ | ||
"clusterId": command.cluster_id, | ||
|
@@ -300,7 +302,13 @@ def poll_for_completion(self, command: CommandExecution) -> None: | |
terminal_states={"Finished", "Error", "Cancelled"}, | ||
expected_end_state="Finished", | ||
unexpected_end_state_func=self._get_exception, | ||
) | ||
).json() | ||
|
||
if response["results"]["resultType"] == "error": | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lost this in the refactor: for some reason, Command exec will sometimes give a state of 'Finished' rather than 'Error', and then stuff the error in the results. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. what if the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are published parts of the Databricks REST API. If these don't exist, the entire API becomes untrustworthy. |
||
raise DbtRuntimeError( | ||
f"Python model failed with traceback as:\n" | ||
f"{utils.remove_ansi(response['results']['cause'])}" | ||
) | ||
|
||
def _get_exception(self, response: Response) -> None: | ||
response_json = response.json() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,7 +85,7 @@ | |
SHOW_TABLE_EXTENDED_MACRO_NAME = "show_table_extended" | ||
SHOW_TABLES_MACRO_NAME = "show_tables" | ||
SHOW_VIEWS_MACRO_NAME = "show_views" | ||
GET_COLUMNS_COMMENTS_MACRO_NAME = "get_columns_comments" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Noticed this wasn't referenced anywhere while doing my debugging. |
||
|
||
USE_INFO_SCHEMA_FOR_COLUMNS = BehaviorFlag( | ||
name="use_info_schema_for_columns", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
from dbt.adapters.base import PythonJobHelper | ||
from dbt.adapters.databricks.api_client import CommandExecution, DatabricksApiClient, WorkflowJobApi | ||
from dbt.adapters.databricks.credentials import DatabricksCredentials | ||
from dbt.adapters.databricks.logging import logger | ||
from dbt.adapters.databricks.python_models.python_config import ParsedPythonModel | ||
from dbt.adapters.databricks.python_models.run_tracking import PythonRunTracker | ||
|
||
|
@@ -70,6 +71,8 @@ def __init__( | |
|
||
@override | ||
def submit(self, compiled_code: str) -> None: | ||
logger.debug("Submitting Python model using the Command API.") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding a debug statement so that we can quickly determine the submission method from the logs. |
||
|
||
context_id = self.api_client.command_contexts.create(self.cluster_id) | ||
command_exec: Optional[CommandExecution] = None | ||
try: | ||
|
@@ -263,6 +266,8 @@ def create( | |
|
||
@override | ||
def submit(self, compiled_code: str) -> None: | ||
logger.debug("Submitting Python model using the Job Run API.") | ||
|
||
file_path = self.uploader.upload(compiled_code) | ||
job_config = self.config_compiler.compile(file_path) | ||
|
||
|
@@ -494,6 +499,8 @@ def create( | |
|
||
@override | ||
def submit(self, compiled_code: str) -> None: | ||
logger.debug("Submitting Python model using the Workflow API.") | ||
|
||
file_path = self.uploader.upload(compiled_code) | ||
|
||
workflow_config, existing_job_id = self.config_compiler.compile(file_path) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Moved these to somewhere more sensible (during the investigation I realized these were in persist_docs, but they are sometimes called outside of persisting docs). |
||
{# get_columns_comments(relation): runs `describe table` on the lower-cased relation inside a statement named 'get_columns_comments' (fetch_result=True) and returns the `.table` of the result loaded under that same name. #}
{% macro get_columns_comments(relation) -%} | ||
{% call statement('get_columns_comments', fetch_result=True) -%} | ||
describe table {{ relation|lower }} | ||
{% endcall %} | ||
|
||
{% do return(load_result('get_columns_comments').table) %} | ||
{% endmacro %} | ||
|
||
{# get_columns_comments_via_information_schema(relation): first issues `REPAIR TABLE ... SYNC METADATA` for the lower-cased relation (no result fetched), then selects column_name, full_data_type and comment from `system`.`information_schema`.`columns`, filtered on the lower-cased database, schema and identifier of the relation, and returns the `.table` of that query's result. #}
{% macro get_columns_comments_via_information_schema(relation) -%} | ||
{% call statement('repair_table', fetch_result=False) -%} | ||
REPAIR TABLE {{ relation|lower }} SYNC METADATA | ||
{% endcall %} | ||
{% call statement('get_columns_comments_via_information_schema', fetch_result=True) -%} | ||
select | ||
column_name, | ||
full_data_type, | ||
comment | ||
from `system`.`information_schema`.`columns` | ||
where | ||
table_catalog = '{{ relation.database|lower }}' and | ||
table_schema = '{{ relation.schema|lower }}' and | ||
table_name = '{{ relation.identifier|lower }}' | ||
{% endcall %} | ||
|
||
{% do return(load_result('get_columns_comments_via_information_schema').table) %} | ||
{% endmacro %} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When we query the cluster state after the error, a status of pending or running means the start request failed because of a race condition: another thread had already started the cluster.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should it be `info` rather than `debug`, or `debug` with status_code?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Anything that is info will show in the normal dbt output, so we are very conservative about what we log with info.
This is normal operation; if they are using multiple python models and the command api they will hit this, so I don't think this is worth bringing to the users' attention if it's running or pending. For whatever reason, the cluster start API errors if you ask to start a cluster that is already in the process of starting. If it's not running or pending, they'll get the full output from raising the error.