diff --git a/dataproc_jupyter_plugin/services/executor.py b/dataproc_jupyter_plugin/services/executor.py index 34b1bb52..f8517702 100644 --- a/dataproc_jupyter_plugin/services/executor.py +++ b/dataproc_jupyter_plugin/services/executor.py @@ -17,6 +17,8 @@ import subprocess import uuid from datetime import datetime, timedelta +from google.cloud import storage +from google.api_core.exceptions import NotFound import aiohttp import pendulum @@ -86,14 +88,17 @@ async def get_bucket(self, runtime_env): self.log.exception(f"Error getting bucket name: {str(e)}") raise Exception(f"Error getting composer bucket: {str(e)}") - async def check_file_exists(self, bucket, file_path): + async def check_file_exists(self, bucket_name, file_path): + """Return whether file_path exists as an object in the GCS bucket bucket_name; any failure is logged and re-raised as IOError.""" try: - cmd = f"gsutil ls gs://{bucket}/dataproc-notebooks/{file_path}" - await async_run_gsutil_subcommand(cmd) - return True - except subprocess.CalledProcessError as error: - self.log.exception(f"Error checking papermill file: {error.decode()}") - raise IOError(error.decode) + if not bucket_name: + raise ValueError("Bucket name cannot be empty") + bucket = storage.Client().bucket(bucket_name) + blob = bucket.blob(file_path) + return blob.exists() + except Exception as error: + self.log.exception(f"Error checking file: {error}") + raise IOError(f"Error creating dag: {error}") from error async def upload_papermill_to_gcs(self, gcs_dag_bucket): env = Environment( diff --git a/pyproject.toml b/pyproject.toml index 0c911601..874a34f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "bigframes~=0.22.0", "aiohttp~=3.9.5", - "google-cloud-dataproc~=5.10.2" + "google-cloud-dataproc~=5.10.2", + "google-cloud-storage~=2.18.2" ] dynamic = ["version", "description", "authors", "urls", "keywords"] diff --git a/src/jobs/labelProperties.tsx b/src/jobs/labelProperties.tsx index 4ece4760..f35f5b66 100644 --- a/src/jobs/labelProperties.tsx +++ b/src/jobs/labelProperties.tsx @@ -151,7 +151,7 @@ function LabelProperties({ if (data.split(':')[1] === '') { data = data + 
value; } else { - data = data.replace(data.split(':')[1], value); + data = data.replace(data.split(/:(.+)/)[1], value); } } } @@ -287,7 +287,7 @@ function LabelProperties({ } defaultValue={ labelSplit.length > 2 - ? labelSplit[1] + ':' + labelSplit[2] + ? label.split(/:(.+)/)[1] : labelSplit[1] } Label={`Value ${index + 1}`} diff --git a/src/runtime/createRunTime.tsx b/src/runtime/createRunTime.tsx index befce2cb..e3744afd 100644 --- a/src/runtime/createRunTime.tsx +++ b/src/runtime/createRunTime.tsx @@ -194,6 +194,8 @@ function CreateRunTime({ [] ); const [sharedvpcSelected, setSharedvpcSelected] = useState(''); + const [gpuDetailChangeDone, setGpuDetailChangeDone] = useState(false); + useEffect(() => { checkConfig(setLoggedIn, setConfigError, setLoginError); const localstorageGetInformation = localStorage.getItem('loginState'); @@ -246,41 +248,105 @@ function CreateRunTime({ const [key, value] = item.split(':'); if (key === 'spark.dataproc.executor.resource.accelerator.type') { if (value === 'l4') { - resourceAllocationModify = resourceAllocationDetailUpdated + resourceAllocationModify = resourceAllocationModify .map((item: string) => { - if (item === 'spark.dataproc.executor.disk.size:400g') { + if (item.includes('spark.dataproc.executor.disk.size')) { // To remove the property if GPU checkbox is checked and 'spark.dataproc.executor.resource.accelerator.type:l4'. return null; + } else if (item === 'spark.executor.cores:12') { + return 'spark.executor.cores:4'; } return item; }) .filter((item): item is string => item !== null); // To filter out null values.' 
setResourceAllocationDetail(resourceAllocationModify); setResourceAllocationDetailUpdated(resourceAllocationModify); + + let gpuDetailModify = [...gpuDetailUpdated]; + resourceAllocationModify.forEach(item => { + const [key, value] = item.split(':'); + if (key === 'spark.executor.cores') { + const cores = Number(value); + const gpuValue = (1 / cores).toFixed(2); + gpuDetailModify = gpuDetailModify.map(gpuItem => { + const [gpuKey] = gpuItem.split(':'); + if (gpuKey === 'spark.task.resource.gpu.amount') { + return `spark.task.resource.gpu.amount:${gpuValue}`; + } + return gpuItem; + }); + } + }); + setGpuDetail(gpuDetailModify); + setGpuDetailUpdated(gpuDetailModify); + setGpuDetailChangeDone(true); } else { + resourceAllocationModify = resourceAllocationModify + .map((item: string) => { + if (item === 'spark.executor.cores:4') { + return 'spark.executor.cores:12'; + } + return item; + }) + .filter((item): item is string => item !== null); // To filter out null values. + setResourceAllocationDetail(resourceAllocationModify); + setResourceAllocationDetailUpdated(resourceAllocationModify); + if ( - !resourceAllocationDetailUpdated.includes( - 'spark.dataproc.executor.disk.size:400g' - ) + resourceAllocationModify.filter(property => + property.includes('spark.dataproc.executor.disk.size') + ).length === 0 ) { - // To add the spark.dataproc.executor.disk.size:400g at index 9. - resourceAllocationDetailUpdated.splice( + // To add the spark.dataproc.executor.disk.size:750g at index 9. 
+ resourceAllocationModify.splice( 8, 0, - 'spark.dataproc.executor.disk.size:400g' + 'spark.dataproc.executor.disk.size:750g' ); - const updatedArray = [...resourceAllocationDetailUpdated]; + const updatedArray = [...resourceAllocationModify]; setResourceAllocationDetail(updatedArray); setResourceAllocationDetailUpdated(updatedArray); } + + let gpuDetailModify = [...gpuDetailUpdated]; + resourceAllocationModify.forEach(item => { + const [key, value] = item.split(':'); + if (key === 'spark.executor.cores') { + const cores = Number(value); + const gpuValue = (1 / cores).toFixed(2); + gpuDetailModify = gpuDetailModify.map(gpuItem => { + const [gpuKey] = gpuItem.split(':'); + if (gpuKey === 'spark.task.resource.gpu.amount') { + return `spark.task.resource.gpu.amount:${gpuValue}`; + } + return gpuItem; + }); + } + }); + setGpuDetail(gpuDetailModify); + setGpuDetailUpdated(gpuDetailModify); + setGpuDetailChangeDone(true); } } }); + setResourceAllocationDetail(resourceAllocationModify); + setResourceAllocationDetailUpdated(resourceAllocationModify); }; - useEffect(() => { - modifyResourceAllocation(); - }, [gpuDetailUpdated]); + if ( + !gpuDetailChangeDone && + (!selectedRuntimeClone || + selectedRuntimeClone.runtimeConfig.properties[ + 'spark.dataproc.executor.resource.accelerator.type' + ] === 'l4' || + gpuDetailUpdated.includes( + 'spark.dataproc.executor.resource.accelerator.type:l4' + ) || + resourceAllocationDetailUpdated.length === 9) + ) { + modifyResourceAllocation(); + } + }, [gpuDetailUpdated, gpuDetailChangeDone]); const displayUserInfo = async () => { await RunTimeSerive.displayUserInfoService(setUserInfo); @@ -402,9 +468,11 @@ function CreateRunTime({ if (gpuChecked || gpuDetailList.length > 0) { setGpuDetail(gpuDetailList); setGpuDetailUpdated(gpuDetailList); + setGpuDetailChangeDone(false); } else { setGpuDetail(['']); setGpuDetailUpdated(['']); + setGpuDetailChangeDone(false); } setPropertyDetail(prevPropertyDetail => { @@ -926,7 +994,7 @@ function 
CreateRunTime({ propertyObject[key] = value; }); propertyDetailUpdated.forEach((label: string) => { - const labelSplit = label.split(':'); + const labelSplit = label.split(/:(.+)/); const key = labelSplit[0]; const value = labelSplit[1]; propertyObject[key] = value; @@ -1077,12 +1145,15 @@ function CreateRunTime({ }); setGpuDetail(gpuDetailModify); setGpuDetailUpdated(gpuDetailModify); + setGpuDetailChangeDone(false); } else { let resourceAllocationModify = [...resourceAllocationDetailUpdated]; resourceAllocationModify = resourceAllocationModify.map( (item: string) => { if (item === 'spark.dataproc.executor.disk.tier:premium') { return 'spark.dataproc.executor.disk.tier:standard'; + } else if (item.includes('spark.executor.cores')) { + return 'spark.executor.cores:4'; } return item; } @@ -1100,9 +1171,9 @@ function CreateRunTime({ ); } if ( - !resourceAllocationModify.includes( - 'spark.dataproc.executor.disk.size:400g' - ) + resourceAllocationModify.filter(property => + property.includes('spark.dataproc.executor.disk.size') + ).length === 0 ) { // To add the spark.dataproc.executor.disk.size:400g at index 9 when GPU is unchecked resourceAllocationModify.splice( @@ -1110,12 +1181,23 @@ function CreateRunTime({ 0, 'spark.dataproc.executor.disk.size:400g' ); + } else { + resourceAllocationModify = resourceAllocationModify.map( + (item: string) => { + if (item.includes('spark.dataproc.executor.disk.size')) { + return 'spark.dataproc.executor.disk.size:400g'; + } + return item; + } + ); } + setResourceAllocationDetail(resourceAllocationModify); setResourceAllocationDetailUpdated(resourceAllocationModify); setExpandGpu(false); setGpuDetail(['']); setGpuDetailUpdated(['']); + setGpuDetailChangeDone(false); } }; @@ -1640,6 +1722,7 @@ function CreateRunTime({ sparkValueValidation={sparkValueValidation} setSparkValueValidation={setSparkValueValidation} sparkSection="resourceallocation" + setGpuDetailChangeDone={setGpuDetailChangeDone} /> )}
@@ -1684,6 +1767,7 @@ function CreateRunTime({ sparkValueValidation={sparkValueValidation} setSparkValueValidation={setSparkValueValidation} sparkSection="autoscaling" + setGpuDetailChangeDone={setGpuDetailChangeDone} /> )}
@@ -1737,6 +1821,7 @@ function CreateRunTime({ sparkValueValidation={sparkValueValidation} setSparkValueValidation={setSparkValueValidation} sparkSection="gpu" + setGpuDetailChangeDone={setGpuDetailChangeDone} /> )}
Others
diff --git a/src/runtime/sparkProperties.tsx b/src/runtime/sparkProperties.tsx index ecd794f2..7e635421 100644 --- a/src/runtime/sparkProperties.tsx +++ b/src/runtime/sparkProperties.tsx @@ -42,7 +42,8 @@ function SparkProperties({ setLabelDetailUpdated, sparkValueValidation, setSparkValueValidation, - sparkSection + sparkSection, + setGpuDetailChangeDone }: any) { /* labelDetail used to store the permanent label details when onblur @@ -70,6 +71,9 @@ function SparkProperties({ const handleEditLabelSwitch = () => { setLabelDetail(labelDetailUpdated); + if (sparkSection === 'gpu') { + setGpuDetailChangeDone(false); + } }; const updateErrorIndexes = (index: number, hasError: boolean) => { @@ -177,11 +181,9 @@ function SparkProperties({ value is split from labels Example:"client:dataproc_jupyter_plugin" */ - if (data.split(':')[1] === '') { - data = data + value; - } else { - data = data.replace(data.split(':')[1], value); - } + let sparkProperties = data.split(':'); + sparkProperties[1] = value.trim(); + data = sparkProperties[0] + ':' + sparkProperties[1]; } labelEdit[dataNumber] = data; });