Merge pull request #101 from kefeimo/adding-vap-notebooks
Updated notebooks because the new act-atmos version (> 2.0) on JupyterHub introduced several breaking changes.
mgrover1 authored Jul 1, 2024
2 parents ff82789 + 09846f4 commit e9de1d6
Showing 231 changed files with 19,232 additions and 1,439 deletions.
33 changes: 30 additions & 3 deletions VAPs/quicklook/2DS-AIR/2DS-AIR_tutorial.ipynb
@@ -82,8 +82,8 @@
"\n",
"For example, this notebook is called `aaf2dsh.c1`, where `aaf2dsh` is the \"datastream name\", and the `{process.ds_class_level}}` is the \"data level\".\n",
"\n",
"This datastream also contains site `cor` and facility `F1`. (Note: individual datastream might have multiple site-facility pairs.)\n",
"In such a case, the data of this data-stream is stored at `/data/archive/cor/coraaf2dshF1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n",
"This datastream also contains site `mao` and facility `F1`. (Note: individual datastream might have multiple site-facility pairs.)\n",
"In such a case, the data of this data-stream is stored at `/data/archive/mao/maoaaf2dshF1.c1`, which is in the format of `<DATA_DIR>/<site>/<site><DATASTREAM_NAME><facility>.<DATA_LEVEL>`. We can use the following method to assign the data-stream directory `datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )`\n",
"\n",
"The data files under datastream_dir also follows naming conventions. But once reach the datastream_dir level, the most import file naming convention to differentiate the files is \"yyyyMMdd.hhmmss\", which comes handy to filter out files based on datetime. For example, we can use `glob.glob(f'{datastream_dir}/*.200709*.*')` to filter files in 2007 September.\n",
"\n",
@@ -114,7 +114,7 @@
"# Speicify datastream_dir following the path conventions and check its existence\n",
"DATASTREAM_NAME = \"aaf2dsh\"\n",
"DATA_LEVEL = \"c1\"\n",
"site = \"cor\"\n",
"site = \"mao\"\n",
"facility = \"F1\"\n",
"datastream_dir = os.path.join(DATA_DIR, site, site + DATASTREAM_NAME + facility + '.' + DATA_LEVEL )\n",
"print(datastream_dir)\n",
@@ -494,6 +494,33 @@
"* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bff08b3",
"metadata": {},
"outputs": [],
"source": [
"# clean up dataset with dim that has inf values\n",
"def clean_inf_dims(ds: xr.Dataset):\n",
" # Replace infinite values with a large number\n",
" # replaces the infinite values in dim with 110% of the maximum finite value. \n",
" # If the value is not infinite, it remains unchanged.\n",
" for dim_name in list(ds.dims):\n",
" # if dim_name in [\"time\", \"bound\"]:\n",
" # continue\n",
" dim = ds[dim_name].values\n",
" if not np.issubdtype(ds[dim_name].values.dtype, np.number): # only works for numerical dtype\n",
" continue\n",
" if not any(np.isinf(dim)): # skip if there is no inf value\n",
" continue\n",
" dim_replaced = np.where(np.isinf(dim), np.nanmax(dim[np.isfinite(dim)]) * 1.1, dim)\n",
" ds[dim_name] = ((dim_name,), dim_replaced)\n",
" return ds\n",
"\n",
"ds = clean_inf_dims(ds)"
]
},
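As a quick check of what the added `clean_inf_dims` helper does, here is a hedged standalone sketch (not part of the commit) applying the same replacement rule to a small synthetic dataset; the `size` dimension and its values are made up for illustration.

```python
import numpy as np
import xarray as xr

# Hypothetical coordinate with an inf bin edge, mimicking a size-like dimension
ds_demo = xr.Dataset(
    {"counts": (("size",), np.array([5.0, 3.0, 1.0, 0.0]))},
    coords={"size": np.array([10.0, 20.0, 40.0, np.inf])},
)

# Same rule as clean_inf_dims above: inf -> 110% of the largest finite value
vals = ds_demo["size"].values
cleaned = np.where(np.isinf(vals), np.nanmax(vals[np.isfinite(vals)]) * 1.1, vals)
ds_demo = ds_demo.assign_coords(size=cleaned)

print(ds_demo["size"].values)   # approximately [10. 20. 40. 44.]
```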
{
"cell_type": "markdown",
"id": "ae6e1a69",
88 changes: 83 additions & 5 deletions VAPs/quicklook/2DS-AIR/aaf2dsh.c1.ipynb
@@ -37,7 +37,7 @@
"# Datastream info\n",
"DATASTREAM_NAME = 'aaf2dsh'\n",
"DATA_LEVEL = 'c1'\n",
"LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]"
"LOCATIONS = [{'end_date': '2014-10-04', 'facility': 'F1', 'site': 'mao', 'start_date': '2014-02-22'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]"
]
},
{
@@ -167,12 +167,40 @@
"source": [
"# Load files as a single dataset\n",
"files_list = files_filter \n",
"ds = act.io.armfiles.read_netcdf(files_list)\n",
"ds.clean.cleanup()\n",
"print(f'{len(files_list)} files loaded')\n",
"\n",
"# ds = xr.open_mfdataset(files_list) # open multiple netCDF files and merge as on dataset. (not always work)\n",
"ds = xr.open_dataset(files_list[0]) # open the first file for analysis\n",
"ds.clean.cleanup() # note: ARM's QC does not work directly with the internal logic. The ARM QC needs to be converted to CF QC before the QC will work.\n",
"ds\n"
]
},
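The loading change above appears to address the act-atmos >= 2.0 break mentioned in the commit message: the old `act.io.armfiles.read_netcdf` call is replaced with plain xarray plus ACT's `clean` accessor. A hedged sketch of the new pattern follows; the glob path and file extension are assumptions carried over from the earlier cells, and the commented-out reader call reflects my understanding of the ACT 2.x API rather than anything in the diff.

```python
import glob
import act                      # registers the .clean accessor on xarray Datasets
import xarray as xr

# Assumed directory from the earlier path sketch
files_list = sorted(glob.glob("/data/archive/mao/maoaaf2dshF1.c1/*.nc"))

# The updated notebooks open a single file with plain xarray, since
# xr.open_mfdataset does not always merge these files cleanly.
ds = xr.open_dataset(files_list[0])

# Translate ARM-style QC metadata into CF conventions so ACT's QC tools work
ds.clean.cleanup()

# Assumption: in act-atmos >= 2.0 the ARM reader lives at act.io.arm.read_arm_netcdf,
# replacing the act.io.armfiles.read_netcdf call removed by this commit:
# ds = act.io.arm.read_arm_netcdf(files_list)
```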
{
"cell_type": "code",
"execution_count": null,
"id": "7385bbf4",
"metadata": {},
"outputs": [],
"source": [
"# clean up dataset with dim that has inf values\n",
"def clean_inf_dims(ds: xr.Dataset):\n",
" # Replace infinite values with a large number\n",
" # replaces the infinite values in dim with 110% of the maximum finite value. \n",
" # If the value is not infinite, it remains unchanged.\n",
" for dim_name in list(ds.dims):\n",
" # if dim_name in [\"time\", \"bound\"]:\n",
" # continue\n",
" dim = ds[dim_name].values\n",
" if not np.issubdtype(ds[dim_name].values.dtype, np.number): # only works for numerical dtype\n",
" continue\n",
" if not any(np.isinf(dim)): # skip if there is no inf value\n",
" continue\n",
" dim_replaced = np.where(np.isinf(dim), np.nanmax(dim[np.isfinite(dim)]) * 1.1, dim)\n",
" ds[dim_name] = ((dim_name,), dim_replaced)\n",
" return ds\n",
"\n",
"ds = clean_inf_dims(ds)"
]
},
{
"attachments": {},
"cell_type": "markdown",
@@ -207,7 +235,57 @@
" ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n",
" ts_ax.grid()\n",
"\n",
"plt.show()\n"
"plt.show()"
]
},
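The time-series cell above is only partially visible in this hunk; below is a hedged reconstruction of the full plotting pattern it uses, with hypothetical variable names standing in for whatever fields the notebook actually selects.

```python
import act
import matplotlib.pyplot as plt

# Hypothetical field names; the notebook builds its own list of variables to plot
variables_to_plot = ["total_concentration", "mean_diameter"]

ts_display = act.plotting.TimeSeriesDisplay(ds)
ts_display.add_subplots((len(variables_to_plot),), figsize=(9.5, 4 * len(variables_to_plot)))

for i, v in enumerate(variables_to_plot):
    ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs["long_name"])
    ts_ax.grid()

plt.show()
```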
{
"attachments": {},
"cell_type": "markdown",
"id": "194399aa-1907-452b-8ba9-bc31d7f60291",
"metadata": {},
"source": [
"## Quality check plots\n",
"#### Define variable for QC plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21c39b16",
"metadata": {},
"outputs": [],
"source": [
"# existing qc variables\n",
"qc_variables = [var for var in list(ds.variables) if \"qc_\" in var]\n",
"qc_variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "532663a3-4dc0-4497-bda8-018c5f91e1c4",
"metadata": {},
"outputs": [],
"source": [
"# QC Plot \n",
"def qc_plot_example(ds, qc_variable):\n",
" # Plot\n",
" qc_display = act.plotting.TimeSeriesDisplay(ds)\n",
" qc_display.add_subplots((2,), figsize = (9.5,10))\n",
" qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n",
" qc_ax.grid()\n",
" qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n",
"\n",
" plt.show()\n",
"\n",
"\n",
"qc_variable = 'None'\n",
"\n",
"if qc_variable:\n",
" try:\n",
" qc_plot_example(ds=ds, qc_variable=qc_variable)\n",
" except Exception as e:\n",
" print(e)"
]
},
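Note that the new QC cell hard-codes `qc_variable = 'None'`, a non-empty string, so the `if qc_variable:` branch always runs and relies on the try/except to swallow the resulting error. A hedged alternative (not part of the commit) would pick the first detected QC variable instead; this assumes the `qc_variables` list and `qc_plot_example` function from the cells above have already run.

```python
# Plot the first embedded QC variable, if the datastream has any
if qc_variables:
    qc_plot_example(ds=ds, qc_variable=qc_variables[0])
else:
    print("No embedded QC variables found in this datastream")
```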
{
88 changes: 83 additions & 5 deletions VAPs/quicklook/2DS-AIR/aaf2dsv.c1.ipynb
@@ -37,7 +37,7 @@
"# Datastream info\n",
"DATASTREAM_NAME = 'aaf2dsv'\n",
"DATA_LEVEL = 'c1'\n",
"LOCATIONS = [{'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]"
"LOCATIONS = [{'end_date': '2014-10-04', 'facility': 'F1', 'site': 'mao', 'start_date': '2014-02-22'}, {'end_date': '2018-02-19', 'facility': 'F1', 'site': 'ena', 'start_date': '2017-06-21'}, {'end_date': '2018-12-08', 'facility': 'F1', 'site': 'cor', 'start_date': '2018-11-04'}, {'end_date': '2016-09-22', 'facility': 'F1', 'site': 'sgp', 'start_date': '2016-04-25'}]"
]
},
{
@@ -167,12 +167,40 @@
"source": [
"# Load files as a single dataset\n",
"files_list = files_filter \n",
"ds = act.io.armfiles.read_netcdf(files_list)\n",
"ds.clean.cleanup()\n",
"print(f'{len(files_list)} files loaded')\n",
"\n",
"# ds = xr.open_mfdataset(files_list) # open multiple netCDF files and merge as on dataset. (not always work)\n",
"ds = xr.open_dataset(files_list[0]) # open the first file for analysis\n",
"ds.clean.cleanup() # note: ARM's QC does not work directly with the internal logic. The ARM QC needs to be converted to CF QC before the QC will work.\n",
"ds\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7385bbf4",
"metadata": {},
"outputs": [],
"source": [
"# clean up dataset with dim that has inf values\n",
"def clean_inf_dims(ds: xr.Dataset):\n",
" # Replace infinite values with a large number\n",
" # replaces the infinite values in dim with 110% of the maximum finite value. \n",
" # If the value is not infinite, it remains unchanged.\n",
" for dim_name in list(ds.dims):\n",
" # if dim_name in [\"time\", \"bound\"]:\n",
" # continue\n",
" dim = ds[dim_name].values\n",
" if not np.issubdtype(ds[dim_name].values.dtype, np.number): # only works for numerical dtype\n",
" continue\n",
" if not any(np.isinf(dim)): # skip if there is no inf value\n",
" continue\n",
" dim_replaced = np.where(np.isinf(dim), np.nanmax(dim[np.isfinite(dim)]) * 1.1, dim)\n",
" ds[dim_name] = ((dim_name,), dim_replaced)\n",
" return ds\n",
"\n",
"ds = clean_inf_dims(ds)"
]
},
{
"attachments": {},
"cell_type": "markdown",
@@ -207,7 +235,57 @@
" ts_ax = ts_display.plot(v, subplot_index=(i,), set_title=ds.variables[v].attrs['long_name'],)\n",
" ts_ax.grid()\n",
"\n",
"plt.show()\n"
"plt.show()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "194399aa-1907-452b-8ba9-bc31d7f60291",
"metadata": {},
"source": [
"## Quality check plots\n",
"#### Define variable for QC plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21c39b16",
"metadata": {},
"outputs": [],
"source": [
"# existing qc variables\n",
"qc_variables = [var for var in list(ds.variables) if \"qc_\" in var]\n",
"qc_variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "532663a3-4dc0-4497-bda8-018c5f91e1c4",
"metadata": {},
"outputs": [],
"source": [
"# QC Plot \n",
"def qc_plot_example(ds, qc_variable):\n",
" # Plot\n",
" qc_display = act.plotting.TimeSeriesDisplay(ds)\n",
" qc_display.add_subplots((2,), figsize = (9.5,10))\n",
" qc_ax = qc_display.plot(qc_variable, subplot_index=(0,), set_title=\"QC results on field: \" + qc_variable,)\n",
" qc_ax.grid()\n",
" qc_display.qc_flag_block_plot(qc_variable, subplot_index=(1,))\n",
"\n",
" plt.show()\n",
"\n",
"\n",
"qc_variable = 'None'\n",
"\n",
"if qc_variable:\n",
" try:\n",
" qc_plot_example(ds=ds, qc_variable=qc_variable)\n",
" except Exception as e:\n",
" print(e)"
]
},
{
27 changes: 27 additions & 0 deletions VAPs/quicklook/ACSMCDCE/ACSMCDCE_tutorial.ipynb
@@ -494,6 +494,33 @@
"* [Pandas - Cleaning Data](https://www.w3schools.com/python/pandas/pandas_cleaning.asp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bff08b3",
"metadata": {},
"outputs": [],
"source": [
"# clean up dataset with dim that has inf values\n",
"def clean_inf_dims(ds: xr.Dataset):\n",
" # Replace infinite values with a large number\n",
" # replaces the infinite values in dim with 110% of the maximum finite value. \n",
" # If the value is not infinite, it remains unchanged.\n",
" for dim_name in list(ds.dims):\n",
" # if dim_name in [\"time\", \"bound\"]:\n",
" # continue\n",
" dim = ds[dim_name].values\n",
" if not np.issubdtype(ds[dim_name].values.dtype, np.number): # only works for numerical dtype\n",
" continue\n",
" if not any(np.isinf(dim)): # skip if there is no inf value\n",
" continue\n",
" dim_replaced = np.where(np.isinf(dim), np.nanmax(dim[np.isfinite(dim)]) * 1.1, dim)\n",
" ds[dim_name] = ((dim_name,), dim_replaced)\n",
" return ds\n",
"\n",
"ds = clean_inf_dims(ds)"
]
},
{
"cell_type": "markdown",
"id": "ae6e1a69",