From 187a1b72d6cd72fbbe7156d5c51b018c390e23ac Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 6 Jun 2023 13:59:58 -0400 Subject: [PATCH 1/2] Add a tutorial showing how to read data using xpystac --- docs/tutorials.rst | 9 + .../how-to-read-data-from-stac.ipynb | 1541 +++++++++++++++++ 2 files changed, 1550 insertions(+) create mode 100644 docs/tutorials/how-to-read-data-from-stac.ipynb diff --git a/docs/tutorials.rst b/docs/tutorials.rst index d7e6a972e..72588ade8 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -11,6 +11,14 @@ PySTAC Introduction This tutorial gives an introduction to PySTAC concepts through code examples. +How to read data from STAC +-------------------------- + +- :tutorial:`GitHub version <how-to-read-data-from-stac.ipynb>` +- :ref:`Docs version </tutorials/how-to-read-data-from-stac.ipynb>` + +This tutorial shows how to read the data referenced by PySTAC objects into xarray. + PySTAC SpaceNet tutorial ------------------------ @@ -56,6 +64,7 @@ implement your own custom extensions. :glob: tutorials/pystac-introduction.ipynb + tutorials/how-to-read-data-from-stac.ipynb tutorials/pystac-spacenet-tutorial.ipynb tutorials/how-to-create-stac-catalogs.ipynb tutorials/creating-a-landsat-stac.ipynb diff --git a/docs/tutorials/how-to-read-data-from-stac.ipynb b/docs/tutorials/how-to-read-data-from-stac.ipynb new file mode 100644 index 000000000..a0a5cdede --- /dev/null +++ b/docs/tutorials/how-to-read-data-from-stac.ipynb @@ -0,0 +1,1541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "be2c57c1-798a-4eaf-b2b8-41c261b657d1", + "metadata": {}, + "source": [ + "# How to read data from STAC\n", + "\n", + "This notebook shows how to read in the data from a STAC asset using [xarray](https://docs.xarray.dev/en/stable/) and a little hidden helper library called [xpystac](https://pypi.org/project/xpystac/).\n", + "\n", + "## tl;dr\n", + "\n", + "For any PySTAC object that can be represented as an n-dimensional dataset, you can read the data using the following command:\n", + "\n", + "```python\n", + "xr.open_dataset(object)\n", +
"```\n", + "\n", + "## Dependencies\n", + "\n", + "Which optional dependencies you need depends on where and how the data you are interested in are stored. Here are some of the libraries that you will probably need:\n", + "\n", + "- dask - to delay data loading until access\n", + "- fsspec - to access data from remote storage\n", + "- pystac - STAC object structures\n", + "- xarray, rioxarray - data structures\n", + "- xpystac, stackstac - helpers for loading pystac into xarray objects" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "11dddb09-6313-4822-90ba-26eb6e5c143b", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install adlfs dask fsspec[http] planetary_computer stackstac xarray xpystac zarr --quiet" + ] + }, + { + "cell_type": "markdown", + "id": "ad3fb6dc-3529-47bd-a5b3-f5260f23db88", + "metadata": {}, + "source": [ + "Despite all these install instructions, the import block is very straightforward:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2a8afebd-b397-4e7a-b448-0f59cc030e66", + "metadata": {}, + "outputs": [], + "source": [ + "import pystac\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "id": "6b24745c-b2d5-43d6-9c7e-66458b3a88e3", + "metadata": {}, + "source": [ + "## Examples\n", + "\n", + "Here are a few examples of the different types of objects that you can open in xarray." + ] + }, + { + "cell_type": "markdown", + "id": "30da7cfd-2861-4095-b15b-9952a7d824d9", + "metadata": {}, + "source": [ + "### COGs\n", + "\n", + "Read all the data from the COGs referenced by the assets on an item." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c77432e6-8b0d-44d2-a947-ec74a529b8cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                      (time: 1, y: 7802, x: 7762, band: 19)\n",
+       "Coordinates: (12/32)\n",
+       "  * time                         (time) datetime64[ns] 2023-04-08T23:37:51.63...\n",
+       "    id                           (time) <U31 ...\n",
+       "  * x                            (x) float64 3.774e+05 3.774e+05 ... 6.102e+05\n",
+       "  * y                            (y) float64 -3.713e+06 ... -3.947e+06\n",
+       "    proj:shape                   object ...\n",
+       "    sci:doi                      <U16 ...\n",
+       "    ...                           ...\n",
+       "    raster:bands                 (band) object ...\n",
+       "    classification:bitfields     (band) object ...\n",
+       "    common_name                  (band) object ...\n",
+       "    center_wavelength            (band) object ...\n",
+       "    full_width_half_max          (band) object ...\n",
+       "    epsg                         int64 ...\n",
+       "Dimensions without coordinates: band\n",
+       "Data variables: (12/19)\n",
+       "    qa                           (time, y, x) float64 ...\n",
+       "    red                          (time, y, x) float64 ...\n",
+       "    blue                         (time, y, x) float64 ...\n",
+       "    drad                         (time, y, x) float64 ...\n",
+       "    emis                         (time, y, x) float64 ...\n",
+       "    emsd                         (time, y, x) float64 ...\n",
+       "    ...                           ...\n",
+       "    swir16                       (time, y, x) float64 ...\n",
+       "    swir22                       (time, y, x) float64 ...\n",
+       "    coastal                      (time, y, x) float64 ...\n",
+       "    qa_pixel                     (time, y, x) float64 ...\n",
+       "    qa_radsat                    (time, y, x) float64 ...\n",
+       "    qa_aerosol                   (time, y, x) float64 ...\n",
+       "Attributes:\n",
+       "    spec:        RasterSpec(epsg=32656, bounds=(377370.0, -3947130.0, 610230....\n",
+       "    crs:         epsg:32656\n",
+       "    transform:   | 30.00, 0.00, 377370.00|\\n| 0.00,-30.00,-3713070.00|\\n| 0.0...\n",
+       "    resolution:  30.0
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 1, y: 7802, x: 7762, band: 19)\n", + "Coordinates: (12/32)\n", + " * time (time) datetime64[ns] 2023-04-08T23:37:51.63...\n", + " id (time) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:                  (time: 14965, y: 584, x: 284, nv: 2)\n",
+       "Coordinates:\n",
+       "    lat                      (y, x) float32 ...\n",
+       "    lon                      (y, x) float32 ...\n",
+       "  * time                     (time) datetime64[ns] 1980-01-01T12:00:00 ... 20...\n",
+       "  * x                        (x) float32 -5.802e+06 -5.801e+06 ... -5.519e+06\n",
+       "  * y                        (y) float32 -3.9e+04 -4e+04 ... -6.21e+05 -6.22e+05\n",
+       "Dimensions without coordinates: nv\n",
+       "Data variables:\n",
+       "    dayl                     (time, y, x) float32 ...\n",
+       "    lambert_conformal_conic  int16 ...\n",
+       "    prcp                     (time, y, x) float32 ...\n",
+       "    srad                     (time, y, x) float32 ...\n",
+       "    swe                      (time, y, x) float32 ...\n",
+       "    time_bnds                (time, nv) datetime64[ns] ...\n",
+       "    tmax                     (time, y, x) float32 ...\n",
+       "    tmin                     (time, y, x) float32 ...\n",
+       "    vp                       (time, y, x) float32 ...\n",
+       "    yearday                  (time) int16 ...\n",
+       "Attributes:\n",
+       "    Conventions:       CF-1.6\n",
+       "    Version_data:      Daymet Data Version 4.0\n",
+       "    Version_software:  Daymet Software Version 4.0\n",
+       "    citation:          Please see http://daymet.ornl.gov/ for current Daymet ...\n",
+       "    references:        Please see http://daymet.ornl.gov/ for current informa...\n",
+       "    source:            Daymet Software Version 4.0\n",
+       "    start_year:        1980
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 14965, y: 584, x: 284, nv: 2)\n", + "Coordinates:\n", + " lat (y, x) float32 ...\n", + " lon (y, x) float32 ...\n", + " * time (time) datetime64[ns] 1980-01-01T12:00:00 ... 20...\n", + " * x (x) float32 -5.802e+06 -5.801e+06 ... -5.519e+06\n", + " * y (y) float32 -3.9e+04 -4e+04 ... -6.21e+05 -6.22e+05\n", + "Dimensions without coordinates: nv\n", + "Data variables:\n", + " dayl (time, y, x) float32 ...\n", + " lambert_conformal_conic int16 ...\n", + " prcp (time, y, x) float32 ...\n", + " srad (time, y, x) float32 ...\n", + " swe (time, y, x) float32 ...\n", + " time_bnds (time, nv) datetime64[ns] ...\n", + " tmax (time, y, x) float32 ...\n", + " tmin (time, y, x) float32 ...\n", + " vp (time, y, x) float32 ...\n", + " yearday (time) int16 ...\n", + "Attributes:\n", + " Conventions: CF-1.6\n", + " Version_data: Daymet Data Version 4.0\n", + " Version_software: Daymet Software Version 4.0\n", + " citation: Please see http://daymet.ornl.gov/ for current Daymet ...\n", + " references: Please see http://daymet.ornl.gov/ for current informa...\n", + " source: Daymet Software Version 4.0\n", + " start_year: 1980" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "daymet_collection = pystac.Collection.from_file(\n", + " \"https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-daily-hi\"\n", + ")\n", + "daymet_asset = daymet_collection.assets[\"zarr-abfs\"]\n", + "\n", + "xr.open_dataset(daymet_asset)" + ] + }, + { + "cell_type": "markdown", + "id": "fd4e0c53-90b0-4276-9caf-9014aa0a31f9", + "metadata": {}, + "source": [ + "### Reference file\n", + "\n", + "If the collection has a reference file asset, we can open that directly:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "00efc688-a8b8-4b45-8ee8-1aa076a870f4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (time: 23741, lat: 600, lon: 1440)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float64 -59.88 -59.62 -59.38 -59.12 ... 89.38 89.62 89.88\n",
+       "  * lon      (lon) float64 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9\n",
+       "  * time     (time) datetime64[us] 1950-01-01T12:00:00 ... 2014-12-31T12:00:00\n",
+       "Data variables:\n",
+       "    hurs     (time, lat, lon) float32 ...\n",
+       "    huss     (time, lat, lon) float32 ...\n",
+       "    pr       (time, lat, lon) float32 ...\n",
+       "    rlds     (time, lat, lon) float32 ...\n",
+       "    rsds     (time, lat, lon) float32 ...\n",
+       "    sfcWind  (time, lat, lon) float32 ...\n",
+       "    tas      (time, lat, lon) float32 ...\n",
+       "    tasmax   (time, lat, lon) float32 ...\n",
+       "    tasmin   (time, lat, lon) float32 ...\n",
+       "Attributes: (12/22)\n",
+       "    Conventions:           CF-1.7\n",
+       "    activity:              NEX-GDDP-CMIP6\n",
+       "    cmip6_institution_id:  CSIRO-ARCCSS\n",
+       "    cmip6_license:         CC-BY-SA 4.0\n",
+       "    cmip6_source_id:       ACCESS-CM2\n",
+       "    contact:               Dr. Rama Nemani: rama.nemani@nasa.gov, Dr. Bridget...\n",
+       "    ...                    ...\n",
+       "    scenario:              historical\n",
+       "    source:                BCSD\n",
+       "    title:                 ACCESS-CM2, r1i1p1f1, historical, global downscale...\n",
+       "    tracking_id:           16d27564-470f-41ea-8077-f4cc3efa5bfe\n",
+       "    variant_label:         r1i1p1f1\n",
+       "    version:               1.0
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 23741, lat: 600, lon: 1440)\n", + "Coordinates:\n", + " * lat (lat) float64 -59.88 -59.62 -59.38 -59.12 ... 89.38 89.62 89.88\n", + " * lon (lon) float64 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9\n", + " * time (time) datetime64[us] 1950-01-01T12:00:00 ... 2014-12-31T12:00:00\n", + "Data variables:\n", + " hurs (time, lat, lon) float32 ...\n", + " huss (time, lat, lon) float32 ...\n", + " pr (time, lat, lon) float32 ...\n", + " rlds (time, lat, lon) float32 ...\n", + " rsds (time, lat, lon) float32 ...\n", + " sfcWind (time, lat, lon) float32 ...\n", + " tas (time, lat, lon) float32 ...\n", + " tasmax (time, lat, lon) float32 ...\n", + " tasmin (time, lat, lon) float32 ...\n", + "Attributes: (12/22)\n", + " Conventions: CF-1.7\n", + " activity: NEX-GDDP-CMIP6\n", + " cmip6_institution_id: CSIRO-ARCCSS\n", + " cmip6_license: CC-BY-SA 4.0\n", + " cmip6_source_id: ACCESS-CM2\n", + " contact: Dr. Rama Nemani: rama.nemani@nasa.gov, Dr. Bridget...\n", + " ... 
...\n", + " scenario: historical\n", + " source: BCSD\n", + " title: ACCESS-CM2, r1i1p1f1, historical, global downscale...\n", + " tracking_id: 16d27564-470f-41ea-8077-f4cc3efa5bfe\n", + " variant_label: r1i1p1f1\n", + " version: 1.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cmip6_collection = pystac.Collection.from_file(\n", + " \"https://planetarycomputer.microsoft.com/api/stac/v1/collections/nasa-nex-gddp-cmip6\"\n", + ")\n", + "cmip6_asset = cmip6_collection.assets[\"ACCESS-CM2.historical\"]\n", + "\n", + "xr.open_dataset(cmip6_asset)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 7eb3aa09e3735d95f5d68fc0c78f689d48f3bf4e Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 7 Jun 2023 11:05:16 -0400 Subject: [PATCH 2/2] Update docs/tutorials/how-to-read-data-from-stac.ipynb Co-authored-by: Ian Cooke --- docs/tutorials/how-to-read-data-from-stac.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/how-to-read-data-from-stac.ipynb b/docs/tutorials/how-to-read-data-from-stac.ipynb index a0a5cdede..4ee846761 100644 --- a/docs/tutorials/how-to-read-data-from-stac.ipynb +++ b/docs/tutorials/how-to-read-data-from-stac.ipynb @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install adlfs dask fsspec[http] planetary_computer stackstac xarray xpystac zarr --quiet" + "!pip install adlfs dask 'fsspec[http]' planetary_computer stackstac xarray xpystac zarr --quiet" ] }, {