fix: add mdx generation #188

Merged
merged 17 commits on Dec 10, 2024
24 changes: 13 additions & 11 deletions .github/workflows/pr.yml
@@ -208,22 +208,24 @@ jobs:
     runs-on: ubuntu-latest
     needs: publish-new-datasets
     steps:
-      - name: Use output from dataset-publication-and-configuration
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Use output from dataset-publication-and-configuration
         run: |
-          echo "The output from the previous step is: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}"
+          echo "The output from the previous step is: ${{ needs.publish-new-datasets.outputs.publishedCollections }}"

       # Creates a slim dataset mdx file for each collection based on the dataset config json
       - name: Create dataset mdx for given collections
         env:
-          PUBLISHED_COLLECTION_FILES: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}
-        run: echo "NO-OP step"
-        # run: |
-        #   pip install -r scripts/requirements.txt
-        #   for file in "${PUBLISHED_COLLECTION_FILES[@]}"
-        #   do
-        #     python3 scripts/mdx.py "$file"
-        #   done
+          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
+        run: |
+          echo $PUBLISHED_COLLECTION_FILES
+          pip install -r ./scripts/requirements.txt
+          for file in "${PUBLISHED_COLLECTION_FILES[@]}"
+          do
+            python3 ./scripts/generate-mdx.py "$file"
+          done

   open-veda-config-pr:
     runs-on: ubuntu-latest
@@ -238,7 +240,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

       - name: Publish to production on PR merge
         run: echo "NO-OP. This step runs when a PR is merged."
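Note: for local verification, the "Create dataset mdx for given collections" step above can be approximated outside of GitHub Actions. The sketch below is a stand-in under stated assumptions: it runs from the repository root and feeds the script the dataset-config files touched in this PR, whereas the real workflow receives its file list from the publishedCollections output of publish-new-datasets.

# Rough local equivalent of the mdx-generation step (sketch, not part of this PR).
# Assumes execution from the repo root; the glob below is a stand-in for the
# publishedCollections output that the workflow actually iterates over.
import glob
import subprocess
import sys

subprocess.run([sys.executable, "-m", "pip", "install", "-r", "scripts/requirements.txt"], check=True)
for config in sorted(glob.glob("ingestion-data/testing/dataset-config/*.json")):
    subprocess.run([sys.executable, "scripts/generate-mdx.py", config], check=True)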
30 changes: 30 additions & 0 deletions ingestion-data/testing/dataset-config/test-2.json
@@ -0,0 +1,30 @@
{
  "collection": "modis-ndvi-diff-2015-2022-TEST",
  "title": "Camp Fire Domain: MODIS NDVI Difference",
  "spatial_extent": {
    "xmin": -122.21,
    "ymin": 39.33,
    "xmax": -120.91,
    "ymax": 40.22
  },
  "temporal_extent": {
    "startdate": "2022-12-31T00:00:00Z",
    "enddate": "2022-12-31T23:59:59Z"
  },
  "data_type": "cog",
  "license": "CC0-1.0",
  "description": "MODIS NDVI difference from a three-year average of 2015 to 2018 subtracted from a three-year average of 2019-2022. These tri-annual averages represent periods before and after the fire.",
  "is_periodic": true,
  "time_density": "year",
  "sample_files": [
    "s3://veda-data-store-staging/modis-ndvi-diff-2015-2022/campfire_ndvi_difference_2015_2022.tif"
  ],
  "discovery_items": [
    {
      "discovery": "s3",
      "prefix": "modis-ndvi-diff-2015-2022/",
      "bucket": "veda-data-store-staging",
      "filename_regex": "(.*)campfire_ndvi_difference_2015_2022.tif$"
    }
  ]
}
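For orientation, only a handful of the fields in a config like this are consumed by the scripts/generate-mdx.py script added below; the rest is used elsewhere in the publication pipeline. A hedged sketch (paths assumed relative to the repo root); note this particular config has no item_assets, so the generated frontmatter would end up with an empty layers list.

# Sketch: the config fields the mdx generator reads (not part of this PR).
import json

with open("ingestion-data/testing/dataset-config/test-2.json") as f:
    cfg = json.load(f)

used = {
    "collection": cfg["collection"],            # becomes the frontmatter "id"
    "title": cfg.get("title"),                  # becomes "name"
    "description": cfg.get("description"),      # becomes "description"
    "item_assets": cfg.get("item_assets", {}),  # one dashboard layer per asset
}
print(used)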
2 changes: 1 addition & 1 deletion ingestion-data/testing/dataset-config/test.json
@@ -1,5 +1,5 @@
 {
-  "collection": "hls-swir-falsecolor-composite-TEST",
+  "collection": "hls-swir-falsecolor-composite-SECOND-TEST",
   "title": "HLS SWIR FalseColor Composite",
   "spatial_extent": {
     "xmin": -156.75,
5 changes: 5 additions & 0 deletions scripts/dataset.mdx
@@ -0,0 +1,5 @@
<Block>
<Prose>
[[REPLACE WITH RELEVANT DATASET INFORMATION]]
</Prose>
</Block>
127 changes: 127 additions & 0 deletions scripts/generate-mdx.py
@@ -0,0 +1,127 @@
#! /usr/bin/env python
"""
This file creates a minimal <collection>.data.mdx file
from the input dataset config json file
Dependency: `dataset.mdx` file
"""

import yaml
import os
import json
import sys


def create_frontmatter(input_data):
    """
    Creates json based on input dataset config
    """
    collection_id = input_data["collection"]

    json_data = {
        "id": collection_id,
        "name": input_data.get("title", "Dataset Title"),
        "featured": False,
        "description": input_data.get("description", "Dataset Description"),
        "media": {
            "src": "https://bootstrap-cheatsheet.themeselection.com/assets/images/bs-images/img-2x1.png",
            "alt": "Placeholder image",
            "author": {"name": "Media author", "url": ""},
        },
        "taxonomy": [
            {"name": "Source", "values": ["NASA"]},
        ],
        "infoDescription": """::markdown
- **Temporal Extent:** 2015 - 2100
- **Temporal Resolution:** Annual
- **Spatial Extent:** Global
- **Spatial Resolution:** 0.25 degrees x 0.25 degrees
- **Data Units:** Days (Days per year above 90°F or 110°F)
- **Data Type:** Research
""",
        "layers": [],
    }

    for asset_id, asset in input_data.get("item_assets", {}).items():
        layer = {
            "id": f"{collection_id}-{asset_id}",
            "stacCol": collection_id,
            "name": asset.get("title", "Asset Title"),
            "type": "raster",
            "description": asset.get("description", "Asset Description"),
            "zoomExtent": [0, 4],
            "sourceParams": {
                "assets": asset_id,
                "resampling_method": "bilinear",
                "colormap_name": "wistia",
                "rescale": "0,365",
                "maxzoom": 4,
            },
Contributor review comment on lines +52 to +58 (the "sourceParams" block above), with a suggested change to remove that block:

"I think we should check and see if the dashboard is ready to use renders params instead of sourceParams and leave this out entirely?"
"compare": {
"datasetId": collection_id,
"layerId": asset_id,
"mapLabel": (
"::js ({ dateFns, datetime, compareDatetime }) "
"=> {if (dateFns && datetime && compareDatetime)"
"return `${dateFns.format(datetime, 'yyyy')} "
"VS ${dateFns.format(compareDatetime, 'yyyy')}`;}"
),
},
"analysis": {"exclude": False, "metrics": ["mean"]},
"legend": {
"unit": {"label": "Days"},
"type": "gradient",
"min": 0,
"max": 365,
"stops": [
"#E4FF7A",
"#FAED2D",
"#FFCE0A",
"#FFB100",
"#FE9900",
"#FC7F00",
],
},
"info": {
"source": "NASA",
"spatialExtent": "Global",
"temporalResolution": "Annual",
"unit": "Days",
},
}
json_data["layers"].append(layer)

# Convert json to yaml for frontmatter
yaml_data = yaml.dump(json_data, sort_keys=False)

return yaml_data


def safe_open_w(path):
"""Open "path" for writing, creating any parent directories as needed."""
os.makedirs(os.path.dirname(path), exist_ok=True)
return open(path, "w")


if __name__ == "__main__":
input_data = json.load(open(sys.argv[1]))
dataset_config = create_frontmatter(input_data)
front_matter = f"---\n{dataset_config}---\n"

# Path to the existing file
curr_directory = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(curr_directory, "dataset.mdx")

# Read the existing content of the file
with open(file_path, "r") as file:
existing_content = file.read()

# Combine front matter and existing content
new_content = front_matter + existing_content

# Write the combined content back to the file
output_filepath = os.path.join(
curr_directory,
f"../ingestion-data/dataset-mdx/{input_data['collection']}.data.mdx",
)
with safe_open_w(output_filepath) as ofile:
ofile.write(new_content)
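To inspect the frontmatter the new script produces for a given config without going through the workflow, one option is to load it by file path (the hyphen in generate-mdx.py prevents a plain import). A minimal sketch, assuming it is run from the repository root with pyyaml installed:

# Sketch: print create_frontmatter output for the new test config (not part of this PR).
import importlib.util
import json

spec = importlib.util.spec_from_file_location("generate_mdx", "scripts/generate-mdx.py")
generate_mdx = importlib.util.module_from_spec(spec)
spec.loader.exec_module(generate_mdx)  # the script's __main__ block does not run here

with open("ingestion-data/testing/dataset-config/test-2.json") as f:
    config = json.load(f)

print(generate_mdx.create_frontmatter(config))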
1 change: 1 addition & 0 deletions scripts/requirements.txt
@@ -0,0 +1 @@
pyyaml