Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NetCDF preview #6

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ jobs:
- name: word-cloud-extractor
FOLDER: word-cloud-extractor
PLATFORM: "linux/amd64,linux/arm64"
- name: ncsa.pdg.asjp.netcdf.preview
FOLDER: preview.netcdf
PLATFORM: "linux/amd64,linux/arm64"

steps:
- uses: actions/checkout@v2
Expand Down
34 changes: 34 additions & 0 deletions preview.netcdf/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Image for the Clowder NetCDF preview extractor.
# The platform is pinned to linux/amd64 because the Miniconda installer
# downloaded below is the x86_64 build.
FROM --platform=linux/amd64 ubuntu:18.04
ENV PATH="/root/miniconda3/bin:${PATH}"
ARG PATH="/root/miniconda3/bin:${PATH}"

# Update and install in the same layer so a cached `apt-get update` layer is
# never reused with a newer install step; the package lists are removed in
# that same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y wget \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda in batch mode (-b); the PATH entries above expect it
# under /root/miniconda3.
RUN wget \
    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && mkdir /root/.conda \
    && bash Miniconda3-latest-Linux-x86_64.sh -b \
    && rm -f Miniconda3-latest-Linux-x86_64.sh
RUN conda --version

# Only the environment spec is needed to build the conda environment.
COPY environment.yml .

RUN conda install -y chardet

RUN conda install -y -c conda-forge mamba

RUN mamba env create -f environment.yml

# Run the remaining build steps inside the netcdf-preview environment.
SHELL ["conda", "run", "-n", "netcdf-preview", "/bin/bash", "-c"]

RUN python -m pip install pyclowder

# Copy the extractor sources last so that code edits do not invalidate the
# (slow) environment layers above.
COPY netcdfutils.py ncsa.geo.netcdf.extractor.py extractor_info.json ./

ENV CLOWDER_VERSION=2

CMD ["conda", "run", "--no-capture-output", "-n", "netcdf-preview", "python","-u", "/ncsa.geo.netcdf.extractor.py"]
28 changes: 28 additions & 0 deletions preview.netcdf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Clowder Geo NetCDF Extractor

Overview

This extractor uses the Python netCDF4, matplotlib, and Basemap packages to plot data from
`.nc` and `.nc4` files on a map.

NOTE - this is supposed to be a general purpose extractor that should work on
many files, but because NetCDF is a flexible file format, it is not guaranteed to work.
If the data is a time series, it will generate 4 previews spaced evenly throughout the time interval.


## Build a docker image
docker build -t ncsa-netcdf-extractor:latest .

## Test the docker container image:
docker run -t -i --rm --net clowder_clowder -e "RABBITMQ_URI=amqp://guest:guest@rabbitmq:5672/%2f" --name "ncsa-netcdf-extractor" ncsa-netcdf-extractor

## To run without docker

1. Install required python packages using *conda*

`conda env create -f environment.yml`
2. Activate conda environment
`conda activate netcdf-preview`
3. Start extractor

`./ncsa.geo.netcdf.extractor.py`
Binary file not shown.
14 changes: 14 additions & 0 deletions preview.netcdf/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Conda environment for the NetCDF preview extractor.
# The name must match the `-n netcdf-preview` passed to `conda run` in the Dockerfile.
name: netcdf-preview
channels:
- conda-forge
- defaults
dependencies:
- python=3.9
- pip
- wheel
# netCDF4, matplotlib and basemap are imported by netcdfutils.py to read the
# file and draw the maps.
- netCDF4
- pyproj
- matplotlib
- basemap
# Packages installed with pip inside the environment.
- pip:
- pyclowder
46 changes: 46 additions & 0 deletions preview.netcdf/extractor_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "ncsa.pdg.asjp.netcdf.preview",
"version": "1.0",
"description": "Maps of NetCDF Data",
"author": "Todd Nicholson <[email protected]>",
"contributors": [
"Luigi Marini <[email protected]>"
],
"contexts": [
{
}
],
"repository": [
{
"repType": "git",
"repUrl": "https://opensource.ncsa.illinois.edu/stash/scm/cats/extractors-geo.git"
},
{
"repType": "docker",
"repUrl": "clowder/extractors-geo-netcdf-preview"
}
],
"process": {
"file": [
"application/x-netcdf"
]
},
"external_services": [
"geoserver"
],
"dependencies": [],
"bibtex": [],
"parameters": {
"schema": {
"directory": {
"type": "string",
"title": "projection"
}
}
},
"labels": [
"Type/Image",
"Domain/Geo"
]
}
91 changes: 91 additions & 0 deletions preview.netcdf/ncsa.geo.netcdf.extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python

"""Clowder extractor that uploads map previews of NetCDF files.

The plots are produced by ``netcdfutils.generate_maps_for_file`` and each
resulting PNG is uploaded to Clowder as an image preview of the source file.
"""

import logging
import json
import pyclowder
from pyclowder.extractors import Extractor
import pyclowder.files
import os
import matplotlib.pyplot as plt
import netcdfutils

# Default size (width, height in inches) of every figure created via pyplot.
plt.rcParams["figure.figsize"] = (16.0, 12.0)

lmarini marked this conversation as resolved.
Show resolved Hide resolved

class GeoNetCDF(Extractor):
    """Generate map previews for a NetCDF file and upload them to Clowder."""

    def __init__(self):
        Extractor.__init__(self)

        # parse command line and load default logging configuration
        self.setup()

        # setup logging for the extractor
        logging.basicConfig(level=logging.INFO)
        logging.getLogger("pyclowder").setLevel(logging.DEBUG)
        logging.getLogger("__main__").setLevel(logging.DEBUG)

    def process_message(
        self,
        connector,
        host,
        secret_key,
        resource,
        parameters,
        projection="Polar Stereographic'",
    ):
        """Plot the variables of one NetCDF file and upload each plot as a preview.

        Args:
            connector: pyclowder connector used for status messages and uploads.
            host: Clowder host passed through to ``upload_preview``.
            secret_key: Clowder key passed through to ``upload_preview``.
            resource: message resource; ``local_paths[0]``, ``id`` and
                ``name`` are read from it.
            parameters: extraction parameters; not used by this extractor.
            projection: not used. NOTE(review): the default (including its
                stray trailing quote) is kept unchanged so the signature
                stays as it was — confirm whether it can be dropped.
        """
        logger = logging.getLogger(__name__)

        inputfile = resource["local_paths"][0]
        file_id = resource["id"]
        file_name = resource["name"]
        # These process messages will appear in the Clowder UI under Extractions.
        connector.message_process(resource, "Loading contents of file...")
        logger.debug("Preparing to generate plots")

        try:
            png_filepaths = netcdfutils.generate_maps_for_file(path_to_file=inputfile)
            for png_file in png_filepaths:
                base_name = os.path.basename(png_file)
                variable_name = base_name.replace(file_name, "").lstrip("_")
                # Drop the extension as a suffix; str.rstrip(".png") would
                # also eat trailing 'p', 'n', 'g' and '.' characters of the
                # variable name itself (e.g. "temp.png" -> "tem").
                if variable_name.endswith(".png"):
                    variable_name = variable_name[: -len(".png")]
                pyclowder.files.upload_preview(
                    connector,
                    host,
                    secret_key,
                    file_id,
                    png_file,
                    None,
                    "image/" + "png",
                    visualization_name=variable_name,
                    visualization_description=variable_name,
                    visualization_component_id="basic-image-component",
                )
                try:
                    os.remove(png_file)
                except OSError as e:
                    logger.debug(f"Error removing {png_file}")
                    logger.debug(f"{e}")
        finally:
            # Runs even when plotting or an upload fails, so plots left behind
            # by an aborted run do not accumulate. Like the shell glob it
            # replaces ("rm *.png"), this removes every non-hidden PNG in the
            # current working directory.
            logger.debug("Cleaning up all png files")
            for leftover in os.listdir("."):
                if leftover.startswith(".") or not leftover.endswith(".png"):
                    continue
                try:
                    os.remove(leftover)
                except OSError as e:
                    logger.debug(f"Error cleaning up files {e}")


if __name__ == "__main__":
    # Report the Clowder version selected through the environment, then
    # build the extractor and hand control to it.
    clowder_version = os.getenv("CLOWDER_VERSION")
    print(f"{clowder_version} is the clowder version")
    extractor = GeoNetCDF()
    extractor.start()
166 changes: 166 additions & 0 deletions preview.netcdf/netcdfutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from netCDF4 import Dataset as Dataset
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap

# Default size (width, height in inches) of every figure created via pyplot.
plt.rcParams["figure.figsize"] = (16.0, 12.0)


def _find_lat_lon_names(variable_names):
    """Guess the latitude and longitude variable names.

    A name counts as latitude when it contains "lat" and as longitude when it
    contains "lon" (case-insensitive). When several names match, the last one
    wins. Either element of the returned pair is "" when nothing matched.
    """
    lat_name = ""
    lon_name = ""
    for name in variable_names:
        lowered = str(name).lower()
        if "lat" in lowered:
            lat_name = name
        if "lon" in lowered:
            lon_name = name
    return lat_name, lon_name


def _render_map(data, lons, lats, projection, title, plot_name):
    """Draw *data* on a world map, save it to *plot_name* and clear the figure."""
    world = Basemap(
        projection=projection,
        llcrnrlat=-80,
        urcrnrlat=80,
        llcrnrlon=-180,
        urcrnrlon=180,
        lat_ts=20,
        resolution="c",
    )
    # 1-dimensional lat/lon vectors must be expanded to 2-dimensional grids
    # before they can be handed to pcolor.
    if len(lats.shape) == 1 and len(lons.shape) == 1:
        grid_lons, grid_lats = np.meshgrid(lons, lats)
        xi, yi = world(grid_lons, grid_lats)
    else:
        xi, yi = world(lons, lats)

    squeezed = np.squeeze(data)
    world.pcolor(xi, yi, squeezed)
    world.drawcoastlines()
    world.drawcountries()
    world.drawparallels(np.arange(-90.0, 91.0, 30.0))
    world.drawmeridians(np.arange(-180.0, 181.0, 60.0))
    colorbar = world.colorbar()
    colorbar.solids.set_edgecolor("face")
    # Only label the two ends of the colour scale.
    colorbar.set_ticks([np.nanmin(squeezed), np.nanmax(squeezed)])
    plt.title(title, fontdict={"fontsize": 26})
    plt.savefig(plot_name)
    plt.clf()


def generate_maps_for_file(path_to_file, projection="merc"):
    """Plot every mappable variable of a NetCDF file and return the PNG names.

    A variable is plotted when its shape contains all dimension sizes of the
    latitude and longitude variables. With no further dimension one map is
    written; with exactly one further dimension (treated as a series, e.g.
    time) up to four maps are written at indices 0, n//4, 2*(n//4) and
    3*(n//4), skipping repeated indices. Variables with more than one further
    dimension are not plotted.

    Args:
        path_to_file: path of the NetCDF file to read.
        projection: Basemap projection code used for every map.

    Returns:
        List of the PNG file names written (relative to the current working
        directory), in plotting order.

    Raises:
        KeyError: if no latitude or longitude variable can be identified.
    """
    previews_returned = []

    # The context manager closes the dataset even when plotting fails.
    with Dataset(path_to_file) as dataset:
        variable_names = list(dataset.variables.keys())
        lat_name, lon_name = _find_lat_lon_names(variable_names)
        if not lat_name or not lon_name:
            raise KeyError(
                "no latitude/longitude variable found in " + str(path_to_file)
            )

        lat_var = dataset.variables[lat_name]
        lon_var = dataset.variables[lon_name]
        lat_lon_sizes = set(lat_var.shape) | set(lon_var.shape)
        lats = lat_var[:]
        lons = lon_var[:]

        for name in variable_names:
            if name == lat_name or name == lon_name:
                continue
            variable = dataset[name]
            shape = tuple(variable.shape)
            if not lat_lon_sizes.issubset(shape):
                continue

            # Axes whose size matches neither lat nor lon.
            extra_axes = [
                axis for axis, size in enumerate(shape) if size not in lat_lon_sizes
            ]
            if len(extra_axes) > 1:
                continue

            # Fall back to the variable name when long_name is missing or empty.
            long_name = str(getattr(variable, "long_name", "") or variable.name)
            units = getattr(variable, "units", None)
            title = f"{long_name}({units})" if units else long_name
            # A "/" in the name would be read as a directory by savefig.
            file_stem = long_name.replace("/", "_")
            data = variable[:]

            if not extra_axes:
                plot_name = file_stem + ".png"
                _render_map(data, lons, lats, projection, title, plot_name)
                previews_returned.append(plot_name)
                continue

            series_axis = extra_axes[0]
            steps = shape[series_axis]
            quarter = steps // 4
            plotted_steps = set()
            for i in range(4):
                step = i * quarter
                # Skip empty dimensions and, for short series, repeated indices.
                if step >= steps or step in plotted_steps:
                    continue
                plotted_steps.add(step)
                # Slice along the series axis wherever it sits in the shape.
                selector = [slice(None)] * len(shape)
                selector[series_axis] = step
                plot_name = f"{file_stem}{i}_{steps}.png"
                _render_map(
                    data[tuple(selector)], lons, lats, projection, title, plot_name
                )
                previews_returned.append(plot_name)

    return previews_returned
Loading