Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions docs/source/reference/workflow/grid_stat.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Comparing against Gridded Data
==============================

METplus can perform comparison against gridded datasets using the ``grid_stat``
tool. Enable ``grid_stat`` comparisons by adding to your ``rose-suite.conf``::

RUN_METPLUS_GRID_STAT = True

Observations have to be configured for your site so that METplus knows the
correct source data paths.

Select which observations to run using e.g.::

METPLUS_GRID_STAT_OBS = ["GPM"]

Output
------

The workflow creates spatial plots of the model field, observation field and
difference between model and observation. ``grid_stat`` handles regridding both
data sources to a common grid.

Observation Types
-----------------

GPM
^^^

https://gpm.nasa.gov/data/imerg

Compares model field ``stratiform_rainfall_flux`` against observation field
``precipitation_flux``.

Available sites:
* NCI
1 change: 1 addition & 0 deletions docs/source/reference/workflow/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ Reference information for the CSET workflow
:maxdepth: 1

observations
grid_stat
197 changes: 197 additions & 0 deletions src/CSET/cset_workflow/app/grid_stat_plot/bin/grid_stat_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/env python3

"""Create CSET plots from MET grid_stat output."""

import argparse
import json
import logging
import os
from pathlib import Path
from typing import Iterable

import iris.coords
import iris.cube

from CSET.operators.plot import spatial_pcolormesh_plot

log = logging.getLogger(__name__)


def standardise_names(cube: iris.cube.Cube) -> iris.cube.Cube:
"""
Convert the name MET gives fields to something more standard.

From a name like
{type}_{var0}_{levels0}(_{var1}_{levels1})?_{region}

Sets:
var_name to '{type}_{var0}'
long_name to '{var0}_{long_type}'
standard_name to '{var0}' (for OBS and FCST fields only)

Modifies the cube in-place and returns it
"""
assert cube.var_name is not None
name = cube.var_name
levels = cube.attributes["level"].split(" and ")
mask_region = cube.attributes["masking_region"]
type = name.split("_")[0]

level_names = [lev.replace("*", "all").replace(",", "_") for lev in levels]

name_middle = name[len(type) + 1 : -(len(mask_region) + 1)]

var0 = name_middle[: name_middle.index(level_names[0]) - 1]

cube.var_name = f"{type}_{var0}_{mask_region}"
cube.attributes["type"] = type

if type == "OBS":
cube.long_name = f"{var0}_observation"
cube.standard_name = var0
cube.attributes["verification_type"] = "Observation Value"
elif type == "FCST":
cube.long_name = f"{var0}_forecast"
cube.standard_name = var0
cube.attributes["verification_type"] = "Forecast Value"
elif type == "DIFF":
cube.long_name = f"{var0}_difference"
cube.attributes["model_var"] = var0
cube.attributes["verification_type"] = cube.attributes["Difference"]

return cube


def postproc_cube(cube: iris.cube.Cube, field: str, filename: str):
"""
Prepare the MET data for processing.

Use this as a callback when loading Iris cubes from grid_stat output.

Fixes the names and sets up time coordinates properly
"""
cube = standardise_names(cube)

# Grab timestamps
init_time_ut = int(cube.attributes.pop("init_time_ut"))
valid_time_ut = int(cube.attributes.pop("valid_time_ut"))

# Clean up attributes that might not match between different files
cube.attributes.pop("init_time")
cube.attributes.pop("valid_time")
cube.attributes.pop("level")
cube.attributes.pop("name")
cube.attributes.pop("FileOrigins")

cube.add_aux_coord(
iris.coords.DimCoord(
init_time_ut,
standard_name="forecast_reference_time",
units="seconds since 1970-01-01 00:00Z",
)
)
cube.add_aux_coord(
iris.coords.DimCoord(
valid_time_ut, standard_name="time", units="seconds since 1970-01-01 00:00Z"
)
)

if "invalid_units" in cube.attributes:
cube.units = cube.attributes["invalid_units"].split(" and ")[0]


def load_grid_stat(paths: Iterable[Path]) -> iris.cube.CubeList:
"""Load processed grid_stat netcdf output as iris cubes."""
cubes = iris.cube.CubeList()
for path in paths:
cubes.extend(iris.load(path, callback=postproc_cube))
return cubes.merge()


def plot_grid_stat(
cube: iris.cube.Cube,
webdir: Path,
*,
style_file: Path | str | None = None,
plot_resolution: int | None = None,
):
"""
Create plots from grid_stat output.

Parameters
----------
cube:
Input data cube
webdir:
Base web path for this cycle
style_file:
Colorbar definition JSON file
plot_resolution:
Plot resolution in pixels per inch
"""
model = cube.attributes["model"]
obstype = cube.attributes["obtype"]
assert cube.var_name is not None
outdir = webdir / "grid_stat" / f"{model}_vs_{obstype}" / cube.var_name
outdir.mkdir(exist_ok=True, parents=True)
os.chdir(outdir)

log.info("writing %s to %s", cube.name(), outdir)

# Set up CSET metadata
meta = {
"category": "Gridded Verification",
"title": f"{cube.long_name}",
"case_date": cube.coord("forecast_reference_time")
.as_string_arrays(fmt="%Y%m%dT%H%MZ")
.points[0],
"MODEL_NAME": model,
"OBSERVATION_TYPE": obstype,
"GRID_STAT_TYPE": cube.attributes["verification_type"],
"SUBAREA_EXTENT": cube.attributes["masking_region"],
}

if cube.attributes["type"] == "OBS":
meta["title"] = f"{obstype} {cube.long_name}"
elif cube.attributes["type"] == "FCST":
meta["title"] = f"{model} {cube.long_name}"
elif cube.attributes["type"] == "DIFF":
meta["title"] = f"{model} vs {obstype} {cube.long_name}"

with open("meta.json", "wt") as f:
json.dump(meta, f)

cube.coord("latitude").guess_bounds()
cube.coord("longitude").guess_bounds()
spatial_pcolormesh_plot(cube, f"grid_stat_DarwinCTL_vs_GPM_{cube.var_name}.png")


def main():
"""CLI entry point."""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--web-dir", help="directory to store plots", type=Path, required=True
)
parser.add_argument("--style-file", help="colorbar definitions", type=Path)
parser.add_argument("--plot-resolution", help="plot resolution", type=int)
parser.add_argument(
"input", help="grid_stat directories to process", type=Path, nargs="+"
)
args = parser.parse_args()

logging.basicConfig(level=logging.INFO)

for grid_stat_dir in args.input:
cubes = load_grid_stat(grid_stat_dir.glob("**/*.nc"))

for cube in cubes:
plot_grid_stat(
cube,
args.web_dir,
style_file=args.style_file,
plot_resolution=args.plot_resolution,
)


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions src/CSET/cset_workflow/app/grid_stat_plot/rose-app.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[command]
default = grid_stat_plot.py --web-dir "${CYLC_WORKFLOW_SHARE_DIR}/web/plots/${CYLC_TASK_CYCLE_POINT}" "${CYLC_TASK_SHARE_CYCLE_DIR}/grid_stat"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[config]

OBTYPE = GPM

# Daily files contain times 0015 to 2345
# Make sure valid time 0000 uses the previous day's file by adding a shift of -1s to valid times
OBS_GRID_STAT_INPUT_DIR = /g/data/dp9/da/verification/satellite/products/gpm/netcdf/imerg/NRTlate
OBS_GRID_STAT_INPUT_TEMPLATE = {valid?fmt=%Y}/gpm_imerg_NRTlate_V07B_{valid?fmt=%Y%m%d?shift=-1}.nc

OBS_VAR1_NAME = precipitation_flux
FCST_VAR1_NAME = stratiform_rainfall_flux
BOTH_VAR1_LEVELS = "({valid?fmt=%Y%m%d_%H%M%S},*,*)"
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Common METplus grid_stat configuration to make it work with CSET

[dir]
MET_INSTALL_DIR = {ENV[CONDA_PREFIX]}

[config]
PROCESS_LIST = GridStat

OUTPUT_BASE = {ENV[CYLC_TASK_SHARE_CYCLE_DIR]}
LOG_DIR = {ENV[CYLC_TASK_LOG_DIR]}

MODEL = {ENV[MODEL_NAME]}

# [!] Set this for your site/obs type in a site file
# OBTYPE =

###
# Time Info
# LOOP_BY options are INIT, VALID, RETRO, and REALTIME
# If set to INIT or RETRO:
# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set
# If set to VALID or REALTIME:
# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set
# LEAD_SEQ is the list of forecast leads to process
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control
###

LOOP_BY = INIT

INIT_TIME_FMT = %Y%m%dT%H%MZ
INIT_BEG = {ENV[CYLC_TASK_CYCLE_POINT]}
INIT_END = {ENV[CYLC_TASK_CYCLE_POINT]}
INIT_INCREMENT = 1H

LEAD_SEQ = {ENV[LEAD_SEQ]}

###
# File I/O
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info
###

FCST_GRID_STAT_INPUT_DIR = {ENV[CYLC_WORKFLOW_SHARE_DIR]}/cycle/{init?fmt=%Y%m%dT%H%MZ}/metplus_fcst
FCST_GRID_STAT_INPUT_TEMPLATE = {ENV[MODEL_ID]}.nc

# [!] Set this for your site/obs type in a site file
# OBS_GRID_STAT_INPUT_DIR =
# OBS_GRID_STAT_INPUT_TEMPLATE =

GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}
GRID_STAT_OUTPUT_TEMPLATE = grid_stat/{MODEL}/{OBTYPE}
GRID_STAT_OUTPUT_PREFIX = {MODEL}_vs_{OBTYPE}_{CURRENT_OBS_NAME}

# Default regridding
GRID_STAT_REGRID_TO_GRID = FCST
GRID_STAT_REGRID_METHOD = BILIN
GRID_STAT_REGRID_WIDTH = 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[command]
default = for TYPE in $GRID_STAT_OBS; do app_env_wrapper run_metplus.py "$SITE/gridstat.conf" "$SITE/$TYPE.conf"; done

[env]
!CONDA_VENV_LOCATION=/dev/null
MET_INSTALL_DIR=/bom-ngm/conda
Original file line number Diff line number Diff line change
@@ -1,24 +1,15 @@
# Common METplus point_stat configuration to make it work with CSET

[dir]
FCST_BASE = {ENV[METPLUS_FCST_DIR]}
OUTPUT_BASE = {ENV[CYLC_TASK_WORK_DIR]}
MET_INSTALL_DIR = {ENV[CONDA_PREFIX]}

[config]
PROCESS_LIST = PointStat

OUTPUT_BASE = {ENV[CYLC_TASK_SHARE_CYCLE_DIR]}
LOG_DIR = {ENV[CYLC_TASK_LOG_DIR]}

# Documentation for this use case can be found at
# https://metplus.readthedocs.io/en/latest/generated/met_tool_wrapper/ASCII2NC/ASCII2NC_python_embedding.html

# For additional information, please see the METplus Users Guide.
# https://metplus.readthedocs.io/en/latest/Users_Guide

###
# Processes to run
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list
###

PROCESS_LIST = PointStat
MODEL = {ENV[MODEL_NAME]}

###
# Time Info
Expand All @@ -32,25 +23,24 @@ PROCESS_LIST = PointStat
###

LOOP_BY = INIT
INIT_TIME_FMT = %Y%m%dT%H
INIT_BEG = {ENV[TASK_START_TIME]}
INIT_END = {ENV[TASK_START_TIME]}

INIT_TIME_FMT = %Y%m%dT%H%MZ
INIT_BEG = {ENV[CYLC_TASK_CYCLE_POINT]}
INIT_END = {ENV[CYLC_TASK_CYCLE_POINT]}
INIT_INCREMENT = 1H
LEAD_SEQ = begin_end_incr(0,{ENV[FORECAST_LENGTH]},1)

# Number of seconds to shift times in the fcst file (try half the time increment)
FCST_SHIFT = 1800
LEAD_SEQ = {ENV[LEAD_SEQ]}

###
# File I/O
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info
###

FCST_POINT_STAT_INPUT_DIR = {FCST_BASE}
FCST_POINT_STAT_INPUT_TEMPLATE = m1.nc
FCST_POINT_STAT_INPUT_DIR = {ENV[CYLC_WORKFLOW_SHARE_DIR]}/cycle/{init?fmt=%Y%m%dT%H%MZ}/metplus_fcst
FCST_POINT_STAT_INPUT_TEMPLATE = {ENV[MODEL_ID]}.nc

OBS_POINT_STAT_INPUT_DIR = {ENV[CYLC_WORKFLOW_SHARE_DIR]}/obs_nc
OBS_POINT_STAT_INPUT_TEMPLATE = {valid?fmt=%Y%m%dT%H}.nc

POINT_STAT_OUTPUT_DIR = {ENV[CYLC_TASK_SHARE_CYCLE_DIR]}
POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}
POINT_STAT_OUTPUT_TEMPLATE = Point_Stat_{ENV[MODEL_NAME]}
Loading