你爱我还是他 (Do You Love Me or Him) | Reading GRIB Data with xarray 2024.11.0
Current image: Meteorological Analysis Image 3.11 (beta) · Resources: 2-core CPU, 8 GB RAM
The meteorological data-processing toolchain has kept evolving in recent years. With the 2024.11.0 release, xarray dropped the legacy PyNio and pygrib engines and now relies on ECMWF's cfgrib as its primary engine for parsing the GRIB format.
This happens to coincide with the meteorological image being upgraded to version 3.11, and Nano has called on the community to write GRIB-reading guides for newcomers; as moderator, I could hardly refuse.
Back to business.
Against this background, this article gives beginners a systematic walkthrough of using xarray with the cfgrib engine, covering the full workflow of reading, parsing, and visualizing Global Forecast System (GFS) data, with reproducible steps throughout.
ready go!
PS: the title comes from David Tao's (陶喆) classic song 《你爱我还是他》 ("Do You Love Me or Him").
!pip install cmaps -i https://pypi.mirrors.ustc.edu.cn/simple/
🎓 Author: 酷炫用户名
📢 Copyright notice: non-profit reposts must first obtain the author's permission. When reposting, please credit the source in text as "来自:和鲸社区:酷炫用户名" (from: Heywhale community: 酷炫用户名) and include a hyperlink to this project.
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.shapereader import BasicReader
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
ds = xr.open_dataset('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', engine='cfgrib')
ds
---------------------------------------------------------------------------
DatasetBuildError Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:692, in build_dataset_components(index, errors, encode_cf, squeeze, log, read_keys, time_dims, extra_coords, coords_as_attributes, cache_geo_coords)
691 try:
--> 692 dims, data_var, coord_vars = build_variable_components(
693 var_index,
694 encode_cf,
695 filter_by_keys,
696 errors=errors,
697 squeeze=squeeze,
698 read_keys=read_keys,
699 time_dims=time_dims,
700 extra_coords=extra_coords,
701 coords_as_attributes=coords_as_attributes,
702 cache_geo_coords=cache_geo_coords,
703 )
704 except DatasetBuildError as ex:
705 # NOTE: When a variable has more than one value for an attribute we need to raise all
706 # the values in the file, not just the ones associated with that variable. See #54.
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:501, in build_variable_components(index, encode_cf, filter_by_keys, log, errors, squeeze, read_keys, time_dims, extra_coords, coords_as_attributes, cache_geo_coords)
488 def build_variable_components(
489 index: abc.Index[T.Any, abc.Field],
490 encode_cf: T.Sequence[str] = (),
(...)
499 cache_geo_coords: bool = True,
500 ) -> T.Tuple[T.Dict[str, int], Variable, T.Dict[str, Variable]]:
--> 501 data_var_attrs = enforce_unique_attributes(index, DATA_ATTRIBUTES_KEYS, filter_by_keys)
502 grid_type_keys = GRID_TYPE_MAP.get(index.getone("gridType"), [])
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:291, in enforce_unique_attributes(index, attributes_keys, filter_by_keys)
290 fbks.append(fbk)
--> 291 raise DatasetBuildError("multiple values for key %r" % key, key, fbks)
292 if values and values[0] not in ("undef", "unknown"):
DatasetBuildError: multiple values for key 'typeOfLevel'
During handling of the above exception, another exception occurred:
DatasetBuildError Traceback (most recent call last)
Cell In[2], line 1
----> 1 ds = xr.open_dataset('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', engine='cfgrib')
2 ds
File /opt/conda/lib/python3.11/site-packages/xarray/backends/api.py:670, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
658 decoders = _resolve_decoders_kwargs(
659 decode_cf,
660 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
666 decode_coords=decode_coords,
667 )
669 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 670 backend_ds = backend.open_dataset(
671 filename_or_obj,
672 drop_variables=drop_variables,
673 **decoders,
674 **kwargs,
675 )
676 ds = _dataset_from_backend_dataset(
677 backend_ds,
678 filename_or_obj,
(...)
688 **kwargs,
689 )
690 return ds
File /opt/conda/lib/python3.11/site-packages/cfgrib/xarray_plugin.py:111, in CfGribBackend.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, lock, indexpath, filter_by_keys, read_keys, ignore_keys, encode_cf, squeeze, time_dims, errors, extra_coords, coords_as_attributes, cache_geo_coords)
87 def open_dataset(
88 self,
89 filename_or_obj: T.Union[str, abc.MappingFieldset[T.Any, abc.Field]],
(...)
109 cache_geo_coords: bool = True,
110 ) -> xr.Dataset:
--> 111 store = CfGribDataStore(
112 filename_or_obj,
113 indexpath=indexpath,
114 filter_by_keys=filter_by_keys,
115 read_keys=read_keys,
116 ignore_keys=ignore_keys,
117 encode_cf=encode_cf,
118 squeeze=squeeze,
119 time_dims=time_dims,
120 lock=lock,
121 errors=errors,
122 extra_coords=extra_coords,
123 coords_as_attributes=coords_as_attributes,
124 cache_geo_coords=cache_geo_coords,
125 )
126 with xr.core.utils.close_on_error(store):
127 vars, attrs = store.load() # type: ignore
File /opt/conda/lib/python3.11/site-packages/cfgrib/xarray_plugin.py:40, in CfGribDataStore.__init__(self, filename, lock, **backend_kwargs)
38 else:
39 opener = dataset.open_fieldset
---> 40 self.ds = opener(filename, **backend_kwargs)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:824, in open_file(path, errors, indexpath, filter_by_keys, read_keys, time_dims, extra_coords, ignore_keys, **kwargs)
822 index_keys = compute_index_keys(time_dims, extra_coords)
823 index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)
--> 824 return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:763, in open_from_index(index, read_keys, time_dims, extra_coords, **kwargs)
756 def open_from_index(
757 index: abc.Index[T.Any, abc.Field],
758 read_keys: T.Sequence[str] = (),
(...)
761 **kwargs: T.Any,
762 ) -> Dataset:
--> 763 dimensions, variables, attributes, encoding = build_dataset_components(
764 index, read_keys=read_keys, time_dims=time_dims, extra_coords=extra_coords, **kwargs
765 )
766 return Dataset(dimensions, variables, attributes, encoding)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:715, in build_dataset_components(index, errors, encode_cf, squeeze, log, read_keys, time_dims, extra_coords, coords_as_attributes, cache_geo_coords)
713 fbks.append(fbk)
714 error_message += "\n filter_by_keys=%r" % fbk
--> 715 raise DatasetBuildError(error_message, key, fbks)
716 short_name = data_var.attributes.get("GRIB_shortName", "paramId_%d" % param_id)
717 var_name = data_var.attributes.get("GRIB_cfVarName", "unknown")
DatasetBuildError: multiple values for unique key, try re-open the file with one of:
filter_by_keys={'typeOfLevel': 'meanSea'}
filter_by_keys={'typeOfLevel': 'hybrid'}
filter_by_keys={'typeOfLevel': 'atmosphere'}
filter_by_keys={'typeOfLevel': 'surface'}
filter_by_keys={'typeOfLevel': 'planetaryBoundaryLayer'}
filter_by_keys={'typeOfLevel': 'isobaricInPa'}
filter_by_keys={'typeOfLevel': 'isobaricInhPa'}
filter_by_keys={'typeOfLevel': 'heightAboveGround'}
filter_by_keys={'typeOfLevel': 'depthBelowLandLayer'}
filter_by_keys={'typeOfLevel': 'heightAboveSea'}
filter_by_keys={'typeOfLevel': 'atmosphereSingleLayer'}
filter_by_keys={'typeOfLevel': 'lowCloudLayer'}
filter_by_keys={'typeOfLevel': 'middleCloudLayer'}
filter_by_keys={'typeOfLevel': 'highCloudLayer'}
filter_by_keys={'typeOfLevel': 'cloudCeiling'}
filter_by_keys={'typeOfLevel': 'convectiveCloudBottom'}
filter_by_keys={'typeOfLevel': 'lowCloudBottom'}
filter_by_keys={'typeOfLevel': 'middleCloudBottom'}
filter_by_keys={'typeOfLevel': 'highCloudBottom'}
filter_by_keys={'typeOfLevel': 'convectiveCloudTop'}
filter_by_keys={'typeOfLevel': 'lowCloudTop'}
filter_by_keys={'typeOfLevel': 'middleCloudTop'}
filter_by_keys={'typeOfLevel': 'highCloudTop'}
filter_by_keys={'typeOfLevel': 'convectiveCloudLayer'}
filter_by_keys={'typeOfLevel': 'boundaryLayerCloudLayer'}
filter_by_keys={'typeOfLevel': 'nominalTop'}
filter_by_keys={'typeOfLevel': 'heightAboveGroundLayer'}
filter_by_keys={'typeOfLevel': 'tropopause'}
filter_by_keys={'typeOfLevel': 'maxWind'}
filter_by_keys={'typeOfLevel': 'isothermZero'}
filter_by_keys={'typeOfLevel': 'highestTroposphericFreezing'}
filter_by_keys={'typeOfLevel': 'pressureFromGroundLayer'}
filter_by_keys={'typeOfLevel': 'sigmaLayer'}
filter_by_keys={'typeOfLevel': 'sigma'}
filter_by_keys={'typeOfLevel': 'potentialVorticity'}
Why the error? Because GFS files have a fairly complex structure: a single file packs variables defined on many different level types, so cfgrib needs extra parameters.
DatasetBuildError: multiple values for unique key, try re-open the file with one of:
We hit a DatasetBuildError telling us that a key which must be unique has multiple values. cfgrib maps a GRIB file onto one xarray Dataset, and that requires index keys such as typeOfLevel to take a single value; when a file mixes several level types, no single consistent Dataset can be built, so cfgrib asks us to narrow things down with filter_by_keys.
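Before trying the suggested keys one by one, it is worth knowing a shortcut. cfgrib ships an open_datasets helper (note the plural) that splits a heterogeneous file into a list of consistent Datasets, one per valid key combination. A minimal sketch, assuming cfgrib can be imported directly in this image:

import cfgrib

# Sketch: open_datasets scans the file and returns one xarray.Dataset per
# self-consistent combination of keys, so no manual enumeration is needed.
# Indexing every combination can be slow on large files.
datasets = cfgrib.open_datasets('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2')
for i, sub in enumerate(datasets):
    print(i, list(sub.data_vars))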
That said, let's follow the error message and try one of the suggested parameters.
# surface variables
ds = xr.open_dataset('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', engine='cfgrib',
                     filter_by_keys={'typeOfLevel': 'surface'})
ds
---------------------------------------------------------------------------
DatasetBuildError Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:692, in build_dataset_components(index, errors, encode_cf, squeeze, log, read_keys, time_dims, extra_coords, coords_as_attributes, cache_geo_coords)
691 try:
--> 692 dims, data_var, coord_vars = build_variable_components(
693 var_index,
694 encode_cf,
695 filter_by_keys,
696 errors=errors,
697 squeeze=squeeze,
698 read_keys=read_keys,
699 time_dims=time_dims,
700 extra_coords=extra_coords,
701 coords_as_attributes=coords_as_attributes,
702 cache_geo_coords=cache_geo_coords,
703 )
704 except DatasetBuildError as ex:
705 # NOTE: When a variable has more than one value for an attribute we need to raise all
706 # the values in the file, not just the ones associated with that variable. See #54.
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:501, in build_variable_components(index, encode_cf, filter_by_keys, log, errors, squeeze, read_keys, time_dims, extra_coords, coords_as_attributes, cache_geo_coords)
488 def build_variable_components(
489 index: abc.Index[T.Any, abc.Field],
490 encode_cf: T.Sequence[str] = (),
(...)
499 cache_geo_coords: bool = True,
500 ) -> T.Tuple[T.Dict[str, int], Variable, T.Dict[str, Variable]]:
--> 501 data_var_attrs = enforce_unique_attributes(index, DATA_ATTRIBUTES_KEYS, filter_by_keys)
502 grid_type_keys = GRID_TYPE_MAP.get(index.getone("gridType"), [])
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:291, in enforce_unique_attributes(index, attributes_keys, filter_by_keys)
290 fbks.append(fbk)
--> 291 raise DatasetBuildError("multiple values for key %r" % key, key, fbks)
292 if values and values[0] not in ("undef", "unknown"):
DatasetBuildError: multiple values for key 'stepType'
During handling of the above exception, another exception occurred:
DatasetBuildError Traceback (most recent call last)
Cell In[3], line 2
1 # surface variables
----> 2 ds = xr.open_dataset('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', engine='cfgrib',
3 filter_by_keys={'typeOfLevel': 'surface'})
4 ds
File /opt/conda/lib/python3.11/site-packages/xarray/backends/api.py:670, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
658 decoders = _resolve_decoders_kwargs(
659 decode_cf,
660 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
666 decode_coords=decode_coords,
667 )
669 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 670 backend_ds = backend.open_dataset(
671 filename_or_obj,
672 drop_variables=drop_variables,
673 **decoders,
674 **kwargs,
675 )
676 ds = _dataset_from_backend_dataset(
677 backend_ds,
678 filename_or_obj,
(...)
688 **kwargs,
689 )
690 return ds
File /opt/conda/lib/python3.11/site-packages/cfgrib/xarray_plugin.py:111, in CfGribBackend.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, lock, indexpath, filter_by_keys, read_keys, ignore_keys, encode_cf, squeeze, time_dims, errors, extra_coords, coords_as_attributes, cache_geo_coords)
87 def open_dataset(
88 self,
89 filename_or_obj: T.Union[str, abc.MappingFieldset[T.Any, abc.Field]],
(...)
109 cache_geo_coords: bool = True,
110 ) -> xr.Dataset:
--> 111 store = CfGribDataStore(
112 filename_or_obj,
113 indexpath=indexpath,
114 filter_by_keys=filter_by_keys,
115 read_keys=read_keys,
116 ignore_keys=ignore_keys,
117 encode_cf=encode_cf,
118 squeeze=squeeze,
119 time_dims=time_dims,
120 lock=lock,
121 errors=errors,
122 extra_coords=extra_coords,
123 coords_as_attributes=coords_as_attributes,
124 cache_geo_coords=cache_geo_coords,
125 )
126 with xr.core.utils.close_on_error(store):
127 vars, attrs = store.load() # type: ignore
File /opt/conda/lib/python3.11/site-packages/cfgrib/xarray_plugin.py:40, in CfGribDataStore.__init__(self, filename, lock, **backend_kwargs)
38 else:
39 opener = dataset.open_fieldset
---> 40 self.ds = opener(filename, **backend_kwargs)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:824, in open_file(path, errors, indexpath, filter_by_keys, read_keys, time_dims, extra_coords, ignore_keys, **kwargs)
822 index_keys = compute_index_keys(time_dims, extra_coords)
823 index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)
--> 824 return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:763, in open_from_index(index, read_keys, time_dims, extra_coords, **kwargs)
756 def open_from_index(
757 index: abc.Index[T.Any, abc.Field],
758 read_keys: T.Sequence[str] = (),
(...)
761 **kwargs: T.Any,
762 ) -> Dataset:
--> 763 dimensions, variables, attributes, encoding = build_dataset_components(
764 index, read_keys=read_keys, time_dims=time_dims, extra_coords=extra_coords, **kwargs
765 )
766 return Dataset(dimensions, variables, attributes, encoding)
File /opt/conda/lib/python3.11/site-packages/cfgrib/dataset.py:715, in build_dataset_components(index, errors, encode_cf, squeeze, log, read_keys, time_dims, extra_coords, coords_as_attributes, cache_geo_coords)
713 fbks.append(fbk)
714 error_message += "\n filter_by_keys=%r" % fbk
--> 715 raise DatasetBuildError(error_message, key, fbks)
716 short_name = data_var.attributes.get("GRIB_shortName", "paramId_%d" % param_id)
717 var_name = data_var.attributes.get("GRIB_cfVarName", "unknown")
DatasetBuildError: multiple values for unique key, try re-open the file with one of:
filter_by_keys={'stepType': 'instant', 'typeOfLevel': 'surface'}
filter_by_keys={'stepType': 'avg', 'typeOfLevel': 'surface'}
filter_by_keys={'stepType': 'accum', 'typeOfLevel': 'surface'}
Oops! Let's keep adding keys as the error suggests. This time it wants a stepType; the options mean: instant (instantaneous values), avg (time-averaged), accum (accumulated over the step).
# surface variables, instantaneous fields only
ds = xr.open_dataset('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', engine='cfgrib',
                     filter_by_keys={'stepType': 'instant', 'typeOfLevel': 'surface'})
ds
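Incidentally, the same filter can also be passed through backend_kwargs, xarray's generic channel for engine-specific options. A minimal sketch, assuming the cfgrib backend treats the two spellings identically:

# Equivalent form using backend_kwargs (a sketch; both spellings should
# reach the cfgrib backend the same way):
ds = xr.open_dataset(
    '/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2',
    engine='cfgrib',
    backend_kwargs={'filter_by_keys': {'stepType': 'instant', 'typeOfLevel': 'surface'}},
)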
Success! Now for a quick visualization.
lon = ds.longitude
lat = ds.latitude
gust = ds.gust
gust
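Since gfs_4 files are the global 0.5° GFS product and the latitudes are stored from 90° down to -90°, label-based selection needs a descending latitude slice. A small sketch (my own addition, not from the original notebook) that cuts out an East Asia window:

# Hypothetical subsetting example: longitudes 70-140E, latitudes 60-0N.
# GFS latitudes are stored in descending order, so slice(60, 0), not slice(0, 60).
gust_ea = gust.sel(longitude=slice(70, 140), latitude=slice(60, 0))
print(gust_ea.shape)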
import cmaps

# create the figure with a PlateCarree GeoAxes
fig, ax = plt.subplots(figsize=(18, 4), subplot_kw={'projection': ccrs.PlateCarree()})
# ax.set_extent([91, 116, 20, 40], crs=ccrs.PlateCarree())  # uncomment to zoom in

# gridlines, labelled on the bottom/left only
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, linewidth=1.2,
                  color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False
gl.right_labels = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER

# plot the gust field as filled contours
im = ax.contourf(lon, lat, gust, levels=10, cmap=cmaps.WhiteBlue,
                 transform=ccrs.PlateCarree())

# add a title
ax.set_title('gust')

# map background: coastlines on top, land/ocean pushed below the
# data with zorder=0 so the fills do not cover the contours
ax.coastlines(resolution='50m', lw=0.5)
ax.add_feature(cfeature.OCEAN.with_scale('50m'), zorder=0)
ax.add_feature(cfeature.LAND.with_scale('50m'), zorder=0)

# colorbar in a dedicated axes (shrink/pad are ignored when cax is
# given, so they are dropped here)
cax = fig.add_axes([0.7, 0.2, 0.02, 0.6])
cbar = fig.colorbar(im, cax=cax, orientation='vertical')
cbar.set_label('gust')
plt.show()
What we did above is a brute-force enumeration approach: clumsy, but it gets you familiar with the data structure.
You can also explore a file's structure with another GRIB tool: grib_ls, part of the ecCodes command-line suite.
!grib_ls /home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2
The listing above shows every field in the file. Suppose we know a variable's short name and want to pick it out of the GRIB file; we can do this:
!grib_ls -w shortName=gust /home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2
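If you prefer to stay in Python, the same filtering can be done with the eccodes bindings that grib_ls is built on. A rough sketch, assuming the eccodes Python package is available in this image:

import eccodes

# Walk the GRIB messages and print level info for every 'gust' field,
# roughly what `grib_ls -w shortName=gust` reports on the command line.
with open('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', 'rb') as f:
    while True:
        gid = eccodes.codes_grib_new_from_file(f)
        if gid is None:
            break
        if eccodes.codes_get(gid, 'shortName') == 'gust':
            print(eccodes.codes_get(gid, 'typeOfLevel'),
                  eccodes.codes_get(gid, 'level'),
                  eccodes.codes_get(gid, 'stepType'))
        eccodes.codes_release(gid)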
Good: the variable lives in the surface dataset.
ds = xr.open_dataset('/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2', engine='cfgrib',
                     filter_by_keys={'typeOfLevel': 'surface', 'shortName': 'gust'})
ds
ds.gust.plot()
<matplotlib.collections.QuadMesh at 0x7fbd9396c110>
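The same recipe extends to any other level type from the earlier list. For instance, here is a sketch (my addition; the short name and the extra stepType filter are assumptions that may vary by product) reading the pressure-level subset and picking the 500 hPa geopotential height:

# Sketch: open the isobaric-level subset and select 500 hPa.
# 'gh' is the usual cfgrib shortName for geopotential height; adjust it if
# your file reports a different name, and add a stepType filter if cfgrib
# again complains about multiple values.
ds_pl = xr.open_dataset(
    '/home/mw/input/GFS1824/gfs_4_20230902_0000_021.grb2',
    engine='cfgrib',
    filter_by_keys={'typeOfLevel': 'isobaricInhPa'},
)
z500 = ds_pl.gh.sel(isobaricInhPa=500)
z500.plot()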
The deprecation of pygrib and PyNio feels a bit like "the ship of the new era can no longer carry me".
If anything above is unclear, feel free to discuss it in the comments.
To go further with GRIB data handling, check out the activity board's GRIB 数据处理 | Python 气象工程师训练营 MeteoEng2 (GRIB Data Processing | Python Meteorological Engineer Training Camp, MeteoEng2).