看到标题你可能会发笑:怎会有人写这种文章?简单读取然后存储即可
我们经常需要对大量的模型输出数据进行处理和分析。在气象学中,WRF(Weather Research and Forecasting Model)是一个常用的数值天气预报模型,它可以提供丰富的气象变量数据来帮助我们理解和预测天气现象。 为了更好地处理WRF模型输出数据(当然因为wrfout文件太大了!),我们经常需要批量提取其中的变量,并将提取的数据保存为NetCDF格式(.nc文件),这样可以方便我们后续的分析和可视化操作。
但是实际操作起来你会发现有些问题,比如下面报错
In [5]:
import xarray as xr
import os
from netCDF4 import Dataset
from wrf import getvar, ALL_TIMES
# 定义 WRF 文件夹路径和文件名前缀
wrfout_path = "/home/mw/input/wrfout3385"
filename_prefix = "wrfout_d02_"
wrf_files = sorted([os.path.join(wrfout_path, f) for f in os.listdir(wrfout_path) if f.startswith(filename_prefix)])
wrf_list = [Dataset(f) for f in wrf_files]
vars = ['T2', 'PSFC', 'RAINC','RAINNC', 'pressure', 'z', 'ua', 'va', 'dbz', 'mdbz', 'ter']
# 创建空的数据集
dataset = xr.Dataset()
for var in vars:
# 获取变量数据
var_data = getvar(wrf_list, var, timeidx=ALL_TIMES, method='cat')
# 将变量添加到数据集
dataset[var] = var_data
print(dataset)
# # 转换 projection 属性的值为字符串
# dataset.attrs['projection'] = str(dataset.attrs['projection'])
# # 保存数据集为NetCDF文件
dataset.to_netcdf('output.nc', engine='netcdf4')
print('End the program!')
<xarray.Dataset>
Dimensions: (south_north: 90, west_east: 90, Time: 9, bottom_top: 49)
Coordinates:
XLONG (south_north, west_east) float32 100.9 101.0 101.1 ... 111.0 111.1
XLAT (south_north, west_east) float32 27.24 27.24 27.23 ... 34.43 34.42
XTIME (Time) float64 360.0 420.0 480.0 540.0 ... 660.0 720.0 780.0 840.0
* Time (Time) datetime64[ns] 2022-07-14T06:00:00 ... 2022-07-14T14:00:00
datetime (Time) datetime64[ns] 2022-07-14T06:00:00 ... 2022-07-14T14:00:00
Dimensions without coordinates: south_north, west_east, bottom_top
Data variables:
T2 (Time, south_north, west_east) float32 293.7 293.6 ... 298.4 298.4
PSFC (Time, south_north, west_east) float32 7.226e+04 ... 9.094e+04
RAINC (Time, south_north, west_east) float32 0.3239 0.239 ... 13.77 13.1
RAINNC (Time, south_north, west_east) float32 0.0 0.0 0.0 ... 0.0 0.0 0.0
pressure (Time, bottom_top, south_north, west_east) float32 720.4 ... 51.94
z (Time, bottom_top, south_north, west_east) float32 2.857e+03 .....
ua (Time, bottom_top, south_north, west_east) float32 -0.1216 ... ...
va (Time, bottom_top, south_north, west_east) float32 3.404 ... -6...
dbz (Time, bottom_top, south_north, west_east) float32 -30.0 ... -30.0
mdbz (Time, south_north, west_east) float32 -30.0 -30.0 ... -30.0 -30.0
ter (Time, south_north, west_east) float32 2.831e+03 ... 839.3
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-8fe549e0f8e5> in <module>
27 # dataset.attrs['projection'] = str(dataset.attrs['projection'])
28 # # 保存数据集为NetCDF文件
---> 29 dataset.to_netcdf('output.nc', engine='netcdf4')
30
31 print('End the program!')
/opt/conda/lib/python3.9/site-packages/xarray/core/dataset.py in to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
1910 from xarray.backends.api import to_netcdf
1911
-> 1912 return to_netcdf( # type: ignore # mypy cannot resolve the overloads:(
1913 self,
1914 path,
/opt/conda/lib/python3.9/site-packages/xarray/backends/api.py in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
1183 # validate Dataset keys, DataArray names, and attr keys/values
1184 _validate_dataset_names(dataset)
-> 1185 _validate_attrs(dataset, invalid_netcdf=invalid_netcdf and engine == "h5netcdf")
1186
1187 try:
/opt/conda/lib/python3.9/site-packages/xarray/backends/api.py in _validate_attrs(dataset, invalid_netcdf)
206 for variable in dataset.variables.values():
207 for k, v in variable.attrs.items():
--> 208 check_attr(k, v, valid_types)
209
210
/opt/conda/lib/python3.9/site-packages/xarray/backends/api.py in check_attr(name, value, valid_types)
193
194 if not isinstance(value, valid_types):
--> 195 raise TypeError(
196 f"Invalid value for attr {name!r}: {value!r}. For serialization to "
197 "netCDF files, its value must be of one of the following types: "
TypeError: Invalid value for attr 'projection': LambertConformal(stand_lon=95.9219970703125, moad_cen_lat=29.99502182006836, truelat1=33.891998291015625, truelat2=33.891998291015625, pole_lat=90.0, pole_lon=0.0). For serialization to netCDF files, its value must be of one of the following types: str, Number, ndarray, number, list, tuple
为什么会有这种问题呢 github上有老哥解释: Note that the XLAT XLON coordinates of WRF files are not sufficient to retrieve the WRF projection in the current format of your dataset. wrf-python or salem need the original dataset attributes to parse the projection information (STAND_LON, TRUELAT1 & 2, etc). wrf-python should probably store their projection as a string rather than an object, though.
简而言之就是wrfpython的projection是str没法识别 并有人提出方法:删除投影
In [25]:
import xarray as xr
import os
from netCDF4 import Dataset
from wrf import getvar, ALL_TIMES
def write_xarray_to_netcdf(xarray_array, output_path, mode='w', format='NETCDF4', group=None, engine=None, encoding=None):
"""将 xarray 数据写入 NetCDF 格式的输出文件。
使用适用于 wrf-python 的 xarray 数据结构。将投影对象转换为字符串以便作为 NetCDF 属性使用。
:param xarray_array: xarray.DataArray
:param output_path: str,输出文件路径
:param mode: str,文件打开模式(默认为 'w')
:param format: str,NetCDF 文件格式('NETCDF4'、'NETCDF4_CLASSIC'、'NETCDF3_64BIT' 或 'NETCDF3_CLASSIC')
默认为 'NETCDF4'
:param group: str,组名(默认为 None)
:param engine: str,NetCDF 引擎('netcdf4'、'scipy' 或 'h5netcdf')
:param encoding: dict,编码设置(默认为 None)
"""
xarray_array_out = xarray_array.copy()
del xarray_array_out.attrs['coordinates']
xarray_array_out.attrs['projection'] = str(xarray_array_out.attrs['projection'])
try:
xarray_array_out.to_netcdf(path=output_path, mode=mode, format=format, group=group, engine=engine, encoding=encoding)
print(f"数据成功写入至 {output_path}")
except Exception as e:
print(f"写入数据至 {output_path} 时发生错误:{e}")
# 定义 WRF 文件夹路径和文件名前缀
wrfout_path = "/home/mw/input/wrfout3385"
filename_prefix = "wrfout_d02_"
wrf_files = sorted([os.path.join(wrfout_path, f) for f in os.listdir(wrfout_path) if f.startswith(filename_prefix)])
wrf_list = [Dataset(f) for f in wrf_files]
# 创建空的数据集
dataset = xr.Dataset()
var_data = getvar(wrf_list, 'mdbz', timeidx=ALL_TIMES, method='cat')
dataset = var_data
print(dataset)
output_path = "./out.nc"
write_xarray_to_netcdf(dataset, output_path)
In [16]:
mdbz = xr.open_dataset('/home/mw/project/out.nc')
mdbz.max_dbz[0].plot.()
Out[17]:
<matplotlib.collections.QuadMesh at 0x7fe43d45a340>
需要遍历每个变量删除proj
In [22]:
import os
import xarray as xr
from netCDF4 import Dataset
from wrf import getvar, ALL_TIMES
def write_xarray_to_netcdf(xarray_array, mode='w', format='NETCDF4', group=None, encoding=None):
"""将 xarray 写入 NetCDF 格式的输出文件
使用适用于 wrf-python 的 xarray 结构。将投影对象转换为字符串,以便可以将其作为 NetCDF 属性使用
:param xarray_array: xarray.DataArray
:param mode: 文件打开模式,默认为 'w'
:param format: 文件格式,'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT' 或 'NETCDF3_CLASSIC',默认为 'NETCDF4'
:param group: 组名,默认为 None
:param encoding: 编码设置,默认为 None
"""
xarray_array_out = xarray_array.copy()
# 从变量中删除坐标信息
del xarray_array_out.attrs['coordinates']
# wrf-python 投影对象无法处理,将其转换为字符串
xarray_array_out.attrs['projection'] = str(xarray_array_out.attrs['projection'])
return xarray_array_out
# 定义 WRF 文件夹路径和文件名前缀
wrfout_path = "/home/mw/input/wrfout3385"
filename_prefix = "wrfout_d02_"
wrf_files = sorted([os.path.join(wrfout_path, f) for f in os.listdir(wrfout_path) if f.startswith(filename_prefix)])
wrf_list = [Dataset(f) for f in wrf_files]
variables = ['T2', 'PSFC', 'RAINC', 'RAINNC', 'pressure', 'z', 'ua', 'va', 'dbz', 'mdbz', 'ter']
# 创建空的数据集
dataset = xr.Dataset()
for var in variables:
# 获取变量数据
var_data = getvar(wrf_list, var, timeidx=ALL_TIMES, method='cat')
var_data = write_xarray_to_netcdf(var_data)
# 将变量添加到数据集
dataset[var] = var_data
print(dataset)
# 保存数据集为 NetCDF 文件
dataset.to_netcdf('output.nc')
print('End the program!')
<xarray.Dataset>
Dimensions: (south_north: 90, west_east: 90, Time: 9, bottom_top: 49)
Coordinates:
XLONG (south_north, west_east) float32 100.9 101.0 101.1 ... 111.0 111.1
XLAT (south_north, west_east) float32 27.24 27.24 27.23 ... 34.43 34.42
XTIME (Time) float64 360.0 420.0 480.0 540.0 ... 660.0 720.0 780.0 840.0
* Time (Time) datetime64[ns] 2022-07-14T06:00:00 ... 2022-07-14T14:00:00
datetime (Time) datetime64[ns] 2022-07-14T06:00:00 ... 2022-07-14T14:00:00
Dimensions without coordinates: south_north, west_east, bottom_top
Data variables:
T2 (Time, south_north, west_east) float32 293.7 293.6 ... 298.4 298.4
PSFC (Time, south_north, west_east) float32 7.226e+04 ... 9.094e+04
RAINC (Time, south_north, west_east) float32 0.3239 0.239 ... 13.77 13.1
RAINNC (Time, south_north, west_east) float32 0.0 0.0 0.0 ... 0.0 0.0 0.0
pressure (Time, bottom_top, south_north, west_east) float32 720.4 ... 51.94
z (Time, bottom_top, south_north, west_east) float32 2.857e+03 .....
ua (Time, bottom_top, south_north, west_east) float32 -0.1216 ... ...
va (Time, bottom_top, south_north, west_east) float32 3.404 ... -6...
dbz (Time, bottom_top, south_north, west_east) float32 -30.0 ... -30.0
mdbz (Time, south_north, west_east) float32 -30.0 -30.0 ... -30.0 -30.0
ter (Time, south_north, west_east) float32 2.831e+03 ... 839.3
End the program!
In [23]:
da = xr.open_dataset('/home/mw/project/output.nc')
da
点击此处有完整代码和可在线运行