-
Notifications
You must be signed in to change notification settings - Fork 447
Closed
Labels
Area: CalcPertains to calculationsPertains to calculationsType: BugSomething is not working like it shouldSomething is not working like it should
Milestone
Description
For bugs
-
Example data file :
UPAR_CHN_MUL_FTM-2018071612.txt -
Code :
# coding: utf-8
u'''
cal_sounding_data.py
* python = 3.6
此程序将对CMA的MUL_FTM探空资料进行清洗,然后提取温度、露点温度、气压等变量,并计算探空数据所需的状态曲线数据、
LCL、LFC、EL、CAPE、CIN等数据,最终将数据存储成指定格式的json文件
This program can clean MUL_FTM sounding data from CMA, and abstract temperature, dew point temperature,
pressure etc.. And it will calculate parcel profile, lcl, lfc, el, cape, cin etc.. Then it will save all of
variables into a json file.
V0.0.1 - 李文韬 - 2018/07/26 - [email protected] / [email protected]
+ 首次创建 first create.
'''
import time
import numpy as np
import pandas as pd
import metpy.calc as mpcalc
from metpy.units import units
import json
from sys import argv
import pdb
import matplotlib.pyplot as plt
import warnings
# ignore warnings
warnings.filterwarnings('ignore')
# inpath, outpath = argv[1], argv[2]
inpath = './UPAR_CHN_MUL_FTM-2018071612.txt'
outpath = './result.json'
# create first timestamp
time0 = time.time()
def cleanDataByPress(df):
u'''
clean data
'''
datapool = df.sort_values(by=['PRS_HWC'],ascending=False)
rm_index = []
for i in datapool.index:
dataline = datapool.loc[i]
if dataline['PRS_HWC'] > 900000 or (dataline['TEM'] > 900000 and dataline['DPT'] > 900000):
rm_index.append(i)
datapool.drop(rm_index,inplace=True)
datapool.drop_duplicates(['PRS_HWC'],'first',inplace=True)
return datapool
def dicFillValues2None(dic):
u'''
convert fill values to None
'''
for k in dic:
if dic[k] == 999999:
dic[k] = None
return dic
def CheckWind(wind):
u'''
check whether the wind speed is correct.
'''
for i in range(len(wind)):
if abs(wind[i]) > 10000:
wind[i] = None
return wind
def arrFillValues2None(arr,fill_value):
leng = len(arr)
for i in range(leng):
if arr[i] == fill_value:
arr[i] = np.nan
return arr
def dicRound2digit(dic):
for k in dic:
try:
length = len(dic[k])
except TypeError:
try:
dic[k] = round(dic[k],2)
except TypeError:
dic[k] = None
continue
dic[k] = list(dic[k])
for i in range(length):
try:
dic[k][i] = round(dic[k][i],2)
except TypeError:
continue
return dic
def dicNpNan2None(dic):
for k in dic:
try:
length = len(dic[k])
except TypeError:
if np.isnan(dic[k]):
dic[k] = None
continue
dic[k] = list(dic[k])
for i in range(length):
if np.isnan(dic[k][i]):
dic[k][i] = None
return dic
def soundingCalculation(std_id,p,t,td,ws,wd):
u'''
calculate sounding data
输入
---
std_id : int
站号编码
Stations' code
p : ndarray
气压层序列,相当于其他变量的索引,其长度须与t、td、ws、wd的长度相同
Pressere array, its length must be the same as t, td, ws and wd's. Pressure's unit is hPa
t : ndarray
气温层序列表,其单位为dgeC
Temperature array, its unit is degC.
td : ndarray
露点气温序列表,其单位为degC
Dew point temperature array, its unit is degC.
ws : ndarray
风速序列表,其单位为M/S
Wind speed array, its unit is M/S
wd : ndarray
风向序列表,为其单位为deg
Wind direction array, its unit is deg.
输出
---
new_dic : dict
处理过得到的数据集,其键包含{'P','T','Td','U','V','LCL','LFC','EL','Parcel','CAPE','CIN','Station_ID'}
按顺序分别表示
{气压、温度、露点温度、纬向风速、经向风速、抬升凝结高度、自由对流高度、平衡高度、气块状态曲线(温度)、对流不稳定能量、
对流抑制能量、观测站编号}
Dict that we want. Its keys include {'P','T','Td','U','V','LCL','LFC','EL','Parcel','CAPE','CIN','Station_ID'}
'''
P = p * units.hPa
T = t * units.degC
Td = td * units.degC
wind_speed = ws * (units.meter / units.second)
wind_dir = wd * units.degrees
u, v = mpcalc.get_wind_components(wind_speed, wind_dir)
# 当具备计算气块状态曲线条件的时候,计算状态曲线
# perform calculate when the conditions to calculate parcel profile and lcl etc. are met.
if P[0].magnitude < 900000 and T[0].magnitude < 900000 and Td[0].magnitude < 900000:
# 计算抬升凝结高度LCL
# calculate lcl
lcl_p, lcl_T = mpcalc.lcl(P[0],T[0],Td[0])
# 计算自由对流高度LFC
# calculate lfc
# pdb.set_trace()
lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
# try:
# # ************* 此处有bug ******************
# # ************* here apperes bug ***************
# lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
# except IndexError:
# pdb.set_trace()
# # 计算平衡高度EL
# calculate el
try:
el_p, el_T = mpcalc.el(P,T,Td)
except IndexError:
el_p = np.nan
el_T = np.nan
# 计算气块状态曲线
# calculate the parcel profile
try:
parcel = mpcalc.parcel_profile(P,T[0],Td[0]).to('degC')
except IndexError:
parcel = np.array([np.nan] * len(P))
try:
cape, cin = mpcalc.cape_cin(P,T,Td,parcel)
except IndexError:
cape = np.nan
cin = np.nan
Parcel = parcel.magnitude
# 不具备状态曲线条件的时候,填充为np.nan值
# if can't meet the conditions to calculate parcel profile, then fill it with np.nan
elif P[0].magnitude < 900000:
Parcel = np.array([np.nan] * len(P))
P = P.magnitude
T = arrFillValues2None(T.magnitude,999999)
Td = arrFillValues2None(Td.magnitude,999999)
U = np.round(CheckWind(u.magnitude),2)
V = np.round(CheckWind(v.magnitude),2)
Parcel = np.round(Parcel,2)
# 将抬升凝结高度点等变量去单位化,并保留2位小数
# remove units of variables and keep 2 digit decimal
# 如果值为空,或者不管出他妈的什么bug,先统一填充为np.nan
# if variable is None or appears other unexpected problems, then fill it with np.nan
try:
LCL_P = round(lcl_p.magnitude,2)
except:
LCL_P = np.nan
try:
LCL_T = round(lcl_T.magnitude,2)
except:
LCL_T = np.nan
try:
LFC_P = round(lfc_p.magnitude,2)
except:
LFC_P = np.nan
try:
LFC_T = round(lfc_T.magnitude,2)
except:
LFC_T = np.nan
try:
EL_P = round(el_p.magnitude,2)
except:
EL_P = np.nan
try:
EL_T = round(el_T.magnitude,2)
except:
EL_T = np.nan
try:
CAPE = round(cape.magnitude,2)
except:
CAPE = np.nan
try:
CIN = round(cin.magnitude,2)
except:
CIN = np.nan
new_dic = {'P':P,'T':T,'Td':Td,'U':U,'V':V,'LCL':[LCL_P,LCL_T],
'LFC':[LFC_P,LFC_T],'EL':[EL_P,EL_T],'Parcel':Parcel,
'CAPE':CAPE,'CIN':CIN,'Station_ID':std_id}
# 把np.nan格式统一改为内建None格式以便json输出时输出为null
# convert np.nan to None in order to output to "null" in json file
new_dic = dicNpNan2None(new_dic)
# 保留2位小数
# keep 2 digit decimal
new_dic = dicRound2digit(new_dic)
return new_dic
def specStationSounding(num,dataset_dic):
u'''
计算单站的所有探空数据
calculate all sounding data for one specific station.
输入
---
num : int
站点编号
stations's code
dataset_dic : dict
数据集字典,每个键对应的值应为pd.DataFrame数据
dataset dictionary, each key's value must be a data type of pd.DataFrame.
输出
---
result_dic : dict
数据集字典,每个键对应的值不再是pd.DataFrame,而是字典,即字典嵌套字典
dataset dictionary, each key's value is no longer a pd.DataFrame, instead, it's dictionary, too.
it's dictionary in dictionary.
'''
p = cleanDataByPress(dataset_dic[num])['PRS_HWC'].values
td = cleanDataByPress(dataset_dic[num])['DPT'].values
t = cleanDataByPress(dataset_dic[num])['TEM'].values
ws = cleanDataByPress(dataset_dic[num])['WIN_S'].values
wd = cleanDataByPress(dataset_dic[num])['WIN_D'].values
result_dic = soundingCalculation(num,p,t,td,ws,wd)
return result_dic
def dicAddAttrInfo(result_dic,orig_dic):
u'''
为新的数据字典添加属性类字段
输入
---
result_dic : dict
需添加属性类字段的字典
orig_dic : dict
含有属性类字段原始字典
输出
---
result_dic : dict
添加好属性类字段信息的新字典
'''
for k in orig_dic:
AttrKeys = ['Lon','Lat','Year','Mon','Day','Hour','Min','Second']
single = orig_dic[k].drop_duplicates(AttrKeys)
time = timeStrBySingle(single)
lon = single['Lon'].values[0]
lat = single['Lat'].values[0]
# result_dic[k]['Station_id'] = k
result_dic[k]['TIME'] = time
result_dic[k]['LON'] = lon
result_dic[k]['LAT'] = lat
return result_dic
def timeStrBySingle(single):
u'''
利用single输出标准的时间格式字符串
输入
---
single类型: pd.DataFrame
仅有一行的pd.DataFrame数据,即该站点删除重复时间后的第一行数据
输出
---
time类型: 字符串
例如:20180910143000
'''
yyyy = str(single['Year'].values[0])
mm = str(single['Mon'].values[0]).zfill(2)
dd = str(single['Day'].values[0]).zfill(2)
HH = str(single['Hour'].values[0]).zfill(2)
MM = str(single['Min'].values[0]).zfill(2)
SS = str(single['Second'].values[0]).zfill(2)
time = yyyy+mm+dd+HH+MM+SS
return time
if __name__ == '__main__':
# 读取所有数据
all_cols = pd.read_csv(inpath,sep='\t')
# 选取有用数据的字段
useful_col_names = ['Station_Id_C','Lat','Lon','Year','Mon','Day','Hour',
'Min','Second','PRS_HWC','TEM','DPT','WIN_D','WIN_S']
# 读取有用字段数据
dataset = all_cols[useful_col_names]
# 以观测站编号为键,创建字典,字典内存储的是pd.DataFrame表格
dataset_dic = {}
for name, group in dataset.groupby('Station_Id_C'):
dataset_dic[name] = group
# 计算探空数据,并存储在新建立的字典中
newset_dic = {}
for k in dataset_dic.keys():
newset_dic[k] = specStationSounding(k,dataset_dic)
# 在新建立的字典中添加属性类信息
newset_dic = dicAddAttrInfo(newset_dic,dataset_dic)
# 建立json格式的字符串
js_str = ''
for k in newset_dic:
js_str = js_str + json.dumps(newset_dic[k])+'\n'
# 保存文件
with open(outpath,'w') as f:
f.write(js_str)
# 打印脚本用时
time1 = time.time()
print(time1 - time0,end='\n')- Problem description:
My Python program raised IndexError: index 0 is out of bounds for axis 0 with size 0 when I call the metpy.calc.lfc() function.
The full error informations are as follow:
Traceback (most recent call last):
File "cal_sounding_data.py", line 360, in <module>
newset_dic[k] = specStationSounding(k,dataset_dic)
File "cal_sounding_data.py", line 284, in specStationSounding
result_dic = soundingCalculation(num,p,t,td,ws,wd)
File "cal_sounding_data.py", line 170, in soundingCalculation
lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
File "/anaconda2/envs/py3/lib/python3.6/site-packages/metpy/xarray.py", line 138, in wrapper
return func(*args, **kwargs)
File "/anaconda2/envs/py3/lib/python3.6/site-packages/metpy/units.py", line 290, in wrapper
return func(*args, **kwargs)
File "/anaconda2/envs/py3/lib/python3.6/site-packages/metpy/calc/thermo.py", line 378, in lfc
return x[0], y[0]
File "/anaconda2/envs/py3/lib/python3.6/site-packages/pint/quantity.py", line 1281, in __getitem__
value = self._magnitude[key]
IndexError: index 0 is out of bounds for axis 0 with size 0
I debugged it with pdb and inserted following code to catch the error:
try:
lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
except IndexError:
pdb.set_trace()I ran this program again and then checked variables of P, T and Td when it encounter that error.
It shows that:
(Pdb) P
<Quantity([ 1004. 1000. 943. 928. 925. 850. 839. 749. 700. 699.
603. 500. 404. 400. 363. 306. 300. 250. 213. 200.
176. 150. 120. 105. 100. 70. 66. 58. 50. 40.
33. 30. 23. 20. 16.], 'hectopascal')>
(Pdb) T
<Quantity([ 24.2 24. 20.2 21.6 21.4 20.4 20.2 14.4 13.2 13. 6.8 -3.3
-13.1 -13.7 -17.9 -25.5 -26.9 -37.9 -46.7 -48.7 -52.1 -58.9 -67.3 -66.5
-66.7 -65.1 -66.1 -60.9 -60.5 -57.7 -50.1 -50.3 -50.1 -47.9 -43.1], 'degC')>
(Pdb) Td
<Quantity([ 2.19000000e+01 2.21000000e+01 1.92000000e+01 2.05000000e+01
2.04000000e+01 1.84000000e+01 1.74000000e+01 8.40000000e+00
-2.80000000e+00 -3.00000000e+00 -1.52000000e+01 -2.03000000e+01
-2.91000000e+01 -2.77000000e+01 -2.49000000e+01 -3.95000000e+01
-4.19000000e+01 -5.19000000e+01 -6.07000000e+01 -6.27000000e+01
-6.51000000e+01 -7.19000000e+01 9.99999000e+05 9.99999000e+05
9.99999000e+05 9.99999000e+05 9.99999000e+05 9.99999000e+05
9.99999000e+05 9.99999000e+05 9.99999000e+05 9.99999000e+05
9.99999000e+05 9.99999000e+05 9.99999000e+05], 'degC')>
So it seems that P, T and Td are all correct. After all, they are actually not axis with size 0. Most of the time, this function can run well.
- Expected output :
Correct lfc_p and lfc_T. - Versions :
- Python version : 3.6.6
- MetPy version : 0.8.0
Metadata
Metadata
Assignees
Labels
Area: CalcPertains to calculationsPertains to calculationsType: BugSomething is not working like it shouldSomething is not working like it should