Skip to content

calc.lfc( ) bug report. #902

@Clarmy

Description

@Clarmy

For bugs

# coding: utf-8

u'''
    cal_sounding_data.py  

    * python = 3.6

    此程序将对CMA的MUL_FTM探空资料进行清洗,然后提取温度、露点温度、气压等变量,并计算探空数据所需的状态曲线数据、
LCL、LFC、EL、CAPE、CIN等数据,最终将数据存储成指定格式的json文件

    This program can clean MUL_FTM sounding data from CMA, and abstract temperature, dew point temperature,
pressure etc.. And it will calculate parcel profile, lcl, lfc, el, cape, cin etc.. Then it will save all of 
variables into a json file.

V0.0.1 - 李文韬 - 2018/07/26 - [email protected] / [email protected]
    + 首次创建 first create.
'''

import time
import numpy as np
import pandas as pd
import metpy.calc as mpcalc
from metpy.units import units
import json
from sys import argv
import pdb
import matplotlib.pyplot as plt
import warnings

# ignore warnings
warnings.filterwarnings('ignore')

# inpath, outpath = argv[1], argv[2]
inpath = './UPAR_CHN_MUL_FTM-2018071612.txt'
outpath = './result.json'

# create first timestamp
time0 = time.time()

def cleanDataByPress(df):
    u'''
    clean data
    '''
    datapool = df.sort_values(by=['PRS_HWC'],ascending=False)
    rm_index = []
    for i in datapool.index:
        dataline = datapool.loc[i]
        if dataline['PRS_HWC'] > 900000 or (dataline['TEM'] > 900000 and dataline['DPT'] > 900000):
                rm_index.append(i)

    datapool.drop(rm_index,inplace=True)
    datapool.drop_duplicates(['PRS_HWC'],'first',inplace=True)

    return datapool

def dicFillValues2None(dic):
    u'''
    convert fill values to None 
    '''
    for k in dic:
        if dic[k] == 999999:
            dic[k] = None
    return dic

def CheckWind(wind):
    u'''
    check whether the wind speed is correct.
    '''
    for i in range(len(wind)):
        if abs(wind[i]) > 10000:
            wind[i] = None
    return wind

def arrFillValues2None(arr,fill_value):
    leng = len(arr)
    for i in range(leng):
        if arr[i] == fill_value:
            arr[i] = np.nan
    return arr

def dicRound2digit(dic):
    for k in dic:
        try:
            length = len(dic[k])
        except TypeError:
            try:
                dic[k] = round(dic[k],2)
            except TypeError:
                dic[k] = None
            continue

        dic[k] = list(dic[k])
        for i in range(length):
            try:
                dic[k][i] = round(dic[k][i],2)
            except TypeError:
                continue
    return dic

def dicNpNan2None(dic):
    for k in dic:
        try:
            length = len(dic[k])
        except TypeError:
            if np.isnan(dic[k]):
                dic[k] = None
            continue 
        dic[k] = list(dic[k])
        for i in range(length):
            if np.isnan(dic[k][i]):
                dic[k][i] = None
    return dic

def soundingCalculation(std_id,p,t,td,ws,wd):
    u'''
    calculate sounding data

    输入
    ---
    std_id : int
        站号编码
        Stations' code

    p : ndarray
        气压层序列,相当于其他变量的索引,其长度须与t、td、ws、wd的长度相同
        Pressere array, its length must be the same as t, td, ws and wd's. Pressure's unit is hPa

    t : ndarray
        气温层序列表,其单位为dgeC
        Temperature array, its unit is degC.

    td : ndarray
        露点气温序列表,其单位为degC
        Dew point temperature array, its unit is degC.

    ws : ndarray
        风速序列表,其单位为M/S
        Wind speed array, its unit is M/S

    wd : ndarray
        风向序列表,为其单位为deg
        Wind direction array, its unit is deg.

    输出
    ---
    new_dic : dict
        处理过得到的数据集,其键包含{'P','T','Td','U','V','LCL','LFC','EL','Parcel','CAPE','CIN','Station_ID'}
        按顺序分别表示
        {气压、温度、露点温度、纬向风速、经向风速、抬升凝结高度、自由对流高度、平衡高度、气块状态曲线(温度)、对流不稳定能量、
        对流抑制能量、观测站编号}
        Dict that we want. Its keys include {'P','T','Td','U','V','LCL','LFC','EL','Parcel','CAPE','CIN','Station_ID'}

    '''
    P = p * units.hPa
    T = t * units.degC
    Td = td * units.degC
    wind_speed = ws * (units.meter / units.second)
    wind_dir = wd * units.degrees
    u, v = mpcalc.get_wind_components(wind_speed, wind_dir)
    
    # 当具备计算气块状态曲线条件的时候,计算状态曲线 
    # perform calculate when the conditions to calculate parcel profile and lcl etc. are met. 
    if P[0].magnitude < 900000 and T[0].magnitude < 900000 and  Td[0].magnitude < 900000:
        # 计算抬升凝结高度LCL
        # calculate lcl
        lcl_p, lcl_T = mpcalc.lcl(P[0],T[0],Td[0])
        # 计算自由对流高度LFC
        # calculate lfc
        # pdb.set_trace()
        lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
        # try:
        #     # *************    此处有bug   ******************
        #     # ************* here apperes bug ***************
        #     lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
        # except IndexError:
        #     pdb.set_trace()
        # # 计算平衡高度EL
        # calculate el
        try:
            el_p, el_T = mpcalc.el(P,T,Td)
        except IndexError:
            el_p = np.nan
            el_T = np.nan
        # 计算气块状态曲线
        # calculate the parcel profile
        try:
            parcel = mpcalc.parcel_profile(P,T[0],Td[0]).to('degC')
        except IndexError:
            parcel = np.array([np.nan] * len(P))
        try:
            cape, cin = mpcalc.cape_cin(P,T,Td,parcel)
        except IndexError:
            cape = np.nan
            cin = np.nan
        Parcel = parcel.magnitude
    # 不具备状态曲线条件的时候,填充为np.nan值
    # if can't meet the conditions to calculate parcel profile, then fill it with np.nan
    elif P[0].magnitude < 900000:
        Parcel = np.array([np.nan] * len(P))

    P = P.magnitude
    T = arrFillValues2None(T.magnitude,999999)
    Td = arrFillValues2None(Td.magnitude,999999)
    U = np.round(CheckWind(u.magnitude),2)
    V = np.round(CheckWind(v.magnitude),2)
    Parcel = np.round(Parcel,2)
    
    # 将抬升凝结高度点等变量去单位化,并保留2位小数
    # remove units of variables and keep 2 digit decimal
    # 如果值为空,或者不管出他妈的什么bug,先统一填充为np.nan
    # if variable is None or appears other unexpected problems, then fill it with np.nan
    try:
        LCL_P  = round(lcl_p.magnitude,2)
    except:
        LCL_P  = np.nan
    try:
        LCL_T  = round(lcl_T.magnitude,2)
    except:
        LCL_T  = np.nan
    try:
        LFC_P  = round(lfc_p.magnitude,2)
    except:
        LFC_P  = np.nan
    try:
        LFC_T  = round(lfc_T.magnitude,2)
    except:
        LFC_T  = np.nan
    try:
        EL_P   = round(el_p.magnitude,2)
    except:
        EL_P   = np.nan
    try:
        EL_T   = round(el_T.magnitude,2)
    except:
        EL_T   = np.nan
    try:
        CAPE   = round(cape.magnitude,2)
    except:
        CAPE   = np.nan
    try:
        CIN    = round(cin.magnitude,2)
    except:
        CIN    = np.nan
        
    new_dic = {'P':P,'T':T,'Td':Td,'U':U,'V':V,'LCL':[LCL_P,LCL_T],
               'LFC':[LFC_P,LFC_T],'EL':[EL_P,EL_T],'Parcel':Parcel,
               'CAPE':CAPE,'CIN':CIN,'Station_ID':std_id}
    # 把np.nan格式统一改为内建None格式以便json输出时输出为null
    # convert np.nan to None in order to output to "null" in json file
    new_dic = dicNpNan2None(new_dic)
    # 保留2位小数
    # keep 2 digit decimal
    new_dic = dicRound2digit(new_dic)
    return new_dic

def specStationSounding(num,dataset_dic):
    u'''
    计算单站的所有探空数据
    calculate all sounding data for one specific station.

    输入
    ---
    num : int
        站点编号
        stations's code

    dataset_dic : dict
        数据集字典,每个键对应的值应为pd.DataFrame数据
        dataset dictionary, each key's value must be a data type of pd.DataFrame.

    输出
    ---
    result_dic : dict
        数据集字典,每个键对应的值不再是pd.DataFrame,而是字典,即字典嵌套字典
        dataset dictionary, each key's value is no longer a pd.DataFrame, instead, it's dictionary, too.
        it's dictionary in dictionary.
    '''
    p = cleanDataByPress(dataset_dic[num])['PRS_HWC'].values
    td = cleanDataByPress(dataset_dic[num])['DPT'].values
    t = cleanDataByPress(dataset_dic[num])['TEM'].values
    ws = cleanDataByPress(dataset_dic[num])['WIN_S'].values
    wd = cleanDataByPress(dataset_dic[num])['WIN_D'].values
    
    result_dic = soundingCalculation(num,p,t,td,ws,wd)
    
    return result_dic

def dicAddAttrInfo(result_dic,orig_dic):
    u'''
    为新的数据字典添加属性类字段

    输入
    ---
    result_dic : dict
        需添加属性类字段的字典
    orig_dic : dict
        含有属性类字段原始字典

    输出
    ---
    result_dic : dict
        添加好属性类字段信息的新字典
    '''
    for k in orig_dic:
        AttrKeys = ['Lon','Lat','Year','Mon','Day','Hour','Min','Second']
        single = orig_dic[k].drop_duplicates(AttrKeys)
        time = timeStrBySingle(single)
        lon = single['Lon'].values[0]
        lat = single['Lat'].values[0]
        
        # result_dic[k]['Station_id'] = k
        result_dic[k]['TIME'] = time
        result_dic[k]['LON'] = lon
        result_dic[k]['LAT'] = lat
    return result_dic

def timeStrBySingle(single):
    u'''
    利用single输出标准的时间格式字符串
    
    输入
    ---
    single类型: pd.DataFrame
        仅有一行的pd.DataFrame数据,即该站点删除重复时间后的第一行数据

    输出
    ---
    time类型: 字符串
        例如:20180910143000
    '''
    yyyy = str(single['Year'].values[0])
    mm = str(single['Mon'].values[0]).zfill(2)
    dd = str(single['Day'].values[0]).zfill(2)
    HH = str(single['Hour'].values[0]).zfill(2)
    MM = str(single['Min'].values[0]).zfill(2)
    SS = str(single['Second'].values[0]).zfill(2)
    time = yyyy+mm+dd+HH+MM+SS
    
    return time

if __name__ == '__main__':

    # 读取所有数据
    all_cols = pd.read_csv(inpath,sep='\t')

    # 选取有用数据的字段
    useful_col_names = ['Station_Id_C','Lat','Lon','Year','Mon','Day','Hour',
                        'Min','Second','PRS_HWC','TEM','DPT','WIN_D','WIN_S']
    # 读取有用字段数据
    dataset = all_cols[useful_col_names]

    # 以观测站编号为键,创建字典,字典内存储的是pd.DataFrame表格
    dataset_dic = {}
    for name, group in dataset.groupby('Station_Id_C'):
        dataset_dic[name] = group

    # 计算探空数据,并存储在新建立的字典中
    newset_dic = {}
    for k in dataset_dic.keys():
        newset_dic[k] = specStationSounding(k,dataset_dic)

    # 在新建立的字典中添加属性类信息
    newset_dic = dicAddAttrInfo(newset_dic,dataset_dic)

    # 建立json格式的字符串
    js_str = ''
    for k in newset_dic:
        js_str = js_str + json.dumps(newset_dic[k])+'\n'

    # 保存文件
    with open(outpath,'w') as f:
        f.write(js_str)

    # 打印脚本用时
    time1 = time.time()
    print(time1 - time0,end='\n')
  • Problem description:

My Python program raised IndexError: index 0 is out of bounds for axis 0 with size 0 when I call the metpy.calc.lfc() function.

The full error informations are as follow:

Traceback (most recent call last):
  File "cal_sounding_data.py", line 360, in <module>
    newset_dic[k] = specStationSounding(k,dataset_dic)
  File "cal_sounding_data.py", line 284, in specStationSounding
    result_dic = soundingCalculation(num,p,t,td,ws,wd)
  File "cal_sounding_data.py", line 170, in soundingCalculation
    lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
  File "/anaconda2/envs/py3/lib/python3.6/site-packages/metpy/xarray.py", line 138, in wrapper
    return func(*args, **kwargs)
  File "/anaconda2/envs/py3/lib/python3.6/site-packages/metpy/units.py", line 290, in wrapper
    return func(*args, **kwargs)
  File "/anaconda2/envs/py3/lib/python3.6/site-packages/metpy/calc/thermo.py", line 378, in lfc
    return x[0], y[0]
  File "/anaconda2/envs/py3/lib/python3.6/site-packages/pint/quantity.py", line 1281, in __getitem__
    value = self._magnitude[key]
IndexError: index 0 is out of bounds for axis 0 with size 0

I debugged it with pdb and inserted following code to catch the error:

try:
    lfc_p, lfc_T = mpcalc.lfc(P,T,Td)
except IndexError:
    pdb.set_trace()

I ran this program again and then checked variables of P, T and Td when it encounter that error.

It shows that:

(Pdb) P
<Quantity([ 1004.  1000.   943.   928.   925.   850.   839.   749.   700.   699.
   603.   500.   404.   400.   363.   306.   300.   250.   213.   200.
   176.   150.   120.   105.   100.    70.    66.    58.    50.    40.
    33.    30.    23.    20.    16.], 'hectopascal')>
(Pdb) T
<Quantity([ 24.2  24.   20.2  21.6  21.4  20.4  20.2  14.4  13.2  13.    6.8  -3.3
 -13.1 -13.7 -17.9 -25.5 -26.9 -37.9 -46.7 -48.7 -52.1 -58.9 -67.3 -66.5
 -66.7 -65.1 -66.1 -60.9 -60.5 -57.7 -50.1 -50.3 -50.1 -47.9 -43.1], 'degC')>
(Pdb) Td
<Quantity([  2.19000000e+01   2.21000000e+01   1.92000000e+01   2.05000000e+01
   2.04000000e+01   1.84000000e+01   1.74000000e+01   8.40000000e+00
  -2.80000000e+00  -3.00000000e+00  -1.52000000e+01  -2.03000000e+01
  -2.91000000e+01  -2.77000000e+01  -2.49000000e+01  -3.95000000e+01
  -4.19000000e+01  -5.19000000e+01  -6.07000000e+01  -6.27000000e+01
  -6.51000000e+01  -7.19000000e+01   9.99999000e+05   9.99999000e+05
   9.99999000e+05   9.99999000e+05   9.99999000e+05   9.99999000e+05
   9.99999000e+05   9.99999000e+05   9.99999000e+05   9.99999000e+05
   9.99999000e+05   9.99999000e+05   9.99999000e+05], 'degC')>

So it seems that P, T and Td are all correct. After all, they are actually not axis with size 0. Most of the time, this function can run well.

  • Expected output :
    Correct lfc_p and lfc_T.
  • Versions :
    • Python version : 3.6.6
    • MetPy version : 0.8.0

Metadata

Metadata

Assignees

Labels

Area: CalcPertains to calculationsType: BugSomething is not working like it should

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions