Skip to content

Commit 78c93bc

Browse files
authored
Merge pull request #4926 from roshansh-cmu/extracted_fix
fix extracted feature dummy generation
2 parents 5d4615f + 6fcbfbb commit 78c93bc

File tree

4 files changed

+30
-6
lines changed

4 files changed

+30
-6
lines changed

egs2/TEMPLATE/asr1/asr.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,8 +620,14 @@ if ! "${skip_data_prep}"; then
620620
_suf=""
621621
fi
622622
# Generate dummy wav.scp to avoid error by copy_data_dir.sh
623-
<data/"${dset}"/feats.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
624-
utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
623+
if [ ! -f data/"${dset}"/wav.scp ]; then
624+
if [ ! -f data/"${dset}"/segments ]; then
625+
<data/"${dset}"/feats.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
626+
else
627+
<data/"${dset}"/segments awk ' { print($2,"<DUMMY>") }' > data/"${dset}"/wav.scp
628+
fi
629+
fi
630+
utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
625631

626632
# Copy reference text files if there is more than 1 reference
627633
# shellcheck disable=SC2068

egs2/TEMPLATE/slu1/slu.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,13 @@ if ! "${skip_data_prep}"; then
573573
_suf=""
574574
fi
575575
# Generate dummy wav.scp to avoid error by copy_data_dir.sh
576-
<data/"${dset}"/cmvn.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
576+
if [ ! -f data/"${dset}"/wav.scp ]; then
577+
if [ ! -f data/"${dset}"/segments ]; then
578+
<data/"${dset}"/feats.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
579+
else
580+
<data/"${dset}"/segments awk ' { print($2,"<DUMMY>") }' > data/"${dset}"/wav.scp
581+
fi
582+
fi
577583
utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
578584

579585
# Derive the frame length and feature dimension
@@ -1657,4 +1663,4 @@ else
16571663
log "Skip the uploading to HuggingFace stage"
16581664
fi
16591665

1660-
log "Successfully finished. [elapsed=${SECONDS}s]"
1666+
log "Successfully finished. [elapsed=${SECONDS}s]"

egs2/TEMPLATE/st1/st.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,13 @@ if ! "${skip_data_prep}"; then
616616
_suf=""
617617
fi
618618
# Generate dummy wav.scp to avoid error by copy_data_dir.sh
619-
<data/"${dset}"/cmvn.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
619+
if [ ! -f data/"${dset}"/wav.scp ]; then
620+
if [ ! -f data/"${dset}"/segments ]; then
621+
<data/"${dset}"/feats.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
622+
else
623+
<data/"${dset}"/segments awk ' { print($2,"<DUMMY>") }' > data/"${dset}"/wav.scp
624+
fi
625+
fi
620626
utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
621627

622628
# expand the utt_extra_files for multi-references

egs2/TEMPLATE/uasr1/uasr.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,13 @@ if ! "${skip_data_prep}"; then
589589
_suf=""
590590
fi
591591
# Generate dummy wav.scp to avoid error by copy_data_dir.sh
592-
<data/"${dset}"/cmvn.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
592+
if [ ! -f data/"${dset}"/wav.scp ]; then
593+
if [ ! -f data/"${dset}"/segments ]; then
594+
<data/"${dset}"/feats.scp awk ' { print($1,"<DUMMY>") }' > data/"${dset}"/wav.scp
595+
else
596+
<data/"${dset}"/segments awk ' { print($2,"<DUMMY>") }' > data/"${dset}"/wav.scp
597+
fi
598+
fi
593599
utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
594600

595601
# Derive the frame length and feature dimension

0 commit comments

Comments
 (0)