Skip to content

Commit

Permalink
[src] modify feature-extraction binaries to support downsampling (kal…
Browse files Browse the repository at this point in the history
  • Loading branch information
pegahgh authored and danpovey committed Aug 9, 2017
1 parent 4a0106a commit cc2469e
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 84 deletions.
60 changes: 50 additions & 10 deletions src/feat/feature-common-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,70 @@
#ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
#define KALDI_FEAT_FEATURE_COMMON_INL_H_

#include "feat/resample.h"
// Do not include this file directly. It is included by feat/feature-common.h

namespace kaldi {

// Computes features for one waveform after reconciling the waveform's
// sampling frequency with the one in the frame-extraction options.
//
//  - If 'sample_freq' equals the configured samp_freq, this just calls
//    Compute().
//  - If 'sample_freq' is higher, the waveform is downsampled to the
//    configured frequency first; this is an error unless the
//    allow_downsample frame option is true.
//  - If 'sample_freq' is lower, it is an error.
//
// 'wave' is the input waveform, 'vtln_warp' is passed through to Compute(),
// and 'output' (must be non-NULL) receives the feature matrix.
template <class F>
void OfflineFeatureTpl<F>::ComputeFeatures(
    const VectorBase<BaseFloat> &wave,
    BaseFloat sample_freq,
    BaseFloat vtln_warp,
    Matrix<BaseFloat> *output) {
  KALDI_ASSERT(output != NULL);
  const BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
  if (sample_freq == new_sample_freq) {
    Compute(wave, vtln_warp, output);
  } else if (new_sample_freq < sample_freq) {
    if (!computer_.GetFrameOptions().allow_downsample)
      KALDI_ERR << "Waveform and config sample frequency mismatch: "
                << sample_freq << " vs. " << new_sample_freq
                << " (use the --allow-downsample=true option to allow"
                << " downsampling the waveform).";

    // Downsample the waveform.  The resampler resizes its output, so we
    // start from an empty vector rather than a copy of 'wave'.
    Vector<BaseFloat> downsampled_wave;
    DownsampleWaveForm(sample_freq, wave,
                       new_sample_freq, &downsampled_wave);
    Compute(downsampled_wave, vtln_warp, output);
  } else {
    // Upsampling is not supported.
    KALDI_ERR << "The waveform is only allowed to get downsampled. "
              << "New sample frequency " << new_sample_freq
              << " is larger than waveform original sampling frequency "
              << sample_freq;
  }
}

// This const version of ComputeFeatures() is a wrapper that calls the
// non-const ComputeFeatures() on a temporary object that is a copy of
// *this.  It is not as efficient because of the overhead of copying *this.
template <class F>
void OfflineFeatureTpl<F>::ComputeFeatures(
    const VectorBase<BaseFloat> &wave,
    BaseFloat sample_freq,
    BaseFloat vtln_warp,
    Matrix<BaseFloat> *output) const {
  OfflineFeatureTpl<F> temp(*this);
  // Forward all four arguments; 'sample_freq' is needed so the non-const
  // version can check the waveform's sampling frequency against the config.
  temp.ComputeFeatures(wave, sample_freq, vtln_warp, output);
}

template <class F>
void OfflineFeatureTpl<F>::Compute(
const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *deprecated_wave_remainder) {
Matrix<BaseFloat> *output) {
KALDI_ASSERT(output != NULL);
int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
cols_out = computer_.Dim();
if (rows_out == 0) {
output->Resize(0, 0);
if (deprecated_wave_remainder != NULL)
*deprecated_wave_remainder = wave;
return;
}
output->Resize(rows_out, cols_out);
if (deprecated_wave_remainder != NULL)
ExtractWaveformRemainder(wave, computer_.GetFrameOptions(),
deprecated_wave_remainder);
Vector<BaseFloat> window; // windowed waveform.
bool use_raw_log_energy = computer_.NeedRawLogEnergy();
for (int32 r = 0; r < rows_out; r++) { // r is frame index.
Expand All @@ -60,13 +101,12 @@ template <class F>
void OfflineFeatureTpl<F>::Compute(
const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *deprecated_wave_remainder) const {
Matrix<BaseFloat> *output) const {
OfflineFeatureTpl<F> temp(*this);
// call the non-const version of Compute() on a temporary copy of this object.
// This is a workaround for const-ness that may sometimes be useful in
// multi-threaded code, although it's not optimally efficient.
temp.Compute(wave, vtln_warp, output, deprecated_wave_remainder);
temp.Compute(wave, vtln_warp, output);
}

} // end namespace kaldi
Expand Down
47 changes: 36 additions & 11 deletions src/feat/feature-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,25 +118,50 @@ class OfflineFeatureTpl {
computer_(opts),
feature_window_function_(computer_.GetFrameOptions()) { }

// Computes the features for one file (one sequence of features).
// Use of the 'deprecatd_wave_remainder' argument is highly deprecated; it is
// only provided for back-compatibility for code that may have
// relied on the older interface. It's deprecated because it
// doesn't support the --snip-edges=false option, and because
// we plan to eventually remove this argument so that there
// will be only one way to do online feature extraction.
// Internal (and back-compatibility) interface for computing features, which
// requires that the user has already checked that the sampling frequency
// of the waveform is equal to the sampling frequency specified in
// the frame-extraction options.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *deprecated_wave_remainder = NULL);
Matrix<BaseFloat> *output);

// This const version of Compute() is a wrapper that
// calls the non-const version on a temporary object.
// It's less efficient than the non-const version.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output,
Vector<BaseFloat> *deprecated_wave_remainder = NULL) const;
Matrix<BaseFloat> *output) const;

/**
Computes the features for one file (one sequence of features).
This is the newer interface where you specify the sample frequency
of the input waveform.
@param [in] wave The input waveform
@param [in] sample_freq The sampling frequency with which
'wave' was sampled.
If sample_freq is higher than the frequency
specified in the config, we will downsample
the waveform, provided the allow_downsample
frame option is true (otherwise it's an
error); if it is lower, it's always an error.
@param [in] vtln_warp The VTLN warping factor (will normally
be 1.0)
@param [out] output The matrix of features, where the row-index
is the frame index.
*/
void ComputeFeatures(const VectorBase<BaseFloat> &wave,
BaseFloat sample_freq,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output);
/**
This const version of ComputeFeatures() is a wrapper that
calls the non-const ComputeFeatures() on a temporary object
that is a copy of *this. It is not as efficient because of the
overhead of copying *this.
*/
void ComputeFeatures(const VectorBase<BaseFloat> &wave,
BaseFloat sample_freq,
BaseFloat vtln_warp,
Matrix<BaseFloat> *output) const;

int32 Dim() const { return computer_.Dim(); }

Expand Down
10 changes: 5 additions & 5 deletions src/feat/feature-fbank-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ static void UnitTestSimple() {
// use default parameters

// compute fbanks.
fbank.Compute(v, 1.0, &m, NULL);
fbank.Compute(v, 1.0, &m);

// possibly dump
// std::cout << "== Output features == \n" << m;
Expand Down Expand Up @@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
fbank.Compute(waveform, 1.0, &kaldi_features);


std::cout << "<<<=== Compare with HTK features...\n";
Expand Down Expand Up @@ -224,7 +224,7 @@ static void UnitTestHTKCompare2() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
fbank.Compute(waveform, 1.0, &kaldi_features);


std::cout << "<<<=== Compare with HTK features...\n";
Expand Down Expand Up @@ -308,7 +308,7 @@ static void UnitTestHTKCompare3() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
fbank.Compute(waveform, vtln_warp, &kaldi_features);


std::cout << "<<<=== Compare with HTK features...\n";
Expand Down Expand Up @@ -394,7 +394,7 @@ static void UnitTestHTKCompare4() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
fbank.Compute(waveform, vtln_warp, &kaldi_features);


std::cout << "<<<=== Compare with HTK features...\n";
Expand Down
14 changes: 7 additions & 7 deletions src/feat/feature-mfcc-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ static void UnitTestSimple() {
// use default parameters

// compute mfccs.
mfcc.Compute(v, 1.0, &m, NULL);
mfcc.Compute(v, 1.0, &m);

// possibly dump
// std::cout << "== Output features == \n" << m;
Expand Down Expand Up @@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down Expand Up @@ -227,7 +227,7 @@ static void UnitTestHTKCompare2() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down Expand Up @@ -312,7 +312,7 @@ static void UnitTestHTKCompare3() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down Expand Up @@ -395,7 +395,7 @@ static void UnitTestHTKCompare4() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down Expand Up @@ -483,7 +483,7 @@ static void UnitTestHTKCompare5() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features, NULL);
mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down Expand Up @@ -568,7 +568,7 @@ static void UnitTestHTKCompare6() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down
4 changes: 2 additions & 2 deletions src/feat/feature-plp-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static void UnitTestSimple() {
// use default parameters

// compute mfccs.
plp.Compute(v, 1.0, &m, NULL);
plp.Compute(v, 1.0, &m);

// possibly dump
// std::cout << "== Output features == \n" << m;
Expand Down Expand Up @@ -102,7 +102,7 @@ static void UnitTestHTKCompare1() {

// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
plp.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
plp.Compute(waveform, 1.0, &kaldi_raw_features);

DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
Expand Down
2 changes: 1 addition & 1 deletion src/feat/feature-sdc-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ int main() {
op.use_energy = false;
Mfcc mfcc(op);
Matrix<BaseFloat> raw_features;
mfcc.Compute(waveform, 1.0, &raw_features, NULL);
mfcc.Compute(waveform, 1.0, &raw_features);

try {
for (int32 window = 1; window < 4; window++) {
Expand Down
7 changes: 6 additions & 1 deletion src/feat/feature-window.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ struct FrameExtractionOptions {
bool round_to_power_of_two;
BaseFloat blackman_coeff;
bool snip_edges;
bool allow_downsample;
// May be "hamming", "rectangular", "povey", "hanning", "blackman"
// "povey" is a window I made to be similar to Hamming but to go to zero at the
// edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
Expand All @@ -57,7 +58,8 @@ struct FrameExtractionOptions {
window_type("povey"),
round_to_power_of_two(true),
blackman_coeff(0.42),
snip_edges(true){ }
snip_edges(true),
allow_downsample(false) { }

void Register(OptionsItf *opts) {
opts->Register("sample-frequency", &samp_freq,
Expand All @@ -83,6 +85,9 @@ struct FrameExtractionOptions {
"completely fit in the file, and the number of frames depends on the "
"frame-length. If false, the number of frames depends only on the "
"frame-shift, and we reflect the data at the ends.");
opts->Register("allow-downsample", &allow_downsample,
"If true, allow the input waveform to have a higher frequency than"
"the specified --sample-frequency (and we'll downsample).");
}
int32 WindowShift() const {
return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
Expand Down
8 changes: 4 additions & 4 deletions src/feat/online-feature-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ void TestOnlineMfcc() {

// compute mfcc offline
Matrix<BaseFloat> mfcc_feats;
mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported

// compare
// The test waveform is about 1.44s long, so
Expand Down Expand Up @@ -217,7 +217,7 @@ void TestOnlinePlp() {

// compute plp offline
Matrix<BaseFloat> plp_feats;
plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported

// compare
// The test waveform is about 1.44s long, so
Expand Down Expand Up @@ -309,7 +309,7 @@ void TestOnlineAppendFeature() {

// compute mfcc offline
Matrix<BaseFloat> mfcc_feats;
mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported

// the parametrization object for 2nd stream plp feature
PlpOptions plp_op;
Expand All @@ -326,7 +326,7 @@ void TestOnlineAppendFeature() {

// compute plp offline
Matrix<BaseFloat> plp_feats;
plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported

// compare
// The test waveform is about 1.44s long, so
Expand Down
12 changes: 10 additions & 2 deletions src/feat/resample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ void LinearResample::Resample(const VectorBase<BaseFloat> &input,
int32 input_dim = input.Dim();
int64 tot_input_samp = input_sample_offset_ + input_dim,
tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);

KALDI_ASSERT(tot_output_samp >= output_sample_offset_);

output->Resize(tot_output_samp - output_sample_offset_);
Expand Down Expand Up @@ -365,5 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
return filter * window;
}


// Downsamples 'wave', sampled at 'orig_freq', to 'new_freq' and writes the
// result to '*new_wave'.  Requires new_freq < orig_freq.  The resampling is
// done by a LinearResample object in a single call over the whole waveform.
void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
                        BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
  KALDI_ASSERT(new_freq < orig_freq);
  // Low-pass cutoff: 99% of half the target sampling frequency.
  const BaseFloat cutoff = 0.99 * 0.5 * new_freq;
  const int32 filter_width = 6;
  LinearResample resampler(orig_freq, new_freq, cutoff, filter_width);
  // The 'true' argument is presumably the 'flush' flag, since the whole
  // waveform is supplied in this one call -- confirm against resample.h.
  resampler.Resample(wave, true, new_wave);
}
} // namespace kaldi
Loading

0 comments on commit cc2469e

Please sign in to comment.