[src] modify feature-extraction binaries to support downsampling (kal…

…di-asr#1773)
chenzhehuai · Aug 9, 2017 · cc2469e · cc2469e
1 parent 4a0106a
commit cc2469e
Show file tree

Hide file tree

Showing 15 changed files with 146 additions and 84 deletions.
diff --git a/src/feat/feature-common-inl.h b/src/feat/feature-common-inl.h
@@ -20,29 +20,70 @@
 #ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
 #define KALDI_FEAT_FEATURE_COMMON_INL_H_
 
+#include "feat/resample.h"
 // Do not include this file directly.  It is included by feat/feature-common.h
 
 namespace kaldi {
 
+// Computes features for a waveform whose sampling frequency 'sample_freq'
+// may differ from the samp_freq configured in the frame-extraction options.
+//   - If the two frequencies are equal, this simply calls Compute().
+//   - If 'sample_freq' is higher than the configured frequency, the waveform
+//     is downsampled to the configured frequency and then passed to
+//     Compute(); this requires the allow_downsample option to be set,
+//     otherwise an error is reported via KALDI_ERR.
+//   - If 'sample_freq' is lower than the configured frequency, an error is
+//     always reported via KALDI_ERR (only downsampling is supported).
+// 'output' must be non-NULL.
+template <class F>
+void OfflineFeatureTpl<F>::ComputeFeatures(
+    const VectorBase<BaseFloat> &wave,
+    BaseFloat sample_freq,
+    BaseFloat vtln_warp,
+    Matrix<BaseFloat> *output) {
+  KALDI_ASSERT(output != NULL);
+  BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
+  if (sample_freq == new_sample_freq) {
+    Compute(wave, vtln_warp, output);
+  } else {
+    if (new_sample_freq < sample_freq) {
+      if (!computer_.GetFrameOptions().allow_downsample)
+        KALDI_ERR << "Waveform and config sample frequency mismatch: "
+                  << sample_freq << " vs. " << new_sample_freq
+                  << " (use the --allow-downsample=true option to allow "
+                  << "downsampling the waveform).";
+
+      // Downsample the waveform.  The output vector is left empty here
+      // because the resampler resizes it itself, so copying 'wave' into it
+      // first would be wasted work.
+      Vector<BaseFloat> downsampled_wave;
+      DownsampleWaveForm(sample_freq, wave,
+                         new_sample_freq, &downsampled_wave);
+      Compute(downsampled_wave, vtln_warp, output);
+    } else {
+      KALDI_ERR << "The waveform is only allowed to be downsampled, but the "
+                << "new sample frequency " << new_sample_freq
+                << " is larger than the waveform's original sampling "
+                << "frequency " << sample_freq;
+    }
+  }
+}
+
+// This const version of ComputeFeatures() is a wrapper that
+// calls the non-const ComputeFeatures() on a temporary object
+// that is a copy of *this.  It is not as efficient because of the
+// overhead of copying *this.
+template <class F>
+void OfflineFeatureTpl<F>::ComputeFeatures(
+    const VectorBase<BaseFloat> &wave,
+    BaseFloat sample_freq,
+    BaseFloat vtln_warp,
+    Matrix<BaseFloat> *output) const {
+  OfflineFeatureTpl<F> temp(*this);
+  // Forward all four arguments, including 'sample_freq': there is no
+  // ComputeFeatures() overload that takes only (wave, vtln_warp, output).
+  temp.ComputeFeatures(wave, sample_freq, vtln_warp, output);
+}
+
 template <class F>
 void OfflineFeatureTpl<F>::Compute(
     const VectorBase<BaseFloat> &wave,
     BaseFloat vtln_warp,
-    Matrix<BaseFloat> *output,
-    Vector<BaseFloat> *deprecated_wave_remainder) {
+    Matrix<BaseFloat> *output) {
   KALDI_ASSERT(output != NULL);
   int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
       cols_out = computer_.Dim();
   if (rows_out == 0) {
     output->Resize(0, 0);
-    if (deprecated_wave_remainder != NULL)
-      *deprecated_wave_remainder = wave;
     return;
   }
   output->Resize(rows_out, cols_out);
-  if (deprecated_wave_remainder != NULL)
-    ExtractWaveformRemainder(wave, computer_.GetFrameOptions(),
-                             deprecated_wave_remainder);
   Vector<BaseFloat> window;  // windowed waveform.
   bool use_raw_log_energy = computer_.NeedRawLogEnergy();
   for (int32 r = 0; r < rows_out; r++) {  // r is frame index.
@@ -60,13 +101,12 @@ template <class F>
 void OfflineFeatureTpl<F>::Compute(
     const VectorBase<BaseFloat> &wave,
     BaseFloat vtln_warp,
-    Matrix<BaseFloat> *output,
-    Vector<BaseFloat> *deprecated_wave_remainder) const {
+    Matrix<BaseFloat> *output) const {
   OfflineFeatureTpl<F> temp(*this);
   // call the non-const version of Compute() on a temporary copy of this object.
   // This is a workaround for const-ness that may sometimes be useful in
   // multi-threaded code, although it's not optimally efficient.
-  temp.Compute(wave, vtln_warp, output, deprecated_wave_remainder);
+  temp.Compute(wave, vtln_warp, output);
 }
 
 } // end namespace kaldi

diff --git a/src/feat/feature-common.h b/src/feat/feature-common.h
@@ -118,25 +118,50 @@ class OfflineFeatureTpl {
       computer_(opts),
       feature_window_function_(computer_.GetFrameOptions()) { }
 
-  // Computes the features for one file (one sequence of features).
-  // Use of the 'deprecatd_wave_remainder' argument is highly deprecated; it is
-  // only provided for back-compatibility for code that may have
-  // relied on the older interface.  It's deprecated because it
-  // doesn't support the --snip-edges=false option, and because
-  // we plan to eventually remove this argument so that there
-  // will be only one way to do online feature extraction.
+  // Internal (and back-compatibility) interface for computing features, which
+  // requires that the user has already checked that the sampling frequency
+  // of the waveform is equal to the sampling frequency specified in
+  // the frame-extraction options.
   void Compute(const VectorBase<BaseFloat> &wave,
                BaseFloat vtln_warp,
-               Matrix<BaseFloat> *output,
-               Vector<BaseFloat> *deprecated_wave_remainder = NULL);
+               Matrix<BaseFloat> *output);
 
   // This const version of Compute() is a wrapper that
   // calls the non-const version on a temporary object.
   // It's less efficient than the non-const version.
   void Compute(const VectorBase<BaseFloat> &wave,
                BaseFloat vtln_warp,
-               Matrix<BaseFloat> *output,
-               Vector<BaseFloat> *deprecated_wave_remainder = NULL) const;
+               Matrix<BaseFloat> *output) const;
+
+  /**
+     Computes the features for one file (one sequence of features).
+     This is the newer interface where you specify the sample frequency
+     of the input waveform.
+       @param [in] wave   The input waveform
+       @param [in] sample_freq  The sampling frequency with which
+                                'wave' was sampled.
+                                If sample_freq is higher than the frequency
+                                specified in the config, we will downsample
+                                the waveform, but only if the
+                                allow_downsample frame-extraction option is
+                                true (otherwise it's an error).  If
+                                sample_freq is lower than the frequency
+                                specified in the config, it's always an
+                                error.
+       @param [in] vtln_warp  The VTLN warping factor (will normally
+                              be 1.0)
+       @param [out]  output  The matrix of features, where the row-index
+                             is the frame index.  Must not be NULL.
+  */
+  void ComputeFeatures(const VectorBase<BaseFloat> &wave,
+                       BaseFloat sample_freq,
+                       BaseFloat vtln_warp,
+                       Matrix<BaseFloat> *output);
+  /**
+     This const version of ComputeFeatures() is a wrapper that
+     calls the non-const ComputeFeatures() on a temporary object
+     that is a copy of *this.  It is not as efficient because of the
+     overhead of copying *this.
+     See the non-const ComputeFeatures() for the meaning of the arguments.
+  */
+  void ComputeFeatures(const VectorBase<BaseFloat> &wave,
+                       BaseFloat sample_freq,
+                       BaseFloat vtln_warp,
+                       Matrix<BaseFloat> *output) const;
 
   int32 Dim() const { return computer_.Dim(); }
 

diff --git a/src/feat/feature-fbank-test.cc b/src/feat/feature-fbank-test.cc
@@ -101,7 +101,7 @@ static void UnitTestSimple() {
   // use default parameters
 
   // compute fbanks.
-  fbank.Compute(v, 1.0, &m, NULL);
+  fbank.Compute(v, 1.0, &m);
 
   // possibly dump
   //   std::cout << "== Output features == \n" << m;
@@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_features;
-  fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
+  fbank.Compute(waveform, 1.0, &kaldi_features);
 
 
   std::cout << "<<<=== Compare with HTK features...\n";
@@ -224,7 +224,7 @@ static void UnitTestHTKCompare2() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_features;
-  fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
+  fbank.Compute(waveform, 1.0, &kaldi_features);
 
 
   std::cout << "<<<=== Compare with HTK features...\n";
@@ -308,7 +308,7 @@ static void UnitTestHTKCompare3() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_features;
-  fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
+  fbank.Compute(waveform, vtln_warp, &kaldi_features);
 
 
   std::cout << "<<<=== Compare with HTK features...\n";
@@ -394,7 +394,7 @@ static void UnitTestHTKCompare4() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_features;
-  fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
+  fbank.Compute(waveform, vtln_warp, &kaldi_features);
 
 
   std::cout << "<<<=== Compare with HTK features...\n";

diff --git a/src/feat/feature-mfcc-test.cc b/src/feat/feature-mfcc-test.cc
@@ -101,7 +101,7 @@ static void UnitTestSimple() {
   // use default parameters
 
   // compute mfccs.
-  mfcc.Compute(v, 1.0, &m, NULL);
+  mfcc.Compute(v, 1.0, &m);
 
   // possibly dump
   //   std::cout << "== Output features == \n" << m;
@@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+  mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;
@@ -227,7 +227,7 @@ static void UnitTestHTKCompare2() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+  mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;
@@ -312,7 +312,7 @@ static void UnitTestHTKCompare3() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+  mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;
@@ -395,7 +395,7 @@ static void UnitTestHTKCompare4() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+  mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;
@@ -483,7 +483,7 @@ static void UnitTestHTKCompare5() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features, NULL);
+  mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;
@@ -568,7 +568,7 @@ static void UnitTestHTKCompare6() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+  mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;

diff --git a/src/feat/feature-plp-test.cc b/src/feat/feature-plp-test.cc
@@ -60,7 +60,7 @@ static void UnitTestSimple() {
   // use default parameters
 
   // compute mfccs.
-  plp.Compute(v, 1.0, &m, NULL);
+  plp.Compute(v, 1.0, &m);
 
   // possibly dump
   //   std::cout << "== Output features == \n" << m;
@@ -102,7 +102,7 @@ static void UnitTestHTKCompare1() {
 
   // calculate kaldi features
   Matrix<BaseFloat> kaldi_raw_features;
-  plp.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+  plp.Compute(waveform, 1.0, &kaldi_raw_features);
 
   DeltaFeaturesOptions delta_opts;
   Matrix<BaseFloat> kaldi_features;

diff --git a/src/feat/feature-sdc-test.cc b/src/feat/feature-sdc-test.cc
@@ -148,7 +148,7 @@ int main() {
   op.use_energy = false;
   Mfcc mfcc(op);
   Matrix<BaseFloat> raw_features;
-  mfcc.Compute(waveform, 1.0, &raw_features, NULL);
+  mfcc.Compute(waveform, 1.0, &raw_features);
 
   try {
     for (int32 window = 1; window < 4; window++) {

diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h
@@ -43,6 +43,7 @@ struct FrameExtractionOptions {
   bool round_to_power_of_two;
   BaseFloat blackman_coeff;
   bool snip_edges;
+  bool allow_downsample;
   // May be "hamming", "rectangular", "povey", "hanning", "blackman"
   // "povey" is a window I made to be similar to Hamming but to go to zero at the
   // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
@@ -57,7 +58,8 @@ struct FrameExtractionOptions {
       window_type("povey"),
       round_to_power_of_two(true),
       blackman_coeff(0.42),
-      snip_edges(true){ }
+      snip_edges(true),
+      allow_downsample(false) { }
 
   void Register(OptionsItf *opts) {
     opts->Register("sample-frequency", &samp_freq,
@@ -83,6 +85,9 @@ struct FrameExtractionOptions {
                    "completely fit in the file, and the number of frames depends on the "
                    "frame-length.  If false, the number of frames depends only on the "
                    "frame-shift, and we reflect the data at the ends.");
+    // Note the trailing space after "than": adjacent string literals are
+    // concatenated verbatim, so without it the help text reads "thanthe".
+    opts->Register("allow-downsample", &allow_downsample,
+                   "If true, allow the input waveform to have a higher frequency than "
+                   "the specified --sample-frequency (and we'll downsample).");
   }
   int32 WindowShift() const {
     return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);

diff --git a/src/feat/online-feature-test.cc b/src/feat/online-feature-test.cc
@@ -167,7 +167,7 @@ void TestOnlineMfcc() {
 
   // compute mfcc offline
   Matrix<BaseFloat> mfcc_feats;
-  mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL);  // vtln not supported
+  mfcc.Compute(waveform, 1.0, &mfcc_feats);  // vtln not supported
 
   // compare
   // The test waveform is about 1.44s long, so
@@ -217,7 +217,7 @@ void TestOnlinePlp() {
 
   // compute plp offline
   Matrix<BaseFloat> plp_feats;
-  plp.Compute(waveform, 1.0, &plp_feats, NULL);  // vtln not supported
+  plp.Compute(waveform, 1.0, &plp_feats);  // vtln not supported
 
   // compare
   // The test waveform is about 1.44s long, so
@@ -309,7 +309,7 @@ void TestOnlineAppendFeature() {
 
   // compute mfcc offline
   Matrix<BaseFloat> mfcc_feats;
-  mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL);  // vtln not supported
+  mfcc.Compute(waveform, 1.0, &mfcc_feats);  // vtln not supported
 
   // the parametrization object for 2nd stream plp feature
   PlpOptions plp_op;
@@ -326,7 +326,7 @@ void TestOnlineAppendFeature() {
 
   // compute plp offline
   Matrix<BaseFloat> plp_feats;
-  plp.Compute(waveform, 1.0, &plp_feats, NULL);  // vtln not supported
+  plp.Compute(waveform, 1.0, &plp_feats);  // vtln not supported
 
   // compare
   // The test waveform is about 1.44s long, so

diff --git a/src/feat/resample.cc b/src/feat/resample.cc
@@ -155,7 +155,7 @@ void LinearResample::Resample(const VectorBase<BaseFloat> &input,
   int32 input_dim = input.Dim();
   int64 tot_input_samp = input_sample_offset_ + input_dim,
       tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
-  
+
   KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
 
   output->Resize(tot_output_samp - output_sample_offset_);
@@ -365,5 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
   return filter * window;
 }
 
-
+// Downsamples 'wave' from 'orig_freq' to 'new_freq' and writes the result
+// to 'new_wave'.  Requires new_freq < orig_freq.  A LinearResample object is
+// used with a low-pass cutoff of 99% of the new Nyquist frequency
+// (0.5 * new_freq) and a filter width of 6; the whole waveform is processed
+// in a single call with flush == true.
+void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                        BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
+  KALDI_ASSERT(new_freq < orig_freq);
+  int32 filter_width = 6;
+  BaseFloat cutoff = 0.99 * 0.5 * new_freq;
+  LinearResample resampler(orig_freq, new_freq, cutoff, filter_width);
+  bool flush = true;  // all input is supplied in this one call.
+  resampler.Resample(wave, flush, new_wave);
+}
 }  // namespace kaldi