[e_branchformer] simplified e_branchformer (#2484)
* [e_branchformer] simplified ctl

* try to fix unit tests

* try to fix unit tests

* fix activation

* fix attention args

* e-branchformer works
Mddct committed Apr 17, 2024
1 parent 2b67e6c commit 4e9da62
Showing 5 changed files with 138 additions and 290 deletions (three of the five files are reproduced below).
2 changes: 1 addition & 1 deletion examples/aishell/s0/conf/train_ebranchformer.yaml
@@ -18,7 +18,7 @@ encoder_conf:
    activation_type: 'swish'
    causal: false
    pos_enc_layer_type: 'rel_pos'
-   attention_layer_type: 'rel_selfattn'
+   selfattention_layer_type: 'rel_selfattn'

# decoder related
decoder: transformer
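The rename matters because the encoder_conf mapping is consumed as keyword arguments when the encoder is built, and the diff below looks the attention class up via selfattention_layer_type. A minimal loading sketch, assuming the usual wenet config layout (the commented-out constructor call and its signature are assumptions, not taken from this commit):

import yaml

# The encoder_conf section must use the key name the encoder expects,
# i.e. `selfattention_layer_type` after this commit.
with open("examples/aishell/s0/conf/train_ebranchformer.yaml") as f:
    configs = yaml.safe_load(f)

encoder_conf = configs["encoder_conf"]
assert "selfattention_layer_type" in encoder_conf
# encoder = EBranchformerEncoder(input_size, **encoder_conf)  # name/signature assumed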
9 changes: 6 additions & 3 deletions wenet/branchformer/encoder.py
@@ -126,9 +126,12 @@ def __init__(
                WENET_ATTENTION_CLASSES[selfattention_layer_type](
                    *encoder_selfattn_layer_args) if use_attn else None,
                cgmlp_layer(*cgmlp_layer_args) if use_cgmlp else None,
-                dropout_rate, merge_method, cgmlp_weight[lnum],
-                attn_branch_drop_rate[lnum], stochastic_depth_rate[lnum],
-                gradient_checkpointing) for lnum in range(num_blocks)
+                dropout_rate,
+                merge_method,
+                cgmlp_weight[lnum],
+                attn_branch_drop_rate[lnum],
+                stochastic_depth_rate[lnum],
+            ) for lnum in range(num_blocks)
        ])

    @torch.jit.ignore(drop=True)
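In the construction above, cgmlp_weight, attn_branch_drop_rate, and stochastic_depth_rate are indexed per block (e.g. cgmlp_weight[lnum]), so each must provide one value per layer. A small sketch of the usual broadcast pattern for such hyperparameters; the helper name per_block is mine, not from this diff:

from typing import List, Union

def per_block(value: Union[float, List[float]], num_blocks: int) -> List[float]:
    """Broadcast a scalar to one value per block, or validate an explicit list."""
    if isinstance(value, (int, float)):
        return [float(value)] * num_blocks
    if len(value) != num_blocks:
        raise ValueError(f"expected {num_blocks} per-block values, got {len(value)}")
    return [float(v) for v in value]

# Example: a single cgmlp_weight of 0.5 becomes [0.5, 0.5, ...] before indexing with lnum.
cgmlp_weight = per_block(0.5, num_blocks=12)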
2 changes: 0 additions & 2 deletions wenet/branchformer/encoder_layer.py
@@ -46,7 +46,6 @@ def __init__(
        cgmlp_weight: float = 0.5,
        attn_branch_drop_rate: float = 0.0,
        stochastic_depth_rate: float = 0.0,
-        gradient_checkpointing: bool = False,
    ):
        super().__init__()
        assert (attn is not None) or (
@@ -106,7 +105,6 @@ def __init__(
            raise ValueError(f"unknown merge method: {merge_method}")
        else:
            self.merge_proj = torch.nn.Identity()
-        self.gradient_checkpointing = gradient_checkpointing

    def _forward(
        self,
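With the per-layer flag removed here, activation checkpointing of this kind is typically applied where the layers are iterated, i.e. in the encoder's forward pass, rather than stored on each layer. A self-contained sketch of that pattern with a toy module; none of these names are taken from the WeNet code:

import torch
import torch.utils.checkpoint as ckpt

class TinyEncoder(torch.nn.Module):
    """Toy module illustrating encoder-level activation checkpointing."""

    def __init__(self, num_blocks: int = 4, dim: int = 16):
        super().__init__()
        self.encoders = torch.nn.ModuleList(
            torch.nn.Linear(dim, dim) for _ in range(num_blocks))
        self.gradient_checkpointing = True  # kept on the encoder, not on each layer

    def forward(self, xs: torch.Tensor) -> torch.Tensor:
        for layer in self.encoders:
            if self.gradient_checkpointing and self.training:
                # Recompute this layer's activations during backward to save memory.
                xs = ckpt.checkpoint(layer, xs, use_reentrant=False)
            else:
                xs = layer(xs)
        return xs

# Usage: TinyEncoder()(torch.randn(2, 16)) gives the same result either way;
# only the memory/compute trade-off during training changes.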