From a6893ec6fb84b276f2324a2555060695b223145a Mon Sep 17 00:00:00 2001
From: Dave Enyeart <enyeart@us.ibm.com>
Date: Wed, 7 Feb 2024 11:56:19 -0500
Subject: [PATCH] Bump github.com/containerd/containerd to 1.6.26 (release-2.5)
 (#4670)

Bump github.com/containerd/containerd from 1.6.18 to 1.6.26
Only move golang.org/x/tools to v0.14.0 (later versions introduce an issue with ./scripts/metrics_doc.sh check)
Only move google.golang.org/protobuf to v1.31.0 (later versions introduce an issue with chaincode build)

Signed-off-by: David Enyeart <enyeart@us.ibm.com>
---
 go.mod                                        |   14 +-
 go.sum                                        |   34 +-
 .../github.com/klauspost/compress/.gitignore  |    7 +
 .../klauspost/compress/.goreleaser.yml        |   18 +-
 .../github.com/klauspost/compress/README.md   |  269 +-
 .../github.com/klauspost/compress/SECURITY.md |   25 +
 .../klauspost/compress/fse/bitwriter.go       |    3 +-
 .../klauspost/compress/fse/compress.go        |   36 +-
 .../klauspost/compress/fse/decompress.go      |    4 +-
 .../klauspost/compress/huff0/bitreader.go     |  130 +-
 .../klauspost/compress/huff0/bitwriter.go     |  136 +-
 .../klauspost/compress/huff0/bytereader.go    |   54 -
 .../klauspost/compress/huff0/compress.go      |  150 +-
 .../klauspost/compress/huff0/decompress.go    |  744 ++-
 .../compress/huff0/decompress_amd64.go        |  226 +
 .../compress/huff0/decompress_amd64.s         |  830 ++++
 .../compress/huff0/decompress_generic.go      |  299 ++
 .../klauspost/compress/huff0/huff0.go         |    6 +-
 .../compress/internal/cpuinfo/cpuinfo.go      |   34 +
 .../internal/cpuinfo/cpuinfo_amd64.go         |   11 +
 .../compress/internal/cpuinfo/cpuinfo_amd64.s |   36 +
 .../compress/internal/snapref/encode_other.go |   38 +-
 vendor/github.com/klauspost/compress/s2sx.mod |    2 +-
 .../klauspost/compress/zstd/README.md         |  174 +-
 .../klauspost/compress/zstd/bitreader.go      |   46 +-
 .../klauspost/compress/zstd/bitwriter.go      |  101 +-
 .../klauspost/compress/zstd/blockdec.go       |  526 +--
 .../klauspost/compress/zstd/blockenc.go       |  146 +-
 .../klauspost/compress/zstd/bytebuf.go        |   27 +-
 .../klauspost/compress/zstd/bytereader.go     |    6 -
 .../klauspost/compress/zstd/decodeheader.go   |  147 +-
 .../klauspost/compress/zstd/decoder.go        |  685 ++-
 .../compress/zstd/decoder_options.go          |  103 +-
 .../klauspost/compress/zstd/dict.go           |  428 +-
 .../klauspost/compress/zstd/enc_base.go       |   17 +-
 .../klauspost/compress/zstd/enc_best.go       |  312 +-
 .../klauspost/compress/zstd/enc_better.go     |   60 +-
 .../klauspost/compress/zstd/enc_dfast.go      |   35 +-
 .../klauspost/compress/zstd/enc_fast.go       |  176 +-
 .../klauspost/compress/zstd/encoder.go        |  164 +-
 .../compress/zstd/encoder_options.go          |   61 +-
 .../klauspost/compress/zstd/framedec.go       |  342 +-
 .../klauspost/compress/zstd/frameenc.go       |    6 +-
 .../klauspost/compress/zstd/fse_decoder.go    |  128 +-
 .../compress/zstd/fse_decoder_amd64.go        |   65 +
 .../compress/zstd/fse_decoder_amd64.s         |  126 +
 .../compress/zstd/fse_decoder_generic.go      |   73 +
 .../klauspost/compress/zstd/fse_encoder.go    |   28 +-
 .../klauspost/compress/zstd/hash.go           |    6 -
 .../klauspost/compress/zstd/history.go        |   67 +-
 .../compress/zstd/internal/xxhash/README.md   |   49 +-
 .../compress/zstd/internal/xxhash/xxhash.go   |   47 +-
 .../zstd/internal/xxhash/xxhash_amd64.go      |   12 -
 .../zstd/internal/xxhash/xxhash_amd64.s       |  337 +-
 .../zstd/internal/xxhash/xxhash_arm64.s       |  184 +
 .../zstd/internal/xxhash/xxhash_asm.go        |   16 +
 .../zstd/internal/xxhash/xxhash_other.go      |   23 +-
 .../klauspost/compress/zstd/matchlen_amd64.go |   16 +
 .../klauspost/compress/zstd/matchlen_amd64.s  |   68 +
 .../compress/zstd/matchlen_generic.go         |   33 +
 .../klauspost/compress/zstd/seqdec.go         |  343 +-
 .../klauspost/compress/zstd/seqdec_amd64.go   |  394 ++
 .../klauspost/compress/zstd/seqdec_amd64.s    | 4151 +++++++++++++++++
 .../klauspost/compress/zstd/seqdec_generic.go |  237 +
 .../klauspost/compress/zstd/snappy.go         |    5 +-
 .../github.com/klauspost/compress/zstd/zip.go |   69 +-
 .../klauspost/compress/zstd/zstd.go           |   55 +-
 .../image-spec/specs-go/v1/config.go          |   29 +-
 .../image-spec/specs-go/v1/descriptor.go      |   22 +-
 .../image-spec/specs-go/v1/index.go           |    8 +-
 .../image-spec/specs-go/v1/layout.go          |    6 +-
 .../image-spec/specs-go/v1/manifest.go        |   10 +-
 .../image-spec/specs-go/v1/mediatype.go       |   38 +-
 .../image-spec/specs-go/version.go            |    6 +-
 .../genproto/{ => googleapis/rpc}/LICENSE     |    0
 vendor/google.golang.org/grpc/README.md       |   60 +-
 .../grpc/attributes/attributes.go             |   59 +-
 .../grpc/balancer/balancer.go                 |   62 +-
 .../grpc/balancer/base/balancer.go            |   22 +-
 ...r_conn_wrappers.go => balancer_wrapper.go} |  337 +-
 .../grpc_binarylog_v1/binarylog.pb.go         |    4 +-
 vendor/google.golang.org/grpc/call.go         |   11 +-
 vendor/google.golang.org/grpc/clientconn.go   |  696 ++-
 vendor/google.golang.org/grpc/codec.go        |    8 +-
 vendor/google.golang.org/grpc/codes/codes.go  |    8 +-
 .../google.golang.org/grpc/credentials/tls.go |   75 +-
 vendor/google.golang.org/grpc/dialoptions.go  |   78 +-
 .../grpc/encoding/encoding.go                 |   17 +-
 .../grpc/encoding/proto/proto.go              |    4 +-
 .../grpc/grpclog/component.go                 |   40 +-
 .../google.golang.org/grpc/grpclog/grpclog.go |   30 +-
 .../google.golang.org/grpc/grpclog/logger.go  |   30 +-
 .../grpc/grpclog/loggerv2.go                  |   56 +-
 .../google.golang.org/grpc/health/client.go   |    2 +-
 .../grpc/health/grpc_health_v1/health.pb.go   |    2 +-
 .../health/grpc_health_v1/health_grpc.pb.go   |   22 +-
 vendor/google.golang.org/grpc/idle.go         |  287 --
 vendor/google.golang.org/grpc/interceptor.go  |   12 +-
 .../grpc/internal/backoff/backoff.go          |   36 +
 .../balancer/gracefulswitch/gracefulswitch.go |   59 +-
 .../grpc/internal/balancerload/load.go        |    4 +-
 .../grpc/internal/binarylog/method_logger.go  |    4 +-
 .../grpc/internal/buffer/unbounded.go         |   57 +-
 .../grpc/internal/channelz/funcs.go           |   76 +-
 .../grpc/internal/channelz/logging.go         |   12 +-
 .../grpc/internal/channelz/types.go           |    5 +
 .../grpc/internal/channelz/util_linux.go      |    2 +-
 .../grpc/internal/channelz/util_nonlinux.go   |    2 +-
 .../grpc/internal/credentials/credentials.go  |    8 +-
 .../grpc/internal/envconfig/envconfig.go      |   11 +-
 .../grpc/internal/envconfig/xds.go            |   39 -
 .../grpc/internal/experimental.go             |   28 +
 .../grpc/internal/grpclog/grpclog.go          |   40 +-
 .../grpc/internal/grpclog/prefixLogger.go     |    8 +-
 .../grpc/internal/grpcrand/grpcrand.go        |    7 +
 .../internal/grpcsync/callback_serializer.go  |   75 +-
 .../grpc/internal/grpcsync/pubsub.go          |  121 +
 .../grpc/internal/idle/idle.go                |  278 ++
 .../grpc/internal/internal.go                 |   74 +-
 .../grpc/internal/metadata/metadata.go        |    2 +-
 .../grpc/internal/pretty/pretty.go            |    2 +-
 .../grpc/internal/resolver/config_selector.go |    4 +-
 .../internal/resolver/dns/dns_resolver.go     |  137 +-
 .../resolver/dns/internal/internal.go         |   70 +
 .../grpc/internal/resolver/unix/unix.go       |    4 +
 .../grpc/internal/status/status.go            |   36 +-
 .../grpc/internal/tcp_keepalive_others.go     |   29 +
 .../grpc/internal/tcp_keepalive_unix.go       |   54 +
 .../grpc/internal/tcp_keepalive_windows.go    |   54 +
 .../grpc/internal/transport/controlbuf.go     |   16 +-
 .../grpc/internal/transport/handler_server.go |   76 +-
 .../grpc/internal/transport/http2_client.go   |   78 +-
 .../grpc/internal/transport/http2_server.go   |  130 +-
 .../grpc/internal/transport/http_util.go      |   77 +-
 .../grpc/internal/transport/proxy.go          |   14 +-
 .../grpc/internal/transport/transport.go      |   39 +-
 .../grpc/metadata/metadata.go                 |   31 +-
 vendor/google.golang.org/grpc/peer/peer.go    |    2 +
 .../google.golang.org/grpc/picker_wrapper.go  |   53 +-
 vendor/google.golang.org/grpc/pickfirst.go    |   76 +-
 vendor/google.golang.org/grpc/preloader.go    |    2 +-
 .../grpc/resolver/dns/dns_resolver.go         |   36 +
 vendor/google.golang.org/grpc/resolver/map.go |  123 +-
 .../grpc/resolver/resolver.go                 |  100 +-
 .../grpc/resolver_conn_wrapper.go             |  239 -
 .../grpc/resolver_wrapper.go                  |  197 +
 vendor/google.golang.org/grpc/rpc_util.go     |   52 +-
 vendor/google.golang.org/grpc/server.go       |  485 +-
 .../grpc/shared_buffer_pool.go                |  154 +
 vendor/google.golang.org/grpc/stats/stats.go  |   14 +-
 .../google.golang.org/grpc/status/status.go   |   14 +-
 vendor/google.golang.org/grpc/stream.go       |  134 +-
 vendor/google.golang.org/grpc/tap/tap.go      |    6 +
 vendor/google.golang.org/grpc/trace.go        |    6 +-
 vendor/google.golang.org/grpc/version.go      |    2 +-
 vendor/google.golang.org/grpc/vet.sh          |  172 +-
 vendor/modules.txt                            |   28 +-
 157 files changed, 13520 insertions(+), 5415 deletions(-)
 create mode 100644 vendor/github.com/klauspost/compress/SECURITY.md
 delete mode 100644 vendor/github.com/klauspost/compress/huff0/bytereader.go
 create mode 100644 vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
 create mode 100644 vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
 create mode 100644 vendor/github.com/klauspost/compress/huff0/decompress_generic.go
 create mode 100644 vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go
 create mode 100644 vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
 create mode 100644 vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
 create mode 100644 vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
 create mode 100644 vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
 create mode 100644 vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
 delete mode 100644 vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
 create mode 100644 vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
 create mode 100644 vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
 create mode 100644 vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
 create mode 100644 vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
 create mode 100644 vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
 create mode 100644 vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
 create mode 100644 vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
 create mode 100644 vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
 rename vendor/google.golang.org/genproto/{ => googleapis/rpc}/LICENSE (100%)
 rename vendor/google.golang.org/grpc/{balancer_conn_wrappers.go => balancer_wrapper.go} (52%)
 delete mode 100644 vendor/google.golang.org/grpc/idle.go
 create mode 100644 vendor/google.golang.org/grpc/internal/experimental.go
 create mode 100644 vendor/google.golang.org/grpc/internal/grpcsync/pubsub.go
 create mode 100644 vendor/google.golang.org/grpc/internal/idle/idle.go
 create mode 100644 vendor/google.golang.org/grpc/internal/resolver/dns/internal/internal.go
 create mode 100644 vendor/google.golang.org/grpc/internal/tcp_keepalive_others.go
 create mode 100644 vendor/google.golang.org/grpc/internal/tcp_keepalive_unix.go
 create mode 100644 vendor/google.golang.org/grpc/internal/tcp_keepalive_windows.go
 create mode 100644 vendor/google.golang.org/grpc/resolver/dns/dns_resolver.go
 delete mode 100644 vendor/google.golang.org/grpc/resolver_conn_wrapper.go
 create mode 100644 vendor/google.golang.org/grpc/resolver_wrapper.go
 create mode 100644 vendor/google.golang.org/grpc/shared_buffer_pool.go

diff --git a/go.mod b/go.mod
index 7eae2ef3ba0..aa6f74fa2b3 100644
--- a/go.mod
+++ b/go.mod
@@ -42,7 +42,7 @@ require (
 	go.uber.org/zap v1.19.0
 	golang.org/x/crypto v0.18.0
 	golang.org/x/tools v0.14.0
-	google.golang.org/grpc v1.56.3
+	google.golang.org/grpc v1.61.0
 	gopkg.in/alecthomas/kingpin.v2 v2.2.6
 	gopkg.in/yaml.v2 v2.4.0
 )
@@ -57,12 +57,13 @@ require (
 	github.com/DataDog/zstd v1.4.5 // indirect
 	github.com/IBM/mathlib v0.0.0-20220112091634-0a7378db6912 // indirect
 	github.com/Microsoft/go-winio v0.6.1 // indirect
+	github.com/Microsoft/hcsshim v0.11.4 // indirect
 	github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
 	github.com/alecthomas/units v0.0.0-20210912230133-d1bdfacee922 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
 	github.com/consensys/gnark-crypto v0.6.0 // indirect
-	github.com/containerd/containerd v1.6.18 // indirect
+	github.com/containerd/containerd v1.6.26 // indirect
 	github.com/docker/docker v24.0.7+incompatible // indirect
 	github.com/docker/go-connections v0.4.0 // indirect
 	github.com/docker/go-units v0.5.0 // indirect
@@ -79,7 +80,7 @@ require (
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/hyperledger/fabric-amcl v0.0.0-20210603140002-2670f91851c8 // indirect
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
-	github.com/klauspost/compress v1.13.6 // indirect
+	github.com/klauspost/compress v1.17.6 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/magiconair/properties v1.8.1 // indirect
 	github.com/mattn/go-runewidth v0.0.4 // indirect
@@ -90,7 +91,7 @@ require (
 	github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect
 	github.com/morikuni/aec v1.0.0 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
-	github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
+	github.com/opencontainers/image-spec v1.1.0-rc6 // indirect
 	github.com/opencontainers/runc v1.1.12 // indirect
 	github.com/pelletier/go-toml v1.9.5 // indirect
 	github.com/pierrec/lz4 v2.6.0+incompatible // indirect
@@ -109,11 +110,12 @@ require (
 	go.uber.org/atomic v1.7.0 // indirect
 	go.uber.org/goleak v1.1.12 // indirect
 	go.uber.org/multierr v1.6.0 // indirect
-	golang.org/x/mod v0.14.0 // indirect
+	golang.org/x/mod v0.15.0 // indirect
 	golang.org/x/net v0.20.0 // indirect
+	golang.org/x/sync v0.6.0 // indirect
 	golang.org/x/sys v0.16.0 // indirect
 	golang.org/x/text v0.14.0 // indirect
-	google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect
 	gopkg.in/ini.v1 v1.51.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
index dd3a26f1c8a..d34ddfb8b58 100644
--- a/go.sum
+++ b/go.sum
@@ -50,7 +50,8 @@ github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible h1
 github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
 github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
 github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
-github.com/Microsoft/hcsshim v0.9.6 h1:VwnDOgLeoi2du6dAznfmspNqTiwczvjv4K7NxuY9jsY=
+github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8=
+github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w=
 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
 github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
 github.com/Shopify/sarama v1.20.1 h1:Bb0h3I++r4eX333Y0uZV2vwUXepJbt6ig05TUU1qt9I=
@@ -113,8 +114,8 @@ github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE
 github.com/consensys/bavard v0.1.8-0.20210915155054-088da2f7f54a/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI=
 github.com/consensys/gnark-crypto v0.6.0 h1:K48rcIJaX2YkQT2k51EiHIxTynpHsOLHF1FVV+0aS7w=
 github.com/consensys/gnark-crypto v0.6.0/go.mod h1:PicAZJP763+7N9LZFfj+MquTXq98pwjD6l8Ry8WdHSU=
-github.com/containerd/containerd v1.6.18 h1:qZbsLvmyu+Vlty0/Ex5xc0z2YtKpIsb5n45mAMI+2Ns=
-github.com/containerd/containerd v1.6.18/go.mod h1:1RdCUu95+gc2v9t3IL+zIlpClSmew7/0YS8O5eQZrOw=
+github.com/containerd/containerd v1.6.26 h1:VVfrE6ZpyisvB1fzoY8Vkiq4sy+i5oF4uk7zu03RaHs=
+github.com/containerd/containerd v1.6.26/go.mod h1:I4TRdsdoo5MlKob5khDJS2EPT1l1oMNaE2MBm6FrwxM=
 github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
 github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
 github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
@@ -240,8 +241,8 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
 github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
@@ -338,8 +339,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
-github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
+github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
@@ -434,8 +435,8 @@ github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9
 github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 h1:rc3tiVYb5z54aKaDfakKn0dDjIyPpTtszkjuMzyt7ec=
-github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
+github.com/opencontainers/image-spec v1.1.0-rc6 h1:XDqvyKsJEbRtATzkgItUqBA7QHk58yxX1Ov9HERHNqU=
+github.com/opencontainers/image-spec v1.1.0-rc6/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
 github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
 github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
 github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis=
@@ -665,8 +666,8 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0=
-golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8=
+golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -727,7 +728,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ=
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -915,8 +917,8 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc
 google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
 google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
 google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A=
-google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA=
 google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM=
@@ -934,8 +936,8 @@ google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKa
 google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
 google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
 google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
-google.golang.org/grpc v1.56.3 h1:8I4C0Yq1EjstUzUJzpcRVbuYA2mODtEmpWiQoN/b2nc=
-google.golang.org/grpc v1.56.3/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s=
+google.golang.org/grpc v1.61.0 h1:TOvOcuXn30kRao+gfcvsebNEa5iZIiLkisYEkf7R7o0=
+google.golang.org/grpc v1.61.0/go.mod h1:VUbo7IFqmF1QtCAstipjG0GIoq49KvMe9+h1jFLBNJs=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
@@ -980,7 +982,7 @@ gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
-gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=
+gotest.tools/v3 v3.5.0 h1:Ljk6PdHdOhAb5aDMWXjDLMMhph+BpztA4v1QdqEW2eY=
 honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore
index b35f8449bf2..d31b3781527 100644
--- a/vendor/github.com/klauspost/compress/.gitignore
+++ b/vendor/github.com/klauspost/compress/.gitignore
@@ -23,3 +23,10 @@ _testmain.go
 *.test
 *.prof
 /s2/cmd/_s2sx/sfx-exe
+
+# Linux perf files
+perf.data
+perf.data.old
+
+# gdb history
+.gdb_history
diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml
index c9014ce1da2..a22953805c6 100644
--- a/vendor/github.com/klauspost/compress/.goreleaser.yml
+++ b/vendor/github.com/klauspost/compress/.goreleaser.yml
@@ -88,16 +88,7 @@ builds:
 archives:
   -
     id: s2-binaries
-    name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
-    replacements:
-      aix: AIX
-      darwin: OSX
-      linux: Linux
-      windows: Windows
-      386: i386
-      amd64: x86_64
-      freebsd: FreeBSD
-      netbsd: NetBSD
+    name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
     format_overrides:
       - goos: windows
         format: zip
@@ -121,7 +112,7 @@ changelog:
 
 nfpms:
   -
-    file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
     vendor: Klaus Post
     homepage: https://github.com/klauspost/compress
     maintainer: Klaus Post <klauspost@gmail.com>
@@ -130,8 +121,3 @@ nfpms:
     formats:
       - deb
       - rpm
-    replacements:
-      darwin: Darwin
-      linux: Linux
-      freebsd: FreeBSD
-      amd64: x86_64
diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md
index 3429879eb69..ac281fb7205 100644
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@@ -9,7 +9,6 @@ This package provides various compression algorithms.
 * [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding.
 * [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently.
 * [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation.
-* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here.
 
 [![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories)
 [![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml)
@@ -17,6 +16,244 @@ This package provides various compression algorithms.
 
 # changelog
 
+* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5)
+	* flate: Fix reset with dictionary on custom window encodes by @klauspost in https://github.com/klauspost/compress/pull/912
+	* zstd: Add Frame header encoding and stripping by @klauspost in https://github.com/klauspost/compress/pull/908
+	* zstd: Limit better/best default window to 8MB by @klauspost in https://github.com/klauspost/compress/pull/913
+	* zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910
+	* s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917
+https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918
+
+* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4)
+	* huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887
+	* huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886
+	* gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892
+	* gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890
+	* gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891
+
+* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3)
+	* fse: Fix max header size https://github.com/klauspost/compress/pull/881
+	* zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877
+	* gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883
+
+* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
+	* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
+
+* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
+	* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
+	* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
+	* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
+
+* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
+	* Add experimental dictionary builder  https://github.com/klauspost/compress/pull/853
+	* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
+	* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
+	* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
+	* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
+	* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
+   
+* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
+	* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
+	* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
+
+* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
+	* zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
+	* zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
+	* gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815
+	* s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663
+
+* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5)
+	* zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802
+	* gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804
+
+* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4)
+	* zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784
+	* zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792
+	* zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785
+	* zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
+	* s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
+	* s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
+	* gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
+
+* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
+	* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
+	* gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
+	* s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766
+	* zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773
+	* huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774
+
+* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0)
+	* s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support.  https://github.com/klauspost/compress/pull/685
+	* s2: Add Compression Size Estimate.  https://github.com/klauspost/compress/pull/752
+	* s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755
+	* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
+	* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
+	* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
+
+<details>
+	<summary>See changes to v1.15.x</summary>
+	
+* Jan 21st, 2023 (v1.15.15)
+	* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
+	* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
+	* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745
+	* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740
+
+* Jan 3rd, 2023 (v1.15.14)
+
+	* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718
+	* zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720
+	* export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722
+	* s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723
+
+* Dec 11, 2022 (v1.15.13)
+	* zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder  https://github.com/klauspost/compress/pull/691
+	* zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708
+
+* Oct 26, 2022 (v1.15.12)
+
+	* zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680
+	* gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683
+
+* Sept 26, 2022 (v1.15.11)
+
+	* flate: Improve level 1-3 compression  https://github.com/klauspost/compress/pull/678
+	* zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677
+	* zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668
+	* zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667
+
+* Sept 16, 2022 (v1.15.10)
+
+	* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649
+	* Add Go 1.19 - deprecate Go 1.16  https://github.com/klauspost/compress/pull/651
+	* flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656
+	* zstd: Improve "better" compresssion  https://github.com/klauspost/compress/pull/657
+	* s2: Improve "best" compression https://github.com/klauspost/compress/pull/658
+	* s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635
+	* s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646
+	* Use arrays for constant size copies https://github.com/klauspost/compress/pull/659
+
+* July 21, 2022 (v1.15.9)
+
+	* zstd: Fix decoder crash on amd64 (no BMI) on invalid input https://github.com/klauspost/compress/pull/645
+	* zstd: Disable decoder extended memory copies (amd64) due to possible crashes https://github.com/klauspost/compress/pull/644
+	* zstd: Allow single segments up to "max decoded size" by @klauspost in https://github.com/klauspost/compress/pull/643
+
+* July 13, 2022 (v1.15.8)
+
+	* gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641
+	* s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638
+	* zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636
+	* zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637
+	* huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634
+	* zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640
+	* gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639
+
+* June 29, 2022 (v1.15.7)
+
+	* s2: Fix absolute forward seeks  https://github.com/klauspost/compress/pull/633
+	* zip: Merge upstream  https://github.com/klauspost/compress/pull/631
+	* zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624
+	* zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598
+	* flate: Faster histograms  https://github.com/klauspost/compress/pull/620
+	* deflate: Use compound hcode  https://github.com/klauspost/compress/pull/622
+
+* June 3, 2022 (v1.15.6)
+	* s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613
+	* s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611
+	* zstd: Always use configured block size https://github.com/klauspost/compress/pull/605
+	* zstd: Fix incorrect hash table placement for dict encoding in default https://github.com/klauspost/compress/pull/606
+	* zstd: Apply default config to ZipDecompressor without options https://github.com/klauspost/compress/pull/608
+	* gzhttp: Exclude more common archive formats https://github.com/klauspost/compress/pull/612
+	* s2: Add ReaderIgnoreCRC https://github.com/klauspost/compress/pull/609
+	* s2: Remove sanity load on index creation https://github.com/klauspost/compress/pull/607
+	* snappy: Use dedicated function for scoring https://github.com/klauspost/compress/pull/614
+	* s2c+s2d: Use official snappy framed extension https://github.com/klauspost/compress/pull/610
+
+* May 25, 2022 (v1.15.5)
+	* s2: Add concurrent stream decompression https://github.com/klauspost/compress/pull/602
+	* s2: Fix final emit oob read crash on amd64 https://github.com/klauspost/compress/pull/601
+	* huff0: asm implementation of Decompress1X by @WojciechMula https://github.com/klauspost/compress/pull/596
+	* zstd: Use 1 less goroutine for stream decoding https://github.com/klauspost/compress/pull/588
+	* zstd: Copy literal in 16 byte blocks when possible https://github.com/klauspost/compress/pull/592
+	* zstd: Speed up when WithDecoderLowmem(false) https://github.com/klauspost/compress/pull/599
+	* zstd: faster next state update in BMI2 version of decode by @WojciechMula in https://github.com/klauspost/compress/pull/593
+	* huff0: Do not check max size when reading table. https://github.com/klauspost/compress/pull/586
+	* flate: Inplace hashing for level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/590
+
+
+* May 11, 2022 (v1.15.4)
+	* huff0: decompress directly into output by @WojciechMula in [#577](https://github.com/klauspost/compress/pull/577)
+	* inflate: Keep dict on stack [#581](https://github.com/klauspost/compress/pull/581)
+	* zstd: Faster decoding memcopy in asm [#583](https://github.com/klauspost/compress/pull/583)
+	* zstd: Fix ignored crc [#580](https://github.com/klauspost/compress/pull/580)
+
+* May 5, 2022 (v1.15.3)
+	* zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572)
+	* s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575)
+
+* Apr 26, 2022 (v1.15.2)
+	* zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster.  [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537)
+	* zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539)
+	* s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555)
+	* Minimum version is Go 1.16, added CI test on 1.18.
+
+* Mar 11, 2022 (v1.15.1)
+	* huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512)
+	* zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514)
+	* zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520)
+	* zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521)
+	* zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523)
+
+* Mar 3, 2022 (v1.15.0)
+	* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
+	* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
+	* huff0: Prevent single blocks exceeding 16 bits by @klauspost in[#507](https://github.com/klauspost/compress/pull/507)
+	* flate: Inline literal emission by @klauspost in [#509](https://github.com/klauspost/compress/pull/509)
+	* gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400)
+	* gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510)
+
+Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
+
+Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
+
+While the release has been extensively tested, it is recommended to testing when upgrading.
+
+</details>
+
+<details>
+	<summary>See changes to v1.14.x</summary>
+	
+* Feb 22, 2022 (v1.14.4)
+	* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
+	* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
+	* zip: don't read data descriptor early by @saracen in [#501](https://github.com/klauspost/compress/pull/501)  #501
+	* huff0: Use static decompression buffer up to 30% faster by @klauspost in [#499](https://github.com/klauspost/compress/pull/499) [#500](https://github.com/klauspost/compress/pull/500)
+
+* Feb 17, 2022 (v1.14.3)
+	* flate: Improve fastest levels compression speed ~10% more throughput. [#482](https://github.com/klauspost/compress/pull/482) [#489](https://github.com/klauspost/compress/pull/489) [#490](https://github.com/klauspost/compress/pull/490) [#491](https://github.com/klauspost/compress/pull/491) [#494](https://github.com/klauspost/compress/pull/494)  [#478](https://github.com/klauspost/compress/pull/478)
+	* flate: Faster decompression speed, ~5-10%. [#483](https://github.com/klauspost/compress/pull/483)
+	* s2: Faster compression with Go v1.18 and amd64 microarch level 3+. [#484](https://github.com/klauspost/compress/pull/484) [#486](https://github.com/klauspost/compress/pull/486)
+
+* Jan 25, 2022 (v1.14.2)
+	* zstd: improve header decoder by @dsnet  [#476](https://github.com/klauspost/compress/pull/476)
+	* zstd: Add bigger default blocks  [#469](https://github.com/klauspost/compress/pull/469)
+	* zstd: Remove unused decompression buffer [#470](https://github.com/klauspost/compress/pull/470)
+	* zstd: Fix logically dead code by @ningmingxiao [#472](https://github.com/klauspost/compress/pull/472)
+	* flate: Improve level 7-9 [#471](https://github.com/klauspost/compress/pull/471) [#473](https://github.com/klauspost/compress/pull/473)
+	* zstd: Add noasm tag for xxhash [#475](https://github.com/klauspost/compress/pull/475)
+
+* Jan 11, 2022 (v1.14.1)
+	* s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462)
+	* flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458)
+	* zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468)
+	* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
+	* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
+</details>
+
+<details>
+	<summary>See changes to v1.13.x</summary>
+	
 * Aug 30, 2021 (v1.13.5)
 	* gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425)
 	* s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413)
@@ -45,7 +282,12 @@ This package provides various compression algorithms.
 	* Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors.
 	* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
 	* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
+</details>
+
 
+<details>
+	<summary>See changes to v1.12.x</summary>
+	
 * May 25, 2021 (v1.12.3)
 	* deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374)
 	* deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375)
@@ -67,9 +309,10 @@ This package provides various compression algorithms.
 	* s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352)
 	* zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346)
 	* s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349)
+</details>
 
 <details>
-	<summary>See changes prior to v1.12.1</summary>
+	<summary>See changes to v1.11.x</summary>
 	
 * Mar 26, 2021 (v1.11.13)
 	* zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345)
@@ -128,7 +371,7 @@ This package provides various compression algorithms.
 </details>
 
 <details>
-	<summary>See changes prior to v1.11.0</summary>
+	<summary>See changes to v1.10.x</summary>
  
 * July 8, 2020 (v1.10.11) 
 	* zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278)
@@ -290,11 +533,6 @@ This package provides various compression algorithms.
 
 # deflate usage
 
-* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/).
-* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/).
-* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/).
-* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/)
-
 The packages are drop-in replacements for standard libraries. Simply replace the import path to use them:
 
 | old import         | new import                              | Documentation
@@ -316,6 +554,8 @@ Memory usage is typically 1MB for a Writer. stdlib is in the same range.
 If you expect to have a lot of concurrently allocated Writers consider using 
 the stateless compress described below.
 
+For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing).
+
 # Stateless compression
 
 This package offers stateless compression as a special option for gzip/deflate. 
@@ -334,7 +574,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S
 
 A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
 
-```
+```go
 	// replace 'ioutil.Discard' with your output.
 	gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
 	if err != nil {
@@ -432,6 +672,17 @@ For more information see my blog post on [Fast Linear Time Compression](http://b
 
 This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip.
 
+# Other packages
+
+Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code):
+
+* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
+* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
+* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
+* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
+* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
+* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
+* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.
 
 # license
 
diff --git a/vendor/github.com/klauspost/compress/SECURITY.md b/vendor/github.com/klauspost/compress/SECURITY.md
new file mode 100644
index 00000000000..ca6685e2b72
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/SECURITY.md
@@ -0,0 +1,25 @@
+# Security Policy
+
+## Supported Versions
+
+Security updates are applied only to the latest release.
+
+## Vulnerability Definition
+
+A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability.
+
+Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently.
+
+Out-of-memory crashes only applies if the en/decoder uses an abnormal amount of memory, with appropriate options applied, to limit maximum window size, concurrency, etc. However, if you are in doubt you are welcome to file a security issue.
+
+It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability.
+
+Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround.
+
+## Reporting a Vulnerability
+
+If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
+
+Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that.
+
+This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed in a best effort base.
diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go
index 43e463611b1..e82fa3bb7b6 100644
--- a/vendor/github.com/klauspost/compress/fse/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go
@@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() {
 
 // close will write the alignment bit and write the final byte(s)
 // to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
 	// End mark
 	b.addBits16Clean(1, 1)
 	// flush until next byte.
 	b.flushAlign()
-	return nil
 }
 
 // reset and continue writing by appending to out.
diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go
index 6f341914c67..074018d8f94 100644
--- a/vendor/github.com/klauspost/compress/fse/compress.go
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error {
 		c1.encodeZero(tt[src[ip-2]])
 		ip -= 2
 	}
+	src = src[:ip]
 
 	// Main compression loop.
 	switch {
 	case !s.zeroBits && s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush.
 		// We do not need to check if any output is 0 bits.
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
-			ip -= 4
 		}
 	case !s.zeroBits:
 		// We do not need to check if any output is 0 bits.
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			s.bw.flush32()
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
-			ip -= 4
 		}
 	case s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
-			ip -= 4
 		}
 	default:
-		for ip >= 4 {
+		for ; len(src) >= 4; src = src[:len(src)-4] {
 			s.bw.flush32()
-			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			s.bw.flush32()
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
-			ip -= 4
 		}
 	}
 
@@ -202,7 +199,8 @@ func (s *Scratch) compress(src []byte) error {
 	c2.flush(s.actualTableLog)
 	c1.flush(s.actualTableLog)
 
-	return s.bw.close()
+	s.bw.close()
+	return nil
 }
 
 // writeCount will write the normalized histogram count to header.
@@ -214,7 +212,7 @@ func (s *Scratch) writeCount() error {
 		previous0 bool
 		charnum   uint16
 
-		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
+		maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3
 
 		// Write Table Size
 		bitStream = uint32(tableLog - minTablelog)
@@ -459,15 +457,17 @@ func (s *Scratch) countSimple(in []byte) (max int) {
 	for _, v := range in {
 		s.count[v]++
 	}
-	m := uint32(0)
+	m, symlen := uint32(0), s.symbolLen
 	for i, v := range s.count[:] {
+		if v == 0 {
+			continue
+		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
-			s.symbolLen = uint16(i) + 1
-		}
+		symlen = uint16(i) + 1
 	}
+	s.symbolLen = symlen
 	return int(m)
 }
 
diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go
index 926f5f15356..cc05d0f7ea9 100644
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error {
 // If the buffer is over-read an error is returned.
 func (s *Scratch) decompress() error {
 	br := &s.bits
-	br.init(s.br.unread())
+	if err := br.init(s.br.unread()); err != nil {
+		return err
+	}
 
 	var s1, s2 decoder
 	// Initialize and decode first state and symbol.
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
index a4979e8868a..e36d9742f94 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -8,115 +8,10 @@ package huff0
 import (
 	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
 )
 
-// bitReader reads a bitstream in reverse.
-// The last set bit indicates the start of the stream and is used
-// for aligning the input.
-type bitReader struct {
-	in       []byte
-	off      uint // next byte to read is at in[off - 1]
-	value    uint64
-	bitsRead uint8
-}
-
-// init initializes and resets the bit reader.
-func (b *bitReader) init(in []byte) error {
-	if len(in) < 1 {
-		return errors.New("corrupt stream: too short")
-	}
-	b.in = in
-	b.off = uint(len(in))
-	// The highest bit of the last byte indicates where to start
-	v := in[len(in)-1]
-	if v == 0 {
-		return errors.New("corrupt stream, did not find end of stream")
-	}
-	b.bitsRead = 64
-	b.value = 0
-	if len(in) >= 8 {
-		b.fillFastStart()
-	} else {
-		b.fill()
-		b.fill()
-	}
-	b.bitsRead += 8 - uint8(highBit32(uint32(v)))
-	return nil
-}
-
-// peekBitsFast requires that at least one bit is requested every time.
-// There are no checks if the buffer is filled.
-func (b *bitReader) peekBitsFast(n uint8) uint16 {
-	const regMask = 64 - 1
-	v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
-	return v
-}
-
-// fillFast() will make sure at least 32 bits are available.
-// There must be at least 4 bytes available.
-func (b *bitReader) fillFast() {
-	if b.bitsRead < 32 {
-		return
-	}
-
-	// 2 bounds checks.
-	v := b.in[b.off-4 : b.off]
-	v = v[:4]
-	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
-	b.value = (b.value << 32) | uint64(low)
-	b.bitsRead -= 32
-	b.off -= 4
-}
-
-func (b *bitReader) advance(n uint8) {
-	b.bitsRead += n
-}
-
-// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
-func (b *bitReader) fillFastStart() {
-	// Do single re-slice to avoid bounds checks.
-	b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
-	b.bitsRead = 0
-	b.off -= 8
-}
-
-// fill() will make sure at least 32 bits are available.
-func (b *bitReader) fill() {
-	if b.bitsRead < 32 {
-		return
-	}
-	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
-		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
-		b.value = (b.value << 32) | uint64(low)
-		b.bitsRead -= 32
-		b.off -= 4
-		return
-	}
-	for b.off > 0 {
-		b.value = (b.value << 8) | uint64(b.in[b.off-1])
-		b.bitsRead -= 8
-		b.off--
-	}
-}
-
-// finished returns true if all bits have been read from the bit stream.
-func (b *bitReader) finished() bool {
-	return b.off == 0 && b.bitsRead >= 64
-}
-
-// close the bitstream and returns an error if out-of-buffer reads occurred.
-func (b *bitReader) close() error {
-	// Release reference.
-	b.in = nil
-	if b.bitsRead > 64 {
-		return io.ErrUnexpectedEOF
-	}
-	return nil
-}
-
 // bitReader reads a bitstream in reverse.
 // The last set bit indicates the start of the stream and is used
 // for aligning the input.
@@ -172,7 +67,6 @@ func (b *bitReaderBytes) fillFast() {
 
 	// 2 bounds checks.
 	v := b.in[b.off-4 : b.off]
-	v = v[:4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value |= uint64(low) << (b.bitsRead - 32)
 	b.bitsRead -= 32
@@ -193,8 +87,7 @@ func (b *bitReaderBytes) fill() {
 		return
 	}
 	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+		v := b.in[b.off-4 : b.off]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value |= uint64(low) << (b.bitsRead - 32)
 		b.bitsRead -= 32
@@ -213,10 +106,17 @@ func (b *bitReaderBytes) finished() bool {
 	return b.off == 0 && b.bitsRead >= 64
 }
 
+func (b *bitReaderBytes) remaining() uint {
+	return b.off*8 + uint(64-b.bitsRead)
+}
+
 // close the bitstream and returns an error if out-of-buffer reads occurred.
 func (b *bitReaderBytes) close() error {
 	// Release reference.
 	b.in = nil
+	if b.remaining() > 0 {
+		return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining())
+	}
 	if b.bitsRead > 64 {
 		return io.ErrUnexpectedEOF
 	}
@@ -277,7 +177,6 @@ func (b *bitReaderShifted) fillFast() {
 
 	// 2 bounds checks.
 	v := b.in[b.off-4 : b.off]
-	v = v[:4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
 	b.bitsRead -= 32
@@ -298,8 +197,7 @@ func (b *bitReaderShifted) fill() {
 		return
 	}
 	if b.off > 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+		v := b.in[b.off-4 : b.off]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
 		b.bitsRead -= 32
@@ -313,15 +211,17 @@ func (b *bitReaderShifted) fill() {
 	}
 }
 
-// finished returns true if all bits have been read from the bit stream.
-func (b *bitReaderShifted) finished() bool {
-	return b.off == 0 && b.bitsRead >= 64
+func (b *bitReaderShifted) remaining() uint {
+	return b.off*8 + uint(64-b.bitsRead)
 }
 
 // close the bitstream and returns an error if out-of-buffer reads occurred.
 func (b *bitReaderShifted) close() error {
 	// Release reference.
 	b.in = nil
+	if b.remaining() > 0 {
+		return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining())
+	}
 	if b.bitsRead > 64 {
 		return io.ErrUnexpectedEOF
 	}
diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
index 6bce4e87d4f..0ebc9aaac76 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
@@ -5,8 +5,6 @@
 
 package huff0
 
-import "fmt"
-
 // bitWriter will write bits.
 // First bit will be LSB of the first byte of output.
 type bitWriter struct {
@@ -15,22 +13,6 @@ type bitWriter struct {
 	out          []byte
 }
 
-// bitMask16 is bitmasks. Has extra to avoid bounds check.
-var bitMask16 = [32]uint16{
-	0, 1, 3, 7, 0xF, 0x1F,
-	0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
-	0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
-	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
-	0xFFFF, 0xFFFF} /* up to 16 bits */
-
-// addBits16NC will add up to 16 bits.
-// It will not check if there is space for them,
-// so the caller must ensure that it has flushed recently.
-func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
-	b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
-	b.nBits += bits
-}
-
 // addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
 // It will not check if there is space for them, so the caller must ensure that it has flushed recently.
 func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -70,102 +52,20 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
 	b.nBits += encA.nBits + encB.nBits
 }
 
-// addBits16ZeroNC will add up to 16 bits.
+// encFourSymbols adds up to 32 bits from four symbols.
 // It will not check if there is space for them,
-// so the caller must ensure that it has flushed recently.
-// This is fastest if bits can be zero.
-func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
-	if bits == 0 {
-		return
-	}
-	value <<= (16 - bits) & 15
-	value >>= (16 - bits) & 15
-	b.bitContainer |= uint64(value) << (b.nBits & 63)
-	b.nBits += bits
-}
-
-// flush will flush all pending full bytes.
-// There will be at least 56 bits available for writing when this has been called.
-// Using flush32 is faster, but leaves less space for writing.
-func (b *bitWriter) flush() {
-	v := b.nBits >> 3
-	switch v {
-	case 0:
-		return
-	case 1:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-		)
-		b.bitContainer >>= 1 << 3
-	case 2:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-		)
-		b.bitContainer >>= 2 << 3
-	case 3:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-		)
-		b.bitContainer >>= 3 << 3
-	case 4:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-		)
-		b.bitContainer >>= 4 << 3
-	case 5:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-		)
-		b.bitContainer >>= 5 << 3
-	case 6:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-			byte(b.bitContainer>>40),
-		)
-		b.bitContainer >>= 6 << 3
-	case 7:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-			byte(b.bitContainer>>40),
-			byte(b.bitContainer>>48),
-		)
-		b.bitContainer >>= 7 << 3
-	case 8:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-			byte(b.bitContainer>>40),
-			byte(b.bitContainer>>48),
-			byte(b.bitContainer>>56),
-		)
-		b.bitContainer = 0
-		b.nBits = 0
-		return
-	default:
-		panic(fmt.Errorf("bits (%d) > 64", b.nBits))
-	}
-	b.nBits &= 7
+// so the caller must ensure that b has been flushed recently.
+func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
+	bitsA := encA.nBits
+	bitsB := bitsA + encB.nBits
+	bitsC := bitsB + encC.nBits
+	bitsD := bitsC + encD.nBits
+	combined := uint64(encA.val) |
+		(uint64(encB.val) << (bitsA & 63)) |
+		(uint64(encC.val) << (bitsB & 63)) |
+		(uint64(encD.val) << (bitsC & 63))
+	b.bitContainer |= combined << (b.nBits & 63)
+	b.nBits += bitsD
 }
 
 // flush32 will flush out, so there are at least 32 bits available for writing.
@@ -194,17 +94,9 @@ func (b *bitWriter) flushAlign() {
 
 // close will write the alignment bit and write the final byte(s)
 // to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
 	// End mark
 	b.addBits16Clean(1, 1)
 	// flush until next byte.
 	b.flushAlign()
-	return nil
-}
-
-// reset and continue writing by appending to out.
-func (b *bitWriter) reset(out []byte) {
-	b.bitContainer = 0
-	b.nBits = 0
-	b.out = out
 }
diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go
deleted file mode 100644
index 50bcdf6ea99..00000000000
--- a/vendor/github.com/klauspost/compress/huff0/bytereader.go
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2018 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
-
-package huff0
-
-// byteReader provides a byte reader that reads
-// little endian values from a byte stream.
-// The input stream is manually advanced.
-// The reader performs no bounds checks.
-type byteReader struct {
-	b   []byte
-	off int
-}
-
-// init will initialize the reader and set the input.
-func (b *byteReader) init(in []byte) {
-	b.b = in
-	b.off = 0
-}
-
-// advance the stream b n bytes.
-func (b *byteReader) advance(n uint) {
-	b.off += int(n)
-}
-
-// Int32 returns a little endian int32 starting at current offset.
-func (b byteReader) Int32() int32 {
-	v3 := int32(b.b[b.off+3])
-	v2 := int32(b.b[b.off+2])
-	v1 := int32(b.b[b.off+1])
-	v0 := int32(b.b[b.off])
-	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// Uint32 returns a little endian uint32 starting at current offset.
-func (b byteReader) Uint32() uint32 {
-	v3 := uint32(b.b[b.off+3])
-	v2 := uint32(b.b[b.off+2])
-	v1 := uint32(b.b[b.off+1])
-	v0 := uint32(b.b[b.off])
-	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// unread returns the unread portion of the input.
-func (b byteReader) unread() []byte {
-	return b.b[b.off:]
-}
-
-// remain will return the number of bytes remaining.
-func (b byteReader) remain() int {
-	return len(b.b) - b.off
-}
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
index 8323dc05389..84aa3d12f00 100644
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -2,6 +2,7 @@ package huff0
 
 import (
 	"fmt"
+	"math"
 	"runtime"
 	"sync"
 )
@@ -226,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err
 }
 
 func (s *Scratch) compress1X(src []byte) ([]byte, error) {
-	return s.compress1xDo(s.Out, src)
+	return s.compress1xDo(s.Out, src), nil
 }
 
-func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+func (s *Scratch) compress1xDo(dst, src []byte) []byte {
 	var bw = bitWriter{out: dst}
 
 	// N is length divisible by 4.
@@ -247,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
 			tmp := src[n : n+4]
 			// tmp should be len 4
 			bw.flush32()
-			bw.encTwoSymbols(cTable, tmp[3], tmp[2])
-			bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+			bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]])
 		}
 	} else {
 		for ; n >= 0; n -= 4 {
@@ -260,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
 			bw.encTwoSymbols(cTable, tmp[1], tmp[0])
 		}
 	}
-	err := bw.close()
-	return bw.out, err
+	bw.close()
+	return bw.out
 }
 
 var sixZeros [6]byte
@@ -283,11 +283,11 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) {
 		}
 		src = src[len(toDo):]
 
-		var err error
 		idx := len(s.Out)
-		s.Out, err = s.compress1xDo(s.Out, toDo)
-		if err != nil {
-			return nil, err
+		s.Out = s.compress1xDo(s.Out, toDo)
+		if len(s.Out)-idx > math.MaxUint16 {
+			// We cannot store the size in the jump table
+			return nil, ErrIncompressible
 		}
 		// Write compressed length as little endian before block.
 		if i < 3 {
@@ -311,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 
 	segmentSize := (len(src) + 3) / 4
 	var wg sync.WaitGroup
-	var errs [4]error
 	wg.Add(4)
 	for i := 0; i < 4; i++ {
 		toDo := src
@@ -322,16 +321,17 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 
 		// Separate goroutine for each block.
 		go func(i int) {
-			s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+			s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
 			wg.Done()
 		}(i)
 	}
 	wg.Wait()
 	for i := 0; i < 4; i++ {
-		if errs[i] != nil {
-			return nil, errs[i]
-		}
 		o := s.tmpOut[i]
+		if len(o) > math.MaxUint16 {
+			// We cannot store the size in the jump table
+			return nil, ErrIncompressible
+		}
 		// Write compressed length as little endian before block.
 		if i < 3 {
 			// Last length is not written.
@@ -350,35 +350,36 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 // Does not update s.clearCount.
 func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
 	reuse = true
+	_ = s.count // Assert that s != nil to speed up the following loop.
 	for _, v := range in {
 		s.count[v]++
 	}
 	m := uint32(0)
 	if len(s.prevTable) > 0 {
 		for i, v := range s.count[:] {
+			if v == 0 {
+				continue
+			}
 			if v > m {
 				m = v
 			}
-			if v > 0 {
-				s.symbolLen = uint16(i) + 1
-				if i >= len(s.prevTable) {
-					reuse = false
-				} else {
-					if s.prevTable[i].nBits == 0 {
-						reuse = false
-					}
-				}
+			s.symbolLen = uint16(i) + 1
+			if i >= len(s.prevTable) {
+				reuse = false
+			} else if s.prevTable[i].nBits == 0 {
+				reuse = false
 			}
 		}
 		return int(m), reuse
 	}
 	for i, v := range s.count[:] {
+		if v == 0 {
+			continue
+		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
-			s.symbolLen = uint16(i) + 1
-		}
+		s.symbolLen = uint16(i) + 1
 	}
 	return int(m), false
 }
@@ -395,6 +396,7 @@ func (s *Scratch) canUseTable(c cTable) bool {
 	return true
 }
 
+//lint:ignore U1000 used for debugging
 func (s *Scratch) validateTable(c cTable) bool {
 	if len(c) < int(s.symbolLen) {
 		return false
@@ -414,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool {
 
 // minTableLog provides the minimum logSize to safely represent a distribution.
 func (s *Scratch) minTableLog() uint8 {
-	minBitsSrc := highBit32(uint32(s.br.remain())) + 1
+	minBitsSrc := highBit32(uint32(s.srcLen)) + 1
 	minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
 	if minBitsSrc < minBitsSymbols {
 		return uint8(minBitsSrc)
@@ -426,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 {
 func (s *Scratch) optimalTableLog() {
 	tableLog := s.TableLog
 	minBits := s.minTableLog()
-	maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
+	maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
 	if maxBitsSrc < tableLog {
 		// Accuracy can be reduced
 		tableLog = maxBitsSrc
@@ -474,34 +476,35 @@ func (s *Scratch) buildCTable() error {
 	// Different from reference implementation.
 	huffNode0 := s.nodes[0 : huffNodesLen+1]
 
-	for huffNode[nonNullRank].count == 0 {
+	for huffNode[nonNullRank].count() == 0 {
 		nonNullRank--
 	}
 
 	lowS := int16(nonNullRank)
 	nodeRoot := nodeNb + lowS - 1
 	lowN := nodeNb
-	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
-	huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+	huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count())
+	huffNode[lowS].setParent(nodeNb)
+	huffNode[lowS-1].setParent(nodeNb)
 	nodeNb++
 	lowS -= 2
 	for n := nodeNb; n <= nodeRoot; n++ {
-		huffNode[n].count = 1 << 30
+		huffNode[n].setCount(1 << 30)
 	}
 	// fake entry, strong barrier
-	huffNode0[0].count = 1 << 31
+	huffNode0[0].setCount(1 << 31)
 
 	// create parents
 	for nodeNb <= nodeRoot {
 		var n1, n2 int16
-		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
 			n1 = lowS
 			lowS--
 		} else {
 			n1 = lowN
 			lowN++
 		}
-		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+		if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
 			n2 = lowS
 			lowS--
 		} else {
@@ -509,18 +512,19 @@ func (s *Scratch) buildCTable() error {
 			lowN++
 		}
 
-		huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
-		huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+		huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count())
+		huffNode0[n1+1].setParent(nodeNb)
+		huffNode0[n2+1].setParent(nodeNb)
 		nodeNb++
 	}
 
 	// distribute weights (unlimited tree height)
-	huffNode[nodeRoot].nbBits = 0
+	huffNode[nodeRoot].setNbBits(0)
 	for n := nodeRoot - 1; n >= startNode; n-- {
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
 	}
 	for n := uint16(0); n <= nonNullRank; n++ {
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+		huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
 	}
 	s.actualTableLog = s.setMaxHeight(int(nonNullRank))
 	maxNbBits := s.actualTableLog
@@ -532,7 +536,7 @@ func (s *Scratch) buildCTable() error {
 	var nbPerRank [tableLogMax + 1]uint16
 	var valPerRank [16]uint16
 	for _, v := range huffNode[:nonNullRank+1] {
-		nbPerRank[v.nbBits]++
+		nbPerRank[v.nbBits()]++
 	}
 	// determine stating value per rank
 	{
@@ -547,7 +551,7 @@ func (s *Scratch) buildCTable() error {
 
 	// push nbBits per symbol, symbol order
 	for _, v := range huffNode[:nonNullRank+1] {
-		s.cTable[v.symbol].nBits = v.nbBits
+		s.cTable[v.symbol()].nBits = v.nbBits()
 	}
 
 	// assign value within rank, symbol order
@@ -593,12 +597,12 @@ func (s *Scratch) huffSort() {
 		pos := rank[r].current
 		rank[r].current++
 		prev := nodes[(pos-1)&huffNodesMask]
-		for pos > rank[r].base && c > prev.count {
+		for pos > rank[r].base && c > prev.count() {
 			nodes[pos&huffNodesMask] = prev
 			pos--
 			prev = nodes[(pos-1)&huffNodesMask]
 		}
-		nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+		nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n))
 	}
 }
 
@@ -607,7 +611,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	huffNode := s.nodes[1 : huffNodesLen+1]
 	//huffNode = huffNode[: huffNodesLen]
 
-	largestBits := huffNode[lastNonNull].nbBits
+	largestBits := huffNode[lastNonNull].nbBits()
 
 	// early exit : no elt > maxNbBits
 	if largestBits <= maxNbBits {
@@ -617,14 +621,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	baseCost := int(1) << (largestBits - maxNbBits)
 	n := uint32(lastNonNull)
 
-	for huffNode[n].nbBits > maxNbBits {
-		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
-		huffNode[n].nbBits = maxNbBits
+	for huffNode[n].nbBits() > maxNbBits {
+		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits()))
+		huffNode[n].setNbBits(maxNbBits)
 		n--
 	}
 	// n stops at huffNode[n].nbBits <= maxNbBits
 
-	for huffNode[n].nbBits == maxNbBits {
+	for huffNode[n].nbBits() == maxNbBits {
 		n--
 	}
 	// n end at index of smallest symbol using < maxNbBits
@@ -645,10 +649,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 		{
 			currentNbBits := maxNbBits
 			for pos := int(n); pos >= 0; pos-- {
-				if huffNode[pos].nbBits >= currentNbBits {
+				if huffNode[pos].nbBits() >= currentNbBits {
 					continue
 				}
-				currentNbBits = huffNode[pos].nbBits // < maxNbBits
+				currentNbBits = huffNode[pos].nbBits() // < maxNbBits
 				rankLast[maxNbBits-currentNbBits] = uint32(pos)
 			}
 		}
@@ -665,8 +669,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 				if lowPos == noSymbol {
 					break
 				}
-				highTotal := huffNode[highPos].count
-				lowTotal := 2 * huffNode[lowPos].count
+				highTotal := huffNode[highPos].count()
+				lowTotal := 2 * huffNode[lowPos].count()
 				if highTotal <= lowTotal {
 					break
 				}
@@ -682,13 +686,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 				// this rank is no longer empty
 				rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
 			}
-			huffNode[rankLast[nBitsToDecrease]].nbBits++
+			huffNode[rankLast[nBitsToDecrease]].setNbBits(1 +
+				huffNode[rankLast[nBitsToDecrease]].nbBits())
 			if rankLast[nBitsToDecrease] == 0 {
 				/* special case, reached largest symbol */
 				rankLast[nBitsToDecrease] = noSymbol
 			} else {
 				rankLast[nBitsToDecrease]--
-				if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+				if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease {
 					rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
 				}
 			}
@@ -696,15 +701,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 
 		for totalCost < 0 { /* Sometimes, cost correction overshoot */
 			if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
-				for huffNode[n].nbBits == maxNbBits {
+				for huffNode[n].nbBits() == maxNbBits {
 					n--
 				}
-				huffNode[n+1].nbBits--
+				huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1)
 				rankLast[1] = n + 1
 				totalCost++
 				continue
 			}
-			huffNode[rankLast[1]+1].nbBits--
+			huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1)
 			rankLast[1]++
 			totalCost++
 		}
@@ -712,9 +717,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
 	return maxNbBits
 }
 
-type nodeElt struct {
-	count  uint32
-	parent uint16
-	symbol byte
-	nbBits uint8
+// A nodeElt is the fields
+//
+//	count  uint32
+//	parent uint16
+//	symbol byte
+//	nbBits uint8
+//
+// in some order, all squashed into an integer so that the compiler
+// always loads and stores entire nodeElts instead of separate fields.
+type nodeElt uint64
+
+func makeNodeElt(count uint32, symbol byte) nodeElt {
+	return nodeElt(count) | nodeElt(symbol)<<48
 }
+
+func (e *nodeElt) count() uint32  { return uint32(*e) }
+func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
+func (e *nodeElt) symbol() byte   { return byte(*e >> 48) }
+func (e *nodeElt) nbBits() uint8  { return uint8(*e >> 56) }
+
+func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) }
+func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
+func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 9b7cc8e97bb..54bd08b25c0 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -4,13 +4,13 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"sync"
 
 	"github.com/klauspost/compress/fse"
 )
 
 type dTable struct {
 	single []dEntrySingle
-	double []dEntryDouble
 }
 
 // single-symbols decoding
@@ -18,13 +18,6 @@ type dEntrySingle struct {
 	entry uint16
 }
 
-// double-symbols decoding
-type dEntryDouble struct {
-	seq   uint16
-	nBits uint8
-	len   uint8
-}
-
 // Uses special code for all tables that are < 8 bits.
 const use8BitTables = true
 
@@ -34,7 +27,7 @@ const use8BitTables = true
 // If no Scratch is provided a new one is allocated.
 // The returned Scratch can be used for encoding or decoding input using this table.
 func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
-	s, err = s.prepare(in)
+	s, err = s.prepare(nil)
 	if err != nil {
 		return s, nil, err
 	}
@@ -68,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
 		b, err := fse.Decompress(in[:iSize], s.fse)
 		s.fse.Out = nil
 		if err != nil {
-			return s, nil, err
+			return s, nil, fmt.Errorf("fse decompress returned: %w", err)
 		}
 		if len(b) > 255 {
 			return s, nil, errors.New("corrupt input: output table too large")
@@ -216,6 +209,7 @@ func (s *Scratch) Decoder() *Decoder {
 	return &Decoder{
 		dt:             s.dt,
 		actualTableLog: s.actualTableLog,
+		bufs:           &s.decPool,
 	}
 }
 
@@ -223,103 +217,15 @@ func (s *Scratch) Decoder() *Decoder {
 type Decoder struct {
 	dt             dTable
 	actualTableLog uint8
+	bufs           *sync.Pool
 }
 
-// Decompress1X will decompress a 1X encoded stream.
-// The cap of the output buffer will be the maximum decompressed size.
-// The length of the supplied input must match the end of a block exactly.
-func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
-	if len(d.dt.single) == 0 {
-		return nil, errors.New("no table loaded")
-	}
-	if use8BitTables && d.actualTableLog <= 8 {
-		return d.decompress1X8Bit(dst, src)
-	}
-	var br bitReaderShifted
-	err := br.init(src)
-	if err != nil {
-		return dst, err
-	}
-	maxDecodedSize := cap(dst)
-	dst = dst[:0]
-
-	// Avoid bounds check by always having full sized table.
-	const tlSize = 1 << tableLogMax
-	const tlMask = tlSize - 1
-	dt := d.dt.single[:tlSize]
-
-	// Use temp table to avoid bound checks/append penalty.
-	var buf [256]byte
-	var off uint8
-
-	for br.off >= 8 {
-		br.fillFast()
-		v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
-		br.advance(uint8(v.entry))
-		buf[off+0] = uint8(v.entry >> 8)
-
-		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
-		br.advance(uint8(v.entry))
-		buf[off+1] = uint8(v.entry >> 8)
-
-		// Refill
-		br.fillFast()
-
-		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
-		br.advance(uint8(v.entry))
-		buf[off+2] = uint8(v.entry >> 8)
-
-		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
-		br.advance(uint8(v.entry))
-		buf[off+3] = uint8(v.entry >> 8)
-
-		off += 4
-		if off == 0 {
-			if len(dst)+256 > maxDecodedSize {
-				br.close()
-				return nil, ErrMaxDecodedSizeExceeded
-			}
-			dst = append(dst, buf[:]...)
-		}
-	}
-
-	if len(dst)+int(off) > maxDecodedSize {
-		br.close()
-		return nil, ErrMaxDecodedSizeExceeded
-	}
-	dst = append(dst, buf[:off]...)
-
-	// br < 8, so uint8 is fine
-	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
-	for bitsLeft > 0 {
-		br.fill()
-		if false && br.bitsRead >= 32 {
-			if br.off >= 4 {
-				v := br.in[br.off-4:]
-				v = v[:4]
-				low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
-				br.value = (br.value << 32) | uint64(low)
-				br.bitsRead -= 32
-				br.off -= 4
-			} else {
-				for br.off > 0 {
-					br.value = (br.value << 8) | uint64(br.in[br.off-1])
-					br.bitsRead -= 8
-					br.off--
-				}
-			}
-		}
-		if len(dst) >= maxDecodedSize {
-			br.close()
-			return nil, ErrMaxDecodedSizeExceeded
-		}
-		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
-		nBits := uint8(v.entry)
-		br.advance(nBits)
-		bitsLeft -= nBits
-		dst = append(dst, uint8(v.entry>>8))
+func (d *Decoder) buffer() *[4][256]byte {
+	buf, ok := d.bufs.Get().(*[4][256]byte)
+	if ok {
+		return buf
 	}
-	return dst, br.close()
+	return &[4][256]byte{}
 }
 
 // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
@@ -341,12 +247,13 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 	dt := d.dt.single[:256]
 
 	// Use temp table to avoid bound checks/append penalty.
-	var buf [256]byte
+	bufs := d.buffer()
+	buf := &bufs[0]
 	var off uint8
 
 	switch d.actualTableLog {
 	case 8:
-		const shift = 8 - 8
+		const shift = 0
 		for br.off >= 4 {
 			br.fillFast()
 			v := dt[uint8(br.value>>(56+shift))]
@@ -369,6 +276,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
 					br.close()
+					d.bufs.Put(bufs)
 					return nil, ErrMaxDecodedSizeExceeded
 				}
 				dst = append(dst, buf[:]...)
@@ -398,6 +306,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
 					br.close()
+					d.bufs.Put(bufs)
 					return nil, ErrMaxDecodedSizeExceeded
 				}
 				dst = append(dst, buf[:]...)
@@ -426,6 +335,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			off += 4
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
+					d.bufs.Put(bufs)
 					br.close()
 					return nil, ErrMaxDecodedSizeExceeded
 				}
@@ -455,6 +365,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			off += 4
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
+					d.bufs.Put(bufs)
 					br.close()
 					return nil, ErrMaxDecodedSizeExceeded
 				}
@@ -484,6 +395,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			off += 4
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
+					d.bufs.Put(bufs)
 					br.close()
 					return nil, ErrMaxDecodedSizeExceeded
 				}
@@ -513,6 +425,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			off += 4
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
+					d.bufs.Put(bufs)
 					br.close()
 					return nil, ErrMaxDecodedSizeExceeded
 				}
@@ -542,6 +455,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			off += 4
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
+					d.bufs.Put(bufs)
 					br.close()
 					return nil, ErrMaxDecodedSizeExceeded
 				}
@@ -571,6 +485,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			off += 4
 			if off == 0 {
 				if len(dst)+256 > maxDecodedSize {
+					d.bufs.Put(bufs)
 					br.close()
 					return nil, ErrMaxDecodedSizeExceeded
 				}
@@ -578,10 +493,12 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 			}
 		}
 	default:
+		d.bufs.Put(bufs)
 		return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
 	}
 
 	if len(dst)+int(off) > maxDecodedSize {
+		d.bufs.Put(bufs)
 		br.close()
 		return nil, ErrMaxDecodedSizeExceeded
 	}
@@ -601,6 +518,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 		}
 		if len(dst) >= maxDecodedSize {
 			br.close()
+			d.bufs.Put(bufs)
 			return nil, ErrMaxDecodedSizeExceeded
 		}
 		v := dt[br.peekByteFast()>>shift]
@@ -609,6 +527,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
 		bitsLeft -= int8(nBits)
 		dst = append(dst, uint8(v.entry>>8))
 	}
+	d.bufs.Put(bufs)
 	return dst, br.close()
 }
 
@@ -628,7 +547,8 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 	dt := d.dt.single[:256]
 
 	// Use temp table to avoid bound checks/append penalty.
-	var buf [256]byte
+	bufs := d.buffer()
+	buf := &bufs[0]
 	var off uint8
 
 	const shift = 56
@@ -655,6 +575,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 		off += 4
 		if off == 0 {
 			if len(dst)+256 > maxDecodedSize {
+				d.bufs.Put(bufs)
 				br.close()
 				return nil, ErrMaxDecodedSizeExceeded
 			}
@@ -663,6 +584,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 	}
 
 	if len(dst)+int(off) > maxDecodedSize {
+		d.bufs.Put(bufs)
 		br.close()
 		return nil, ErrMaxDecodedSizeExceeded
 	}
@@ -679,6 +601,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 			}
 		}
 		if len(dst) >= maxDecodedSize {
+			d.bufs.Put(bufs)
 			br.close()
 			return nil, ErrMaxDecodedSizeExceeded
 		}
@@ -688,199 +611,10 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 		bitsLeft -= int8(nBits)
 		dst = append(dst, uint8(v.entry>>8))
 	}
+	d.bufs.Put(bufs)
 	return dst, br.close()
 }
 
-// Decompress4X will decompress a 4X encoded stream.
-// The length of the supplied input must match the end of a block exactly.
-// The *capacity* of the dst slice must match the destination size of
-// the uncompressed data exactly.
-func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
-	if len(d.dt.single) == 0 {
-		return nil, errors.New("no table loaded")
-	}
-	if len(src) < 6+(4*1) {
-		return nil, errors.New("input too small")
-	}
-	if use8BitTables && d.actualTableLog <= 8 {
-		return d.decompress4X8bit(dst, src)
-	}
-
-	var br [4]bitReaderShifted
-	start := 6
-	for i := 0; i < 3; i++ {
-		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
-		if start+length >= len(src) {
-			return nil, errors.New("truncated input (or invalid offset)")
-		}
-		err := br[i].init(src[start : start+length])
-		if err != nil {
-			return nil, err
-		}
-		start += length
-	}
-	err := br[3].init(src[start:])
-	if err != nil {
-		return nil, err
-	}
-
-	// destination, offset to match first output
-	dstSize := cap(dst)
-	dst = dst[:dstSize]
-	out := dst
-	dstEvery := (dstSize + 3) / 4
-
-	const tlSize = 1 << tableLogMax
-	const tlMask = tlSize - 1
-	single := d.dt.single[:tlSize]
-
-	// Use temp table to avoid bound checks/append penalty.
-	var buf [256]byte
-	var off uint8
-	var decoded int
-
-	// Decode 2 values from each decoder/loop.
-	const bufoff = 256 / 4
-	for {
-		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
-			break
-		}
-
-		{
-			const stream = 0
-			const stream2 = 1
-			br[stream].fillFast()
-			br[stream2].fillFast()
-
-			val := br[stream].peekBitsFast(d.actualTableLog)
-			v := single[val&tlMask]
-			br[stream].advance(uint8(v.entry))
-			buf[off+bufoff*stream] = uint8(v.entry >> 8)
-
-			val2 := br[stream2].peekBitsFast(d.actualTableLog)
-			v2 := single[val2&tlMask]
-			br[stream2].advance(uint8(v2.entry))
-			buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
-
-			val = br[stream].peekBitsFast(d.actualTableLog)
-			v = single[val&tlMask]
-			br[stream].advance(uint8(v.entry))
-			buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
-
-			val2 = br[stream2].peekBitsFast(d.actualTableLog)
-			v2 = single[val2&tlMask]
-			br[stream2].advance(uint8(v2.entry))
-			buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
-		}
-
-		{
-			const stream = 2
-			const stream2 = 3
-			br[stream].fillFast()
-			br[stream2].fillFast()
-
-			val := br[stream].peekBitsFast(d.actualTableLog)
-			v := single[val&tlMask]
-			br[stream].advance(uint8(v.entry))
-			buf[off+bufoff*stream] = uint8(v.entry >> 8)
-
-			val2 := br[stream2].peekBitsFast(d.actualTableLog)
-			v2 := single[val2&tlMask]
-			br[stream2].advance(uint8(v2.entry))
-			buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
-
-			val = br[stream].peekBitsFast(d.actualTableLog)
-			v = single[val&tlMask]
-			br[stream].advance(uint8(v.entry))
-			buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
-
-			val2 = br[stream2].peekBitsFast(d.actualTableLog)
-			v2 = single[val2&tlMask]
-			br[stream2].advance(uint8(v2.entry))
-			buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
-		}
-
-		off += 2
-
-		if off == bufoff {
-			if bufoff > dstEvery {
-				return nil, errors.New("corruption detected: stream overrun 1")
-			}
-			copy(out, buf[:bufoff])
-			copy(out[dstEvery:], buf[bufoff:bufoff*2])
-			copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
-			copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
-			off = 0
-			out = out[bufoff:]
-			decoded += 256
-			// There must at least be 3 buffers left.
-			if len(out) < dstEvery*3 {
-				return nil, errors.New("corruption detected: stream overrun 2")
-			}
-		}
-	}
-	if off > 0 {
-		ioff := int(off)
-		if len(out) < dstEvery*3+ioff {
-			return nil, errors.New("corruption detected: stream overrun 3")
-		}
-		copy(out, buf[:off])
-		copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
-		copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
-		copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
-		decoded += int(off) * 4
-		out = out[off:]
-	}
-
-	// Decode remaining.
-	for i := range br {
-		offset := dstEvery * i
-		br := &br[i]
-		bitsLeft := br.off*8 + uint(64-br.bitsRead)
-		for bitsLeft > 0 {
-			br.fill()
-			if false && br.bitsRead >= 32 {
-				if br.off >= 4 {
-					v := br.in[br.off-4:]
-					v = v[:4]
-					low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
-					br.value = (br.value << 32) | uint64(low)
-					br.bitsRead -= 32
-					br.off -= 4
-				} else {
-					for br.off > 0 {
-						br.value = (br.value << 8) | uint64(br.in[br.off-1])
-						br.bitsRead -= 8
-						br.off--
-					}
-				}
-			}
-			// end inline...
-			if offset >= len(out) {
-				return nil, errors.New("corruption detected: stream overrun 4")
-			}
-
-			// Read value and increment offset.
-			val := br.peekBitsFast(d.actualTableLog)
-			v := single[val&tlMask].entry
-			nBits := uint8(v)
-			br.advance(nBits)
-			bitsLeft -= uint(nBits)
-			out[offset] = uint8(v >> 8)
-			offset++
-		}
-		decoded += offset - dstEvery*i
-		err = br.close()
-		if err != nil {
-			return nil, err
-		}
-	}
-	if dstSize != decoded {
-		return nil, errors.New("corruption detected: short output block")
-	}
-	return dst, nil
-}
-
 // Decompress4X will decompress a 4X encoded stream.
 // The length of the supplied input must match the end of a block exactly.
 // The *capacity* of the dst slice must match the destination size of
@@ -914,18 +648,18 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
 	out := dst
 	dstEvery := (dstSize + 3) / 4
 
-	shift := (8 - d.actualTableLog) & 7
+	shift := (56 + (8 - d.actualTableLog)) & 63
 
 	const tlSize = 1 << 8
 	single := d.dt.single[:tlSize]
 
 	// Use temp table to avoid bound checks/append penalty.
-	var buf [256]byte
+	buf := d.buffer()
 	var off uint8
 	var decoded int
 
 	// Decode 4 values from each decoder/loop.
-	const bufoff = 256 / 4
+	const bufoff = 256
 	for {
 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 			break
@@ -935,120 +669,144 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
 			// Interleave 2 decodes.
 			const stream = 0
 			const stream2 = 1
-			br[stream].fillFast()
-			br[stream2].fillFast()
-
-			v := single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 := single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+1] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+2] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+3] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
+			br1 := &br[stream]
+			br2 := &br[stream2]
+			br1.fillFast()
+			br2.fillFast()
+
+			v := single[uint8(br1.value>>shift)].entry
+			v2 := single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off] = uint8(v >> 8)
+			buf[stream2][off] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+1] = uint8(v >> 8)
+			buf[stream2][off+1] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+2] = uint8(v >> 8)
+			buf[stream2][off+2] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+3] = uint8(v >> 8)
+			buf[stream2][off+3] = uint8(v2 >> 8)
 		}
 
 		{
 			const stream = 2
 			const stream2 = 3
-			br[stream].fillFast()
-			br[stream2].fillFast()
-
-			v := single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 := single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+1] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+2] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+3] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
+			br1 := &br[stream]
+			br2 := &br[stream2]
+			br1.fillFast()
+			br2.fillFast()
+
+			v := single[uint8(br1.value>>shift)].entry
+			v2 := single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off] = uint8(v >> 8)
+			buf[stream2][off] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+1] = uint8(v >> 8)
+			buf[stream2][off+1] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+2] = uint8(v >> 8)
+			buf[stream2][off+2] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+3] = uint8(v >> 8)
+			buf[stream2][off+3] = uint8(v2 >> 8)
 		}
 
 		off += 4
 
-		if off == bufoff {
+		if off == 0 {
 			if bufoff > dstEvery {
+				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 1")
 			}
-			copy(out, buf[:bufoff])
-			copy(out[dstEvery:], buf[bufoff:bufoff*2])
-			copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
-			copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
-			off = 0
-			out = out[bufoff:]
-			decoded += 256
 			// There must at least be 3 buffers left.
-			if len(out) < dstEvery*3 {
+			if len(out)-bufoff < dstEvery*3 {
+				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 2")
 			}
+			//copy(out, buf[0][:])
+			//copy(out[dstEvery:], buf[1][:])
+			//copy(out[dstEvery*2:], buf[2][:])
+			*(*[bufoff]byte)(out) = buf[0]
+			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+			out = out[bufoff:]
+			decoded += bufoff * 4
 		}
 	}
 	if off > 0 {
 		ioff := int(off)
 		if len(out) < dstEvery*3+ioff {
+			d.bufs.Put(buf)
 			return nil, errors.New("corruption detected: stream overrun 3")
 		}
-		copy(out, buf[:off])
-		copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
-		copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
-		copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+		copy(out, buf[0][:off])
+		copy(out[dstEvery:], buf[1][:off])
+		copy(out[dstEvery*2:], buf[2][:off])
+		copy(out[dstEvery*3:], buf[3][:off])
 		decoded += int(off) * 4
 		out = out[off:]
 	}
 
 	// Decode remaining.
+	// Decode remaining.
+	remainBytes := dstEvery - (decoded / 4)
 	for i := range br {
 		offset := dstEvery * i
+		endsAt := offset + remainBytes
+		if endsAt > len(out) {
+			endsAt = len(out)
+		}
 		br := &br[i]
-		bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+		bitsLeft := br.remaining()
 		for bitsLeft > 0 {
 			if br.finished() {
+				d.bufs.Put(buf)
 				return nil, io.ErrUnexpectedEOF
 			}
 			if br.bitsRead >= 56 {
@@ -1068,24 +826,31 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
 				}
 			}
 			// end inline...
-			if offset >= len(out) {
+			if offset >= endsAt {
+				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 4")
 			}
 
 			// Read value and increment offset.
-			v := single[br.peekByteFast()>>shift].entry
+			v := single[uint8(br.value>>shift)].entry
 			nBits := uint8(v)
 			br.advance(nBits)
-			bitsLeft -= int(nBits)
+			bitsLeft -= uint(nBits)
 			out[offset] = uint8(v >> 8)
 			offset++
 		}
+		if offset != endsAt {
+			d.bufs.Put(buf)
+			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
+		}
 		decoded += offset - dstEvery*i
 		err = br.close()
 		if err != nil {
+			d.bufs.Put(buf)
 			return nil, err
 		}
 	}
+	d.bufs.Put(buf)
 	if dstSize != decoded {
 		return nil, errors.New("corruption detected: short output block")
 	}
@@ -1121,18 +886,17 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
 	out := dst
 	dstEvery := (dstSize + 3) / 4
 
-	const shift = 0
+	const shift = 56
 	const tlSize = 1 << 8
-	const tlMask = tlSize - 1
 	single := d.dt.single[:tlSize]
 
 	// Use temp table to avoid bound checks/append penalty.
-	var buf [256]byte
+	buf := d.buffer()
 	var off uint8
 	var decoded int
 
 	// Decode 4 values from each decoder/loop.
-	const bufoff = 256 / 4
+	const bufoff = 256
 	for {
 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 			break
@@ -1142,98 +906,116 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
 			// Interleave 2 decodes.
 			const stream = 0
 			const stream2 = 1
-			br[stream].fillFast()
-			br[stream2].fillFast()
-
-			v := single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 := single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+1] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+2] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+3] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
+			br1 := &br[stream]
+			br2 := &br[stream2]
+			br1.fillFast()
+			br2.fillFast()
+
+			v := single[uint8(br1.value>>shift)].entry
+			v2 := single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off] = uint8(v >> 8)
+			buf[stream2][off] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+1] = uint8(v >> 8)
+			buf[stream2][off+1] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+2] = uint8(v >> 8)
+			buf[stream2][off+2] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+3] = uint8(v >> 8)
+			buf[stream2][off+3] = uint8(v2 >> 8)
 		}
 
 		{
 			const stream = 2
 			const stream2 = 3
-			br[stream].fillFast()
-			br[stream2].fillFast()
-
-			v := single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 := single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+1] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+2] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
-
-			v = single[br[stream].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream+3] = uint8(v >> 8)
-			br[stream].advance(uint8(v))
-
-			v2 = single[br[stream2].peekByteFast()>>shift].entry
-			buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
-			br[stream2].advance(uint8(v2))
+			br1 := &br[stream]
+			br2 := &br[stream2]
+			br1.fillFast()
+			br2.fillFast()
+
+			v := single[uint8(br1.value>>shift)].entry
+			v2 := single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off] = uint8(v >> 8)
+			buf[stream2][off] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+1] = uint8(v >> 8)
+			buf[stream2][off+1] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+2] = uint8(v >> 8)
+			buf[stream2][off+2] = uint8(v2 >> 8)
+
+			v = single[uint8(br1.value>>shift)].entry
+			v2 = single[uint8(br2.value>>shift)].entry
+			br1.bitsRead += uint8(v)
+			br1.value <<= v & 63
+			br2.bitsRead += uint8(v2)
+			br2.value <<= v2 & 63
+			buf[stream][off+3] = uint8(v >> 8)
+			buf[stream2][off+3] = uint8(v2 >> 8)
 		}
 
 		off += 4
 
-		if off == bufoff {
+		if off == 0 {
 			if bufoff > dstEvery {
+				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 1")
 			}
-			copy(out, buf[:bufoff])
-			copy(out[dstEvery:], buf[bufoff:bufoff*2])
-			copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
-			copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
-			off = 0
-			out = out[bufoff:]
-			decoded += 256
 			// There must at least be 3 buffers left.
-			if len(out) < dstEvery*3 {
+			if len(out)-bufoff < dstEvery*3 {
+				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 2")
 			}
+
+			//copy(out, buf[0][:])
+			//copy(out[dstEvery:], buf[1][:])
+			//copy(out[dstEvery*2:], buf[2][:])
+			// copy(out[dstEvery*3:], buf[3][:])
+			*(*[bufoff]byte)(out) = buf[0]
+			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+			out = out[bufoff:]
+			decoded += bufoff * 4
 		}
 	}
 	if off > 0 {
@@ -1241,21 +1023,27 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
 		if len(out) < dstEvery*3+ioff {
 			return nil, errors.New("corruption detected: stream overrun 3")
 		}
-		copy(out, buf[:off])
-		copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
-		copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
-		copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+		copy(out, buf[0][:off])
+		copy(out[dstEvery:], buf[1][:off])
+		copy(out[dstEvery*2:], buf[2][:off])
+		copy(out[dstEvery*3:], buf[3][:off])
 		decoded += int(off) * 4
 		out = out[off:]
 	}
 
 	// Decode remaining.
+	remainBytes := dstEvery - (decoded / 4)
 	for i := range br {
 		offset := dstEvery * i
+		endsAt := offset + remainBytes
+		if endsAt > len(out) {
+			endsAt = len(out)
+		}
 		br := &br[i]
-		bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+		bitsLeft := br.remaining()
 		for bitsLeft > 0 {
 			if br.finished() {
+				d.bufs.Put(buf)
 				return nil, io.ErrUnexpectedEOF
 			}
 			if br.bitsRead >= 56 {
@@ -1275,24 +1063,32 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
 				}
 			}
 			// end inline...
-			if offset >= len(out) {
+			if offset >= endsAt {
+				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 4")
 			}
 
 			// Read value and increment offset.
-			v := single[br.peekByteFast()>>shift].entry
+			v := single[br.peekByteFast()].entry
 			nBits := uint8(v)
 			br.advance(nBits)
-			bitsLeft -= int(nBits)
+			bitsLeft -= uint(nBits)
 			out[offset] = uint8(v >> 8)
 			offset++
 		}
+		if offset != endsAt {
+			d.bufs.Put(buf)
+			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
+		}
+
 		decoded += offset - dstEvery*i
 		err = br.close()
 		if err != nil {
+			d.bufs.Put(buf)
 			return nil, err
 		}
 	}
+	d.bufs.Put(buf)
 	if dstSize != decoded {
 		return nil, errors.New("corruption detected: short output block")
 	}
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
new file mode 100644
index 00000000000..ba7e8e6b027
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
@@ -0,0 +1,226 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// This file contains the specialisation of Decoder.Decompress4X
+// and Decoder.Decompress1X that use an asm implementation of thir main loops.
+package huff0
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/klauspost/compress/internal/cpuinfo"
+)
+
+// decompress4x_main_loop_x86 is an x86 assembler implementation
+// of Decompress4X when tablelog > 8.
+//
+//go:noescape
+func decompress4x_main_loop_amd64(ctx *decompress4xContext)
+
+// decompress4x_8b_loop_x86 is an x86 assembler implementation
+// of Decompress4X when tablelog <= 8 which decodes 4 entries
+// per loop.
+//
+//go:noescape
+func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
+
+// fallback8BitSize is the size where using Go version is faster.
+const fallback8BitSize = 800
+
+type decompress4xContext struct {
+	pbr      *[4]bitReaderShifted
+	peekBits uint8
+	out      *byte
+	dstEvery int
+	tbl      *dEntrySingle
+	decoded  int
+	limit    *byte
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
+	if len(d.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	if len(src) < 6+(4*1) {
+		return nil, errors.New("input too small")
+	}
+
+	use8BitTables := d.actualTableLog <= 8
+	if cap(dst) < fallback8BitSize && use8BitTables {
+		return d.decompress4X8bit(dst, src)
+	}
+
+	var br [4]bitReaderShifted
+	// Decode "jump table"
+	start := 6
+	for i := 0; i < 3; i++ {
+		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+		if start+length >= len(src) {
+			return nil, errors.New("truncated input (or invalid offset)")
+		}
+		err := br[i].init(src[start : start+length])
+		if err != nil {
+			return nil, err
+		}
+		start += length
+	}
+	err := br[3].init(src[start:])
+	if err != nil {
+		return nil, err
+	}
+
+	// destination, offset to match first output
+	dstSize := cap(dst)
+	dst = dst[:dstSize]
+	out := dst
+	dstEvery := (dstSize + 3) / 4
+
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	single := d.dt.single[:tlSize]
+
+	var decoded int
+
+	if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) {
+		ctx := decompress4xContext{
+			pbr:      &br,
+			peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
+			out:      &out[0],
+			dstEvery: dstEvery,
+			tbl:      &single[0],
+			limit:    &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last.
+		}
+		if use8BitTables {
+			decompress4x_8b_main_loop_amd64(&ctx)
+		} else {
+			decompress4x_main_loop_amd64(&ctx)
+		}
+
+		decoded = ctx.decoded
+		out = out[decoded/4:]
+	}
+
+	// Decode remaining.
+	remainBytes := dstEvery - (decoded / 4)
+	for i := range br {
+		offset := dstEvery * i
+		endsAt := offset + remainBytes
+		if endsAt > len(out) {
+			endsAt = len(out)
+		}
+		br := &br[i]
+		bitsLeft := br.remaining()
+		for bitsLeft > 0 {
+			br.fill()
+			if offset >= endsAt {
+				return nil, errors.New("corruption detected: stream overrun 4")
+			}
+
+			// Read value and increment offset.
+			val := br.peekBitsFast(d.actualTableLog)
+			v := single[val&tlMask].entry
+			nBits := uint8(v)
+			br.advance(nBits)
+			bitsLeft -= uint(nBits)
+			out[offset] = uint8(v >> 8)
+			offset++
+		}
+		if offset != endsAt {
+			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
+		}
+		decoded += offset - dstEvery*i
+		err = br.close()
+		if err != nil {
+			return nil, err
+		}
+	}
+	if dstSize != decoded {
+		return nil, errors.New("corruption detected: short output block")
+	}
+	return dst, nil
+}
+
+// decompress4x_main_loop_x86 is an x86 assembler implementation
+// of Decompress1X when tablelog > 8.
+//
+//go:noescape
+func decompress1x_main_loop_amd64(ctx *decompress1xContext)
+
+// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation
+// of Decompress1X when tablelog > 8.
+//
+//go:noescape
+func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
+
+type decompress1xContext struct {
+	pbr      *bitReaderShifted
+	peekBits uint8
+	out      *byte
+	outCap   int
+	tbl      *dEntrySingle
+	decoded  int
+}
+
+// Error reported by asm implementations
+const error_max_decoded_size_exeeded = -1
+
+// Decompress1X will decompress a 1X encoded stream.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
+	if len(d.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	var br bitReaderShifted
+	err := br.init(src)
+	if err != nil {
+		return dst, err
+	}
+	maxDecodedSize := cap(dst)
+	dst = dst[:maxDecodedSize]
+
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+
+	if maxDecodedSize >= 4 {
+		ctx := decompress1xContext{
+			pbr:      &br,
+			out:      &dst[0],
+			outCap:   maxDecodedSize,
+			peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
+			tbl:      &d.dt.single[0],
+		}
+
+		if cpuinfo.HasBMI2() {
+			decompress1x_main_loop_bmi2(&ctx)
+		} else {
+			decompress1x_main_loop_amd64(&ctx)
+		}
+		if ctx.decoded == error_max_decoded_size_exeeded {
+			return nil, ErrMaxDecodedSizeExceeded
+		}
+
+		dst = dst[:ctx.decoded]
+	}
+
+	// br < 8, so uint8 is fine
+	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
+	for bitsLeft > 0 {
+		br.fill()
+		if len(dst) >= maxDecodedSize {
+			br.close()
+			return nil, ErrMaxDecodedSizeExceeded
+		}
+		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
+		nBits := uint8(v.entry)
+		br.advance(nBits)
+		bitsLeft -= nBits
+		dst = append(dst, uint8(v.entry>>8))
+	}
+	return dst, br.close()
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
new file mode 100644
index 00000000000..c4c7ab2d1fe
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
@@ -0,0 +1,830 @@
+// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
+
+//go:build amd64 && !appengine && !noasm && gc
+
+// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
+TEXT ·decompress4x_main_loop_amd64(SB), $0-8
+	// Preload values
+	MOVQ    ctx+0(FP), AX
+	MOVBQZX 8(AX), DI
+	MOVQ    16(AX), BX
+	MOVQ    48(AX), SI
+	MOVQ    24(AX), R8
+	MOVQ    32(AX), R9
+	MOVQ    (AX), R10
+
+	// Main loop
+main_loop:
+	XORL  DX, DX
+	CMPQ  BX, SI
+	SETGE DL
+
+	// br0.fillFast32()
+	MOVQ    32(R10), R11
+	MOVBQZX 40(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill0
+	MOVQ    24(R10), AX
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, AX
+	MOVQ    (R10), R13
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 24(R10)
+	ORQ  R13, R11
+
+	// exhausted += (br0.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
+
+skip_fill0:
+	// val0 := br0.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br0.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br0.peekTopBits(peekBits)
+	MOVQ DI, CX
+	MOVQ R11, R13
+	SHRQ CL, R13
+
+	// v1 := table[val1&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br0.advance(uint8(v1.entry))
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// these two writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	MOVW AX, (BX)
+
+	// update the bitreader structure
+	MOVQ R11, 32(R10)
+	MOVB R12, 40(R10)
+
+	// br1.fillFast32()
+	MOVQ    80(R10), R11
+	MOVBQZX 88(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill1
+	MOVQ    72(R10), AX
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, AX
+	MOVQ    48(R10), R13
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 72(R10)
+	ORQ  R13, R11
+
+	// exhausted += (br1.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
+
+skip_fill1:
+	// val0 := br1.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br1.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br1.peekTopBits(peekBits)
+	MOVQ DI, CX
+	MOVQ R11, R13
+	SHRQ CL, R13
+
+	// v1 := table[val1&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br1.advance(uint8(v1.entry))
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// these two writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	MOVW AX, (BX)(R8*1)
+
+	// update the bitreader structure
+	MOVQ R11, 80(R10)
+	MOVB R12, 88(R10)
+
+	// br2.fillFast32()
+	MOVQ    128(R10), R11
+	MOVBQZX 136(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill2
+	MOVQ    120(R10), AX
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, AX
+	MOVQ    96(R10), R13
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 120(R10)
+	ORQ  R13, R11
+
+	// exhausted += (br2.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
+
+skip_fill2:
+	// val0 := br2.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br2.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br2.peekTopBits(peekBits)
+	MOVQ DI, CX
+	MOVQ R11, R13
+	SHRQ CL, R13
+
+	// v1 := table[val1&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br2.advance(uint8(v1.entry))
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// these two writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	MOVW AX, (BX)(R8*2)
+
+	// update the bitreader structure
+	MOVQ R11, 128(R10)
+	MOVB R12, 136(R10)
+
+	// br3.fillFast32()
+	MOVQ    176(R10), R11
+	MOVBQZX 184(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill3
+	MOVQ    168(R10), AX
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, AX
+	MOVQ    144(R10), R13
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (AX)(R13*1), R13
+	MOVQ R12, CX
+	SHLQ CL, R13
+	MOVQ AX, 168(R10)
+	ORQ  R13, R11
+
+	// exhausted += (br3.off < 4)
+	CMPQ AX, $0x04
+	ADCB $+0, DL
+
+skip_fill3:
+	// val0 := br3.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br3.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br3.peekTopBits(peekBits)
+	MOVQ DI, CX
+	MOVQ R11, R13
+	SHRQ CL, R13
+
+	// v1 := table[val1&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br3.advance(uint8(v1.entry))
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// these two writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	LEAQ (R8)(R8*2), CX
+	MOVW AX, (BX)(CX*1)
+
+	// update the bitreader structure
+	MOVQ  R11, 176(R10)
+	MOVB  R12, 184(R10)
+	ADDQ  $0x02, BX
+	TESTB DL, DL
+	JZ    main_loop
+	MOVQ  ctx+0(FP), AX
+	SUBQ  16(AX), BX
+	SHLQ  $0x02, BX
+	MOVQ  BX, 40(AX)
+	RET
+
+// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
+TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
+	// Preload values
+	MOVQ    ctx+0(FP), CX
+	MOVBQZX 8(CX), DI
+	MOVQ    16(CX), BX
+	MOVQ    48(CX), SI
+	MOVQ    24(CX), R8
+	MOVQ    32(CX), R9
+	MOVQ    (CX), R10
+
+	// Main loop
+main_loop:
+	XORL  DX, DX
+	CMPQ  BX, SI
+	SETGE DL
+
+	// br0.fillFast32()
+	MOVQ    32(R10), R11
+	MOVBQZX 40(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill0
+	MOVQ    24(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    (R10), R14
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 24(R10)
+	ORQ  R14, R11
+
+	// exhausted += (br0.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
+
+skip_fill0:
+	// val0 := br0.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br0.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br0.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v1 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br0.advance(uint8(v1.entry)
+	MOVB   CH, AH
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// val2 := br0.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v2 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br0.advance(uint8(v2.entry)
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val3 := br0.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v3 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br0.advance(uint8(v3.entry)
+	MOVB   CH, AL
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// these four writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
+	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
+	MOVL AX, (BX)
+
+	// update the bitreader structure
+	MOVQ R11, 32(R10)
+	MOVB R12, 40(R10)
+
+	// br1.fillFast32()
+	MOVQ    80(R10), R11
+	MOVBQZX 88(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill1
+	MOVQ    72(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    48(R10), R14
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 72(R10)
+	ORQ  R14, R11
+
+	// exhausted += (br1.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
+
+skip_fill1:
+	// val0 := br1.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br1.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br1.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v1 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br1.advance(uint8(v1.entry)
+	MOVB   CH, AH
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// val2 := br1.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v2 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br1.advance(uint8(v2.entry)
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val3 := br1.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v3 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br1.advance(uint8(v3.entry)
+	MOVB   CH, AL
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// these four writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
+	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
+	MOVL AX, (BX)(R8*1)
+
+	// update the bitreader structure
+	MOVQ R11, 80(R10)
+	MOVB R12, 88(R10)
+
+	// br2.fillFast32()
+	MOVQ    128(R10), R11
+	MOVBQZX 136(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill2
+	MOVQ    120(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    96(R10), R14
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 120(R10)
+	ORQ  R14, R11
+
+	// exhausted += (br2.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
+
+skip_fill2:
+	// val0 := br2.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br2.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br2.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v1 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br2.advance(uint8(v1.entry)
+	MOVB   CH, AH
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// val2 := br2.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v2 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br2.advance(uint8(v2.entry)
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val3 := br2.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v3 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br2.advance(uint8(v3.entry)
+	MOVB   CH, AL
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// these four writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
+	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
+	MOVL AX, (BX)(R8*2)
+
+	// update the bitreader structure
+	MOVQ R11, 128(R10)
+	MOVB R12, 136(R10)
+
+	// br3.fillFast32()
+	MOVQ    176(R10), R11
+	MOVBQZX 184(R10), R12
+	CMPQ    R12, $0x20
+	JBE     skip_fill3
+	MOVQ    168(R10), R13
+	SUBQ    $0x20, R12
+	SUBQ    $0x04, R13
+	MOVQ    144(R10), R14
+
+	// b.value |= uint64(low) << (b.bitsRead & 63)
+	MOVL (R13)(R14*1), R14
+	MOVQ R12, CX
+	SHLQ CL, R14
+	MOVQ R13, 168(R10)
+	ORQ  R14, R11
+
+	// exhausted += (br3.off < 4)
+	CMPQ R13, $0x04
+	ADCB $+0, DL
+
+skip_fill3:
+	// val0 := br3.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v0 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br3.advance(uint8(v0.entry)
+	MOVB CH, AL
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val1 := br3.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v1 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br3.advance(uint8(v1.entry)
+	MOVB   CH, AH
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// val2 := br3.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v2 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br3.advance(uint8(v2.entry)
+	MOVB CH, AH
+	SHLQ CL, R11
+	ADDB CL, R12
+
+	// val3 := br3.peekTopBits(peekBits)
+	MOVQ R11, R13
+	MOVQ DI, CX
+	SHRQ CL, R13
+
+	// v3 := table[val0&mask]
+	MOVW (R9)(R13*2), CX
+
+	// br3.advance(uint8(v3.entry)
+	MOVB   CH, AL
+	SHLQ   CL, R11
+	ADDB   CL, R12
+	BSWAPL AX
+
+	// these four writes get coalesced
+	// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
+	// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
+	// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
+	// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
+	LEAQ (R8)(R8*2), CX
+	MOVL AX, (BX)(CX*1)
+
+	// update the bitreader structure
+	MOVQ  R11, 176(R10)
+	MOVB  R12, 184(R10)
+	ADDQ  $0x04, BX
+	TESTB DL, DL
+	JZ    main_loop
+	MOVQ  ctx+0(FP), AX
+	SUBQ  16(AX), BX
+	SHLQ  $0x02, BX
+	MOVQ  BX, 40(AX)
+	RET
+
+// func decompress1x_main_loop_amd64(ctx *decompress1xContext)
+TEXT ·decompress1x_main_loop_amd64(SB), $0-8
+	MOVQ    ctx+0(FP), CX
+	MOVQ    16(CX), DX
+	MOVQ    24(CX), BX
+	CMPQ    BX, $0x04
+	JB      error_max_decoded_size_exceeded
+	LEAQ    (DX)(BX*1), BX
+	MOVQ    (CX), SI
+	MOVQ    (SI), R8
+	MOVQ    24(SI), R9
+	MOVQ    32(SI), R10
+	MOVBQZX 40(SI), R11
+	MOVQ    32(CX), SI
+	MOVBQZX 8(CX), DI
+	JMP     loop_condition
+
+main_loop:
+	// Check if we have room for 4 bytes in the output buffer
+	LEAQ 4(DX), CX
+	CMPQ CX, BX
+	JGE  error_max_decoded_size_exceeded
+
+	// Decode 4 values
+	CMPQ R11, $0x20
+	JL   bitReader_fillFast_1_end
+	SUBQ $0x20, R11
+	SUBQ $0x04, R9
+	MOVL (R8)(R9*1), R12
+	MOVQ R11, CX
+	SHLQ CL, R12
+	ORQ  R12, R10
+
+bitReader_fillFast_1_end:
+	MOVQ    DI, CX
+	MOVQ    R10, R12
+	SHRQ    CL, R12
+	MOVW    (SI)(R12*2), CX
+	MOVB    CH, AL
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLQ    CL, R10
+	MOVQ    DI, CX
+	MOVQ    R10, R12
+	SHRQ    CL, R12
+	MOVW    (SI)(R12*2), CX
+	MOVB    CH, AH
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLQ    CL, R10
+	BSWAPL  AX
+	CMPQ    R11, $0x20
+	JL      bitReader_fillFast_2_end
+	SUBQ    $0x20, R11
+	SUBQ    $0x04, R9
+	MOVL    (R8)(R9*1), R12
+	MOVQ    R11, CX
+	SHLQ    CL, R12
+	ORQ     R12, R10
+
+bitReader_fillFast_2_end:
+	MOVQ    DI, CX
+	MOVQ    R10, R12
+	SHRQ    CL, R12
+	MOVW    (SI)(R12*2), CX
+	MOVB    CH, AH
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLQ    CL, R10
+	MOVQ    DI, CX
+	MOVQ    R10, R12
+	SHRQ    CL, R12
+	MOVW    (SI)(R12*2), CX
+	MOVB    CH, AL
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLQ    CL, R10
+	BSWAPL  AX
+
+	// Store the decoded values
+	MOVL AX, (DX)
+	ADDQ $0x04, DX
+
+loop_condition:
+	CMPQ R9, $0x08
+	JGE  main_loop
+
+	// Update ctx structure
+	MOVQ ctx+0(FP), AX
+	SUBQ 16(AX), DX
+	MOVQ DX, 40(AX)
+	MOVQ (AX), AX
+	MOVQ R9, 24(AX)
+	MOVQ R10, 32(AX)
+	MOVB R11, 40(AX)
+	RET
+
+	// Report error
+error_max_decoded_size_exceeded:
+	MOVQ ctx+0(FP), AX
+	MOVQ $-1, CX
+	MOVQ CX, 40(AX)
+	RET
+
+// func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
+// Requires: BMI2
+TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
+	MOVQ    ctx+0(FP), CX
+	MOVQ    16(CX), DX
+	MOVQ    24(CX), BX
+	CMPQ    BX, $0x04
+	JB      error_max_decoded_size_exceeded
+	LEAQ    (DX)(BX*1), BX
+	MOVQ    (CX), SI
+	MOVQ    (SI), R8
+	MOVQ    24(SI), R9
+	MOVQ    32(SI), R10
+	MOVBQZX 40(SI), R11
+	MOVQ    32(CX), SI
+	MOVBQZX 8(CX), DI
+	JMP     loop_condition
+
+main_loop:
+	// Check if we have room for 4 bytes in the output buffer
+	LEAQ 4(DX), CX
+	CMPQ CX, BX
+	JGE  error_max_decoded_size_exceeded
+
+	// Decode 4 values
+	CMPQ  R11, $0x20
+	JL    bitReader_fillFast_1_end
+	SUBQ  $0x20, R11
+	SUBQ  $0x04, R9
+	MOVL  (R8)(R9*1), CX
+	SHLXQ R11, CX, CX
+	ORQ   CX, R10
+
+bitReader_fillFast_1_end:
+	SHRXQ   DI, R10, CX
+	MOVW    (SI)(CX*2), CX
+	MOVB    CH, AL
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLXQ   CX, R10, R10
+	SHRXQ   DI, R10, CX
+	MOVW    (SI)(CX*2), CX
+	MOVB    CH, AH
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLXQ   CX, R10, R10
+	BSWAPL  AX
+	CMPQ    R11, $0x20
+	JL      bitReader_fillFast_2_end
+	SUBQ    $0x20, R11
+	SUBQ    $0x04, R9
+	MOVL    (R8)(R9*1), CX
+	SHLXQ   R11, CX, CX
+	ORQ     CX, R10
+
+bitReader_fillFast_2_end:
+	SHRXQ   DI, R10, CX
+	MOVW    (SI)(CX*2), CX
+	MOVB    CH, AH
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLXQ   CX, R10, R10
+	SHRXQ   DI, R10, CX
+	MOVW    (SI)(CX*2), CX
+	MOVB    CH, AL
+	MOVBQZX CL, CX
+	ADDQ    CX, R11
+	SHLXQ   CX, R10, R10
+	BSWAPL  AX
+
+	// Store the decoded values
+	MOVL AX, (DX)
+	ADDQ $0x04, DX
+
+loop_condition:
+	CMPQ R9, $0x08
+	JGE  main_loop
+
+	// Update ctx structure
+	MOVQ ctx+0(FP), AX
+	SUBQ 16(AX), DX
+	MOVQ DX, 40(AX)
+	MOVQ (AX), AX
+	MOVQ R9, 24(AX)
+	MOVQ R10, 32(AX)
+	MOVB R11, 40(AX)
+	RET
+
+	// Report error
+error_max_decoded_size_exceeded:
+	MOVQ ctx+0(FP), AX
+	MOVQ $-1, CX
+	MOVQ CX, 40(AX)
+	RET
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
new file mode 100644
index 00000000000..908c17de63f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
@@ -0,0 +1,299 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// This file contains a generic implementation of Decoder.Decompress4X.
+package huff0
+
+import (
+	"errors"
+	"fmt"
+)
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
+	if len(d.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	if len(src) < 6+(4*1) {
+		return nil, errors.New("input too small")
+	}
+	if use8BitTables && d.actualTableLog <= 8 {
+		return d.decompress4X8bit(dst, src)
+	}
+
+	var br [4]bitReaderShifted
+	// Decode "jump table"
+	start := 6
+	for i := 0; i < 3; i++ {
+		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+		if start+length >= len(src) {
+			return nil, errors.New("truncated input (or invalid offset)")
+		}
+		err := br[i].init(src[start : start+length])
+		if err != nil {
+			return nil, err
+		}
+		start += length
+	}
+	err := br[3].init(src[start:])
+	if err != nil {
+		return nil, err
+	}
+
+	// destination, offset to match first output
+	dstSize := cap(dst)
+	dst = dst[:dstSize]
+	out := dst
+	dstEvery := (dstSize + 3) / 4
+
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	single := d.dt.single[:tlSize]
+
+	// Use temp table to avoid bound checks/append penalty.
+	buf := d.buffer()
+	var off uint8
+	var decoded int
+
+	// Decode 2 values from each decoder/loop.
+	const bufoff = 256
+	for {
+		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+			break
+		}
+
+		{
+			const stream = 0
+			const stream2 = 1
+			br[stream].fillFast()
+			br[stream2].fillFast()
+
+			val := br[stream].peekBitsFast(d.actualTableLog)
+			val2 := br[stream2].peekBitsFast(d.actualTableLog)
+			v := single[val&tlMask]
+			v2 := single[val2&tlMask]
+			br[stream].advance(uint8(v.entry))
+			br[stream2].advance(uint8(v2.entry))
+			buf[stream][off] = uint8(v.entry >> 8)
+			buf[stream2][off] = uint8(v2.entry >> 8)
+
+			val = br[stream].peekBitsFast(d.actualTableLog)
+			val2 = br[stream2].peekBitsFast(d.actualTableLog)
+			v = single[val&tlMask]
+			v2 = single[val2&tlMask]
+			br[stream].advance(uint8(v.entry))
+			br[stream2].advance(uint8(v2.entry))
+			buf[stream][off+1] = uint8(v.entry >> 8)
+			buf[stream2][off+1] = uint8(v2.entry >> 8)
+		}
+
+		{
+			const stream = 2
+			const stream2 = 3
+			br[stream].fillFast()
+			br[stream2].fillFast()
+
+			val := br[stream].peekBitsFast(d.actualTableLog)
+			val2 := br[stream2].peekBitsFast(d.actualTableLog)
+			v := single[val&tlMask]
+			v2 := single[val2&tlMask]
+			br[stream].advance(uint8(v.entry))
+			br[stream2].advance(uint8(v2.entry))
+			buf[stream][off] = uint8(v.entry >> 8)
+			buf[stream2][off] = uint8(v2.entry >> 8)
+
+			val = br[stream].peekBitsFast(d.actualTableLog)
+			val2 = br[stream2].peekBitsFast(d.actualTableLog)
+			v = single[val&tlMask]
+			v2 = single[val2&tlMask]
+			br[stream].advance(uint8(v.entry))
+			br[stream2].advance(uint8(v2.entry))
+			buf[stream][off+1] = uint8(v.entry >> 8)
+			buf[stream2][off+1] = uint8(v2.entry >> 8)
+		}
+
+		off += 2
+
+		if off == 0 {
+			if bufoff > dstEvery {
+				d.bufs.Put(buf)
+				return nil, errors.New("corruption detected: stream overrun 1")
+			}
+			// There must at least be 3 buffers left.
+			if len(out)-bufoff < dstEvery*3 {
+				d.bufs.Put(buf)
+				return nil, errors.New("corruption detected: stream overrun 2")
+			}
+			//copy(out, buf[0][:])
+			//copy(out[dstEvery:], buf[1][:])
+			//copy(out[dstEvery*2:], buf[2][:])
+			//copy(out[dstEvery*3:], buf[3][:])
+			*(*[bufoff]byte)(out) = buf[0]
+			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+			out = out[bufoff:]
+			decoded += bufoff * 4
+		}
+	}
+	if off > 0 {
+		ioff := int(off)
+		if len(out) < dstEvery*3+ioff {
+			d.bufs.Put(buf)
+			return nil, errors.New("corruption detected: stream overrun 3")
+		}
+		copy(out, buf[0][:off])
+		copy(out[dstEvery:], buf[1][:off])
+		copy(out[dstEvery*2:], buf[2][:off])
+		copy(out[dstEvery*3:], buf[3][:off])
+		decoded += int(off) * 4
+		out = out[off:]
+	}
+
+	// Decode remaining.
+	remainBytes := dstEvery - (decoded / 4)
+	for i := range br {
+		offset := dstEvery * i
+		endsAt := offset + remainBytes
+		if endsAt > len(out) {
+			endsAt = len(out)
+		}
+		br := &br[i]
+		bitsLeft := br.remaining()
+		for bitsLeft > 0 {
+			br.fill()
+			if offset >= endsAt {
+				d.bufs.Put(buf)
+				return nil, errors.New("corruption detected: stream overrun 4")
+			}
+
+			// Read value and increment offset.
+			val := br.peekBitsFast(d.actualTableLog)
+			v := single[val&tlMask].entry
+			nBits := uint8(v)
+			br.advance(nBits)
+			bitsLeft -= uint(nBits)
+			out[offset] = uint8(v >> 8)
+			offset++
+		}
+		if offset != endsAt {
+			d.bufs.Put(buf)
+			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
+		}
+		decoded += offset - dstEvery*i
+		err = br.close()
+		if err != nil {
+			return nil, err
+		}
+	}
+	d.bufs.Put(buf)
+	if dstSize != decoded {
+		return nil, errors.New("corruption detected: short output block")
+	}
+	return dst, nil
+}
+
+// Decompress1X will decompress a 1X encoded stream.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
+	if len(d.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	if use8BitTables && d.actualTableLog <= 8 {
+		return d.decompress1X8Bit(dst, src)
+	}
+	var br bitReaderShifted
+	err := br.init(src)
+	if err != nil {
+		return dst, err
+	}
+	maxDecodedSize := cap(dst)
+	dst = dst[:0]
+
+	// Avoid bounds check by always having full sized table.
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	dt := d.dt.single[:tlSize]
+
+	// Use temp table to avoid bound checks/append penalty.
+	bufs := d.buffer()
+	buf := &bufs[0]
+	var off uint8
+
+	for br.off >= 8 {
+		br.fillFast()
+		v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+0] = uint8(v.entry >> 8)
+
+		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+1] = uint8(v.entry >> 8)
+
+		// Refill
+		br.fillFast()
+
+		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+2] = uint8(v.entry >> 8)
+
+		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+3] = uint8(v.entry >> 8)
+
+		off += 4
+		if off == 0 {
+			if len(dst)+256 > maxDecodedSize {
+				br.close()
+				d.bufs.Put(bufs)
+				return nil, ErrMaxDecodedSizeExceeded
+			}
+			dst = append(dst, buf[:]...)
+		}
+	}
+
+	if len(dst)+int(off) > maxDecodedSize {
+		d.bufs.Put(bufs)
+		br.close()
+		return nil, ErrMaxDecodedSizeExceeded
+	}
+	dst = append(dst, buf[:off]...)
+
+	// br < 8, so uint8 is fine
+	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
+	for bitsLeft > 0 {
+		br.fill()
+		if false && br.bitsRead >= 32 {
+			if br.off >= 4 {
+				v := br.in[br.off-4:]
+				v = v[:4]
+				low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+				br.value = (br.value << 32) | uint64(low)
+				br.bitsRead -= 32
+				br.off -= 4
+			} else {
+				for br.off > 0 {
+					br.value = (br.value << 8) | uint64(br.in[br.off-1])
+					br.bitsRead -= 8
+					br.off--
+				}
+			}
+		}
+		if len(dst) >= maxDecodedSize {
+			d.bufs.Put(bufs)
+			br.close()
+			return nil, ErrMaxDecodedSizeExceeded
+		}
+		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
+		nBits := uint8(v.entry)
+		br.advance(nBits)
+		bitsLeft -= nBits
+		dst = append(dst, uint8(v.entry>>8))
+	}
+	d.bufs.Put(bufs)
+	return dst, br.close()
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
index 3ee00ecb470..77ecd68e0a7 100644
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"math"
 	"math/bits"
+	"sync"
 
 	"github.com/klauspost/compress/fse"
 )
@@ -87,7 +88,7 @@ type Scratch struct {
 	// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
 	MaxDecodedSize int
 
-	br byteReader
+	srcLen int
 
 	// MaxSymbolValue will override the maximum symbol value of the next block.
 	MaxSymbolValue uint8
@@ -116,6 +117,7 @@ type Scratch struct {
 	nodes          []nodeElt
 	tmpOut         [4][]byte
 	fse            *fse.Scratch
+	decPool        sync.Pool // *[4][256]byte buffers.
 	huffWeight     [maxSymbolValue + 1]byte
 }
 
@@ -168,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) {
 	if s.fse == nil {
 		s.fse = &fse.Scratch{}
 	}
-	s.br.init(in)
+	s.srcLen = len(in)
 
 	return s, nil
 }
diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go
new file mode 100644
index 00000000000..3954c51219b
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go
@@ -0,0 +1,34 @@
+// Package cpuinfo gives runtime info about the current CPU.
+//
+// This is a very limited module meant for use internally
+// in this project. For more versatile solution check
+// https://github.com/klauspost/cpuid.
+package cpuinfo
+
+// HasBMI1 checks whether an x86 CPU supports the BMI1 extension.
+func HasBMI1() bool {
+	return hasBMI1
+}
+
+// HasBMI2 checks whether an x86 CPU supports the BMI2 extension.
+func HasBMI2() bool {
+	return hasBMI2
+}
+
+// DisableBMI2 will disable BMI2, for testing purposes.
+// Call returned function to restore previous state.
+func DisableBMI2() func() {
+	old := hasBMI2
+	hasBMI2 = false
+	return func() {
+		hasBMI2 = old
+	}
+}
+
+// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions.
+func HasBMI() bool {
+	return HasBMI1() && HasBMI2()
+}
+
+var hasBMI1 bool
+var hasBMI2 bool
diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
new file mode 100644
index 00000000000..e802579c4f9
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
@@ -0,0 +1,11 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+package cpuinfo
+
+// go:noescape
+func x86extensions() (bmi1, bmi2 bool)
+
+func init() {
+	hasBMI1, hasBMI2 = x86extensions()
+}
diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
new file mode 100644
index 00000000000..4465fbe9e90
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
@@ -0,0 +1,36 @@
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+#include "funcdata.h"
+#include "go_asm.h"
+
+TEXT ·x86extensions(SB), NOSPLIT, $0
+	// 1. determine max EAX value
+	XORQ AX, AX
+	CPUID
+
+	CMPQ AX, $7
+	JB   unsupported
+
+	// 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction"
+	MOVQ $7, AX
+	MOVQ $0, CX
+	CPUID
+
+	BTQ   $3, BX // bit 3 = BMI1
+	SETCS AL
+
+	BTQ   $8, BX // bit 8 = BMI2
+	SETCS AH
+
+	MOVB AL, bmi1+0(FP)
+	MOVB AH, bmi2+1(FP)
+	RET
+
+unsupported:
+	XORQ AX, AX
+	MOVB AL, bmi1+0(FP)
+	MOVB AL, bmi2+1(FP)
+	RET
diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
index 511bba65db8..2aa6a95a028 100644
--- a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
+++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
@@ -18,6 +18,7 @@ func load64(b []byte, i int) uint64 {
 // emitLiteral writes a literal chunk and returns the number of bytes written.
 //
 // It assumes that:
+//
 //	dst is long enough to hold the encoded bytes
 //	1 <= len(lit) && len(lit) <= 65536
 func emitLiteral(dst, lit []byte) int {
@@ -42,6 +43,7 @@ func emitLiteral(dst, lit []byte) int {
 // emitCopy writes a copy chunk and returns the number of bytes written.
 //
 // It assumes that:
+//
 //	dst is long enough to hold the encoded bytes
 //	1 <= offset && offset <= 65535
 //	4 <= length && length <= 65535
@@ -85,28 +87,40 @@ func emitCopy(dst []byte, offset, length int) int {
 	return i + 2
 }
 
-// extendMatch returns the largest k such that k <= len(src) and that
-// src[i:i+k-j] and src[j:k] have the same contents.
-//
-// It assumes that:
-//	0 <= i && i < j && j <= len(src)
-func extendMatch(src []byte, i, j int) int {
-	for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
-	}
-	return j
-}
-
 func hash(u, shift uint32) uint32 {
 	return (u * 0x1e35a7bd) >> shift
 }
 
+// EncodeBlockInto exposes encodeBlock but checks dst size.
+func EncodeBlockInto(dst, src []byte) (d int) {
+	if MaxEncodedLen(len(src)) > len(dst) {
+		return 0
+	}
+
+	// encodeBlock breaks on too big blocks, so split.
+	for len(src) > 0 {
+		p := src
+		src = nil
+		if len(p) > maxBlockSize {
+			p, src = p[:maxBlockSize], p[maxBlockSize:]
+		}
+		if len(p) < minNonLiteralBlockSize {
+			d += emitLiteral(dst[d:], p)
+		} else {
+			d += encodeBlock(dst[d:], p)
+		}
+	}
+	return d
+}
+
 // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
 //
 // It also assumes that:
+//
 //	len(dst) >= MaxEncodedLen(len(src)) &&
-// 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 func encodeBlock(dst, src []byte) (d int) {
 	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
 	// The table element type is uint16, as s < sLimit and sLimit < len(src)
diff --git a/vendor/github.com/klauspost/compress/s2sx.mod b/vendor/github.com/klauspost/compress/s2sx.mod
index 2263853fcad..5a4412f9070 100644
--- a/vendor/github.com/klauspost/compress/s2sx.mod
+++ b/vendor/github.com/klauspost/compress/s2sx.mod
@@ -1,4 +1,4 @@
 module github.com/klauspost/compress
 
-go 1.16
+go 1.19
 
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index c8f0f16fc1e..92e2347bbc0 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -12,6 +12,8 @@ The `zstd` package is provided as open source software using a Go standard licen
 
 Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
 
+For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go).
+
 ## Installation
 
 Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
@@ -78,6 +80,9 @@ of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is
 in the future. So if you want to limit concurrency for future updates, specify the concurrency
 you would like.
 
+If you would like stream encoding to be done without spawning async goroutines, use `WithEncoderConcurrency(1)`
+which will compress input as each block is completed, blocking on writes until each has completed.
+
 You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined 
 compression settings can be specified.
 
@@ -104,7 +109,8 @@ and seems to ignore concatenated streams, even though [it is part of the spec](h
 For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`.
 
 `EncodeAll` will encode all input in src and append it to dst.
-This function can be called concurrently, but each call will only run on a single goroutine.
+This function can be called concurrently. 
+Each call will only run on a same goroutine as the caller.
 
 Encoded blocks can be concatenated and the result will be the combined input stream.
 Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`.
@@ -149,10 +155,10 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
 
 This package:
 file    out     level   insize      outsize     millis  mb/s
-silesia.tar zskp    1   211947520   73101992    643     313.87
-silesia.tar zskp    2   211947520   67504318    969     208.38
-silesia.tar zskp    3   211947520   64595893    2007    100.68
-silesia.tar zskp    4   211947520   60995370    8825    22.90
+silesia.tar zskp    1   211947520   73821326    634     318.47
+silesia.tar zskp    2   211947520   67655404    1508    133.96
+silesia.tar zskp    3   211947520   64746933    3000    67.37
+silesia.tar zskp    4   211947520   60073508    16926   11.94
 
 cgo zstd:
 silesia.tar zstd    1   211947520   73605392    543     371.56
@@ -161,99 +167,99 @@ silesia.tar zstd    6   211947520   62916450    1913    105.66
 silesia.tar zstd    9   211947520   60212393    5063    39.92
 
 gzip, stdlib/this package:
-silesia.tar gzstd   1   211947520   80007735    1654    122.21
-silesia.tar gzkp    1   211947520   80136201    1152    175.45
+silesia.tar gzstd   1   211947520   80007735    1498    134.87
+silesia.tar gzkp    1   211947520   80088272    1009    200.31
 
 GOB stream of binary data. Highly compressible.
 https://files.klauspost.com/compress/gob-stream.7z
 
 file        out     level   insize  outsize     millis  mb/s
-gob-stream  zskp    1   1911399616  235022249   3088    590.30
-gob-stream  zskp    2   1911399616  205669791   3786    481.34
-gob-stream  zskp    3   1911399616  175034659   9636    189.17
-gob-stream  zskp    4   1911399616  165609838   50369   36.19
+gob-stream  zskp    1   1911399616  233948096   3230    564.34
+gob-stream  zskp    2   1911399616  203997694   4997    364.73
+gob-stream  zskp    3   1911399616  173526523   13435   135.68
+gob-stream  zskp    4   1911399616  162195235   47559   38.33
 
 gob-stream  zstd    1   1911399616  249810424   2637    691.26
 gob-stream  zstd    3   1911399616  208192146   3490    522.31
 gob-stream  zstd    6   1911399616  193632038   6687    272.56
 gob-stream  zstd    9   1911399616  177620386   16175   112.70
 
-gob-stream  gzstd   1   1911399616  357382641   10251   177.82
-gob-stream  gzkp    1   1911399616  359753026   5438    335.20
+gob-stream  gzstd   1   1911399616  357382013   9046    201.49
+gob-stream  gzkp    1   1911399616  359136669   4885    373.08
 
 The test data for the Large Text Compression Benchmark is the first
 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
 http://mattmahoney.net/dc/textdata.html
 
 file    out level   insize      outsize     millis  mb/s
-enwik9  zskp    1   1000000000  343848582   3609    264.18
-enwik9  zskp    2   1000000000  317276632   5746    165.97
-enwik9  zskp    3   1000000000  292243069   12162   78.41
-enwik9  zskp    4   1000000000  262183768   82837   11.51
+enwik9  zskp    1   1000000000  343833605   3687    258.64
+enwik9  zskp    2   1000000000  317001237   7672    124.29
+enwik9  zskp    3   1000000000  291915823   15923   59.89
+enwik9  zskp    4   1000000000  261710291   77697   12.27
 
 enwik9  zstd    1   1000000000  358072021   3110    306.65
 enwik9  zstd    3   1000000000  313734672   4784    199.35
 enwik9  zstd    6   1000000000  295138875   10290   92.68
 enwik9  zstd    9   1000000000  278348700   28549   33.40
 
-enwik9  gzstd   1   1000000000  382578136   9604    99.30
-enwik9  gzkp    1   1000000000  383825945   6544    145.73
+enwik9  gzstd   1   1000000000  382578136   8608    110.78
+enwik9  gzkp    1   1000000000  382781160   5628    169.45
 
 Highly compressible JSON file.
 https://files.klauspost.com/compress/github-june-2days-2019.json.zst
 
 file                        out level   insize      outsize     millis  mb/s
-github-june-2days-2019.json zskp    1   6273951764  699045015   10620   563.40
-github-june-2days-2019.json zskp    2   6273951764  617881763   11687   511.96
-github-june-2days-2019.json zskp    3   6273951764  524340691   34043   175.75
-github-june-2days-2019.json zskp    4   6273951764  470320075   170190  35.16
+github-june-2days-2019.json zskp    1   6273951764  697439532   9789    611.17
+github-june-2days-2019.json zskp    2   6273951764  610876538   18553   322.49
+github-june-2days-2019.json zskp    3   6273951764  517662858   44186   135.41
+github-june-2days-2019.json zskp    4   6273951764  464617114   165373  36.18
 
 github-june-2days-2019.json zstd    1   6273951764  766284037   8450    708.00
 github-june-2days-2019.json zstd    3   6273951764  661889476   10927   547.57
 github-june-2days-2019.json zstd    6   6273951764  642756859   22996   260.18
 github-june-2days-2019.json zstd    9   6273951764  601974523   52413   114.16
 
-github-june-2days-2019.json gzstd   1   6273951764  1164400847  29948   199.79
-github-june-2days-2019.json gzkp    1   6273951764  1125417694  21788   274.61
+github-june-2days-2019.json gzstd   1   6273951764  1164397768  26793   223.32
+github-june-2days-2019.json gzkp    1   6273951764  1120631856  17693   338.16
 
 VM Image, Linux mint with a few installed applications:
 https://files.klauspost.com/compress/rawstudio-mint14.7z
 
 file                    out level   insize      outsize     millis  mb/s
-rawstudio-mint14.tar    zskp    1   8558382592  3667489370  20210   403.84
-rawstudio-mint14.tar    zskp    2   8558382592  3364592300  31873   256.07
-rawstudio-mint14.tar    zskp    3   8558382592  3158085214  77675   105.08
-rawstudio-mint14.tar    zskp    4   8558382592  2965110639  857750  9.52
+rawstudio-mint14.tar    zskp    1   8558382592  3718400221  18206   448.29
+rawstudio-mint14.tar    zskp    2   8558382592  3326118337  37074   220.15
+rawstudio-mint14.tar    zskp    3   8558382592  3163842361  87306   93.49
+rawstudio-mint14.tar    zskp    4   8558382592  2970480650  783862  10.41
 
 rawstudio-mint14.tar    zstd    1   8558382592  3609250104  17136   476.27
 rawstudio-mint14.tar    zstd    3   8558382592  3341679997  29262   278.92
 rawstudio-mint14.tar    zstd    6   8558382592  3235846406  77904   104.77
 rawstudio-mint14.tar    zstd    9   8558382592  3160778861  140946  57.91
 
-rawstudio-mint14.tar    gzstd   1   8558382592  3926257486  57722   141.40
-rawstudio-mint14.tar    gzkp    1   8558382592  3962605659  45113   180.92
+rawstudio-mint14.tar    gzstd   1   8558382592  3926234992  51345   158.96
+rawstudio-mint14.tar    gzkp    1   8558382592  3960117298  36722   222.26
 
 CSV data:
 https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
 
 file                    out level   insize      outsize     millis  mb/s
-nyc-taxi-data-10M.csv   zskp    1   3325605752  641339945   8925    355.35
-nyc-taxi-data-10M.csv   zskp    2   3325605752  591748091   11268   281.44
-nyc-taxi-data-10M.csv   zskp    3   3325605752  530289687   25239   125.66
-nyc-taxi-data-10M.csv   zskp    4   3325605752  476268884   135958  23.33
+nyc-taxi-data-10M.csv   zskp    1   3325605752  641319332   9462    335.17
+nyc-taxi-data-10M.csv   zskp    2   3325605752  588976126   17570   180.50
+nyc-taxi-data-10M.csv   zskp    3   3325605752  529329260   32432   97.79
+nyc-taxi-data-10M.csv   zskp    4   3325605752  474949772   138025  22.98
 
 nyc-taxi-data-10M.csv   zstd    1   3325605752  687399637   8233    385.18
 nyc-taxi-data-10M.csv   zstd    3   3325605752  598514411   10065   315.07
 nyc-taxi-data-10M.csv   zstd    6   3325605752  570522953   20038   158.27
 nyc-taxi-data-10M.csv   zstd    9   3325605752  517554797   64565   49.12
 
-nyc-taxi-data-10M.csv   gzstd   1   3325605752  928656485   23876   132.83
-nyc-taxi-data-10M.csv   gzkp    1   3325605752  922257165   16780   189.00
+nyc-taxi-data-10M.csv   gzstd   1   3325605752  928654908   21270   149.11
+nyc-taxi-data-10M.csv   gzkp    1   3325605752  922273214   13929   227.68
 ```
 
 ## Decompressor
 
-Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
+Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
 
 This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
 kindly supplied by [fuzzit.dev](https://fuzzit.dev/). 
@@ -283,8 +289,13 @@ func Decompress(in io.Reader, out io.Writer) error {
 }
 ```
 
-It is important to use the "Close" function when you no longer need the Reader to stop running goroutines. 
-See "Allocation-less operation" below.
+It is important to use the "Close" function when you no longer need the Reader to stop running goroutines, 
+when running with default settings.
+Goroutines will exit once an error has been returned, including `io.EOF` at the end of a stream.
+
+Streams are decoded concurrently in 4 asynchronous stages to give the best possible throughput.
+However, if you prefer synchronous decompression, use `WithDecoderConcurrency(1)` which will decompress data 
+as it is being requested only.
 
 For decoding buffers, it could look something like this:
 
@@ -293,7 +304,7 @@ import "github.com/klauspost/compress/zstd"
 
 // Create a reader that caches decompressors.
 // For this operation type we supply a nil Reader.
-var decoder, _ = zstd.NewReader(nil)
+var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))
 
 // Decompress a buffer. We don't supply a destination buffer,
 // so it will be allocated by the decoder.
@@ -303,9 +314,12 @@ func Decompress(src []byte) ([]byte, error) {
 ```
 
 Both of these cases should provide the functionality needed. 
-The decoder can be used for *concurrent* decompression of multiple buffers. 
+The decoder can be used for *concurrent* decompression of multiple buffers.
+By default 4 decompressors will be created. 
+
 It will only allow a certain number of concurrent operations to run. 
-To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.   
+To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
+It is possible to use `WithDecoderConcurrency(0)` to create GOMAXPROCS decoders.
 
 ### Dictionaries
 
@@ -357,62 +371,48 @@ In this case no unneeded allocations should be made.
 The buffer decoder does everything on the same goroutine and does nothing concurrently.
 It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that.
 
-The stream decoder operates on
+The stream decoder will create goroutines that:
 
-* One goroutine reads input and splits the input to several block decoders.
-* A number of decoders will decode blocks.
-* A goroutine coordinates these blocks and sends history from one to the next.
+1) Reads input and splits the input into blocks.
+2) Decompression of literals.
+3) Decompression of sequences.
+4) Reconstruction of output stream.
 
 So effectively this also means the decoder will "read ahead" and prepare data to always be available for output.
 
+The concurrency level will, for streams, determine how many blocks ahead the compression will start.
+
 Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency.
 
-In practice this means that concurrency is often limited to utilizing about 2 cores effectively.
- 
- 
+In practice this means that concurrency is often limited to utilizing about 3 cores effectively.
+  
 ### Benchmarks
 
-These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
-
 The first two are streaming decodes and the last are smaller inputs. 
- 
+
+Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used.
+
 ```
-BenchmarkDecoderSilesia-8                          3     385000067 ns/op     550.51 MB/s        5498 B/op          8 allocs/op
-BenchmarkDecoderSilesiaCgo-8                       6     197666567 ns/op    1072.25 MB/s      270672 B/op          8 allocs/op
-
-BenchmarkDecoderEnwik9-8                           1    2027001600 ns/op     493.34 MB/s       10496 B/op         18 allocs/op
-BenchmarkDecoderEnwik9Cgo-8                        2     979499200 ns/op    1020.93 MB/s      270672 B/op          8 allocs/op
-
-Concurrent performance:
-
-BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16                28915         42469 ns/op    4340.07 MB/s         114 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16           116505          9965 ns/op    11900.16 MB/s         16 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16              8952        134272 ns/op    3588.70 MB/s         915 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16               11820        102538 ns/op    4161.90 MB/s         594 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16             34782         34184 ns/op    3661.88 MB/s          60 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16              27712         43447 ns/op    3500.58 MB/s          99 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16                 62826         18750 ns/op    21845.10 MB/s        104 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16          631545          1794 ns/op    57078.74 MB/s          2 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16         1690140           712 ns/op    172938.13 MB/s         1 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16                 10432        113593 ns/op    6180.73 MB/s        1143 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/html.zst-16                    113206         10671 ns/op    9596.27 MB/s          15 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16          1530615           779 ns/op    5229.49 MB/s           0 B/op          0 allocs/op
-
-BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16             65217         16192 ns/op    11383.34 MB/s         46 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16        292671          4039 ns/op    29363.19 MB/s          6 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16          26314         46021 ns/op    10470.43 MB/s        293 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16            33897         34900 ns/op    12227.96 MB/s        205 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16         104348         11433 ns/op    10949.01 MB/s         20 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16           75949         15510 ns/op    9805.60 MB/s          32 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16             173910          6756 ns/op    60624.29 MB/s         37 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16       923076          1339 ns/op    76474.87 MB/s          1 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16       922920          1351 ns/op    91102.57 MB/s          2 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16              27649         43618 ns/op    16096.19 MB/s        407 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16                 279073          4160 ns/op    24614.18 MB/s          6 B/op          0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16        749938          1579 ns/op    2581.71 MB/s           0 B/op          0 allocs/op
+BenchmarkDecoderSilesia-32    	                   5	 206878840 ns/op	1024.50 MB/s	   49808 B/op	      43 allocs/op
+BenchmarkDecoderEnwik9-32                          1	1271809000 ns/op	 786.28 MB/s	   72048 B/op	      52 allocs/op
+
+Concurrent blocks, performance:
+
+BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32         	   67356	     17857 ns/op	10321.96 MB/s	        22.48 pct	     102 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32     	  266656	      4421 ns/op	26823.21 MB/s	        11.89 pct	      19 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32      	   20992	     56842 ns/op	8477.17 MB/s	        39.90 pct	     754 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32        	   27456	     43932 ns/op	9714.01 MB/s	        33.27 pct	     524 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32      	   78432	     15047 ns/op	8319.15 MB/s	        40.34 pct	      66 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32       	   65800	     18436 ns/op	8249.63 MB/s	        37.75 pct	      88 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32          	  102993	     11523 ns/op	35546.09 MB/s	         3.637 pct	     143 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32    	 1000000	      1070 ns/op	95720.98 MB/s	        80.53 pct	       3 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32    	  749802	      1752 ns/op	70272.35 MB/s	       100.0 pct	       5 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32          	   22640	     52934 ns/op	13263.37 MB/s	        26.25 pct	    1014 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html.zst-32              	  226412	      5232 ns/op	19572.27 MB/s	        14.49 pct	      20 B/op	       0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32     	  923041	      1276 ns/op	3194.71 MB/s	        31.26 pct	       0 B/op	       0 allocs/op
 ```
 
-This reflects the performance around May 2020, but this may be out of date.
+This reflects the performance around May 2022, but this may be out of date.
 
 ## Zstd inside ZIP files
 
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go
index 85445853715..25ca983941d 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitreader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@@ -7,6 +7,7 @@ package zstd
 import (
 	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
 	"math/bits"
 )
@@ -16,7 +17,6 @@ import (
 // for aligning the input.
 type bitReader struct {
 	in       []byte
-	off      uint   // next byte to read is at in[off - 1]
 	value    uint64 // Maybe use [16]byte, but shifting is awkward.
 	bitsRead uint8
 }
@@ -27,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
 		return errors.New("corrupt stream: too short")
 	}
 	b.in = in
-	b.off = uint(len(in))
 	// The highest bit of the last byte indicates where to start
 	v := in[len(in)-1]
 	if v == 0 {
@@ -50,16 +49,16 @@ func (b *bitReader) getBits(n uint8) int {
 	if n == 0 /*|| b.bitsRead >= 64 */ {
 		return 0
 	}
-	return b.getBitsFast(n)
+	return int(b.get32BitsFast(n))
 }
 
-// getBitsFast requires that at least one bit is requested every time.
+// get32BitsFast requires that at least one bit is requested every time.
 // There are no checks if the buffer is filled.
-func (b *bitReader) getBitsFast(n uint8) int {
+func (b *bitReader) get32BitsFast(n uint8) uint32 {
 	const regMask = 64 - 1
 	v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
 	b.bitsRead += n
-	return int(v)
+	return v
 }
 
 // fillFast() will make sure at least 32 bits are available.
@@ -68,21 +67,19 @@ func (b *bitReader) fillFast() {
 	if b.bitsRead < 32 {
 		return
 	}
-	// 2 bounds checks.
-	v := b.in[b.off-4:]
-	v = v[:4]
+	v := b.in[len(b.in)-4:]
+	b.in = b.in[:len(b.in)-4]
 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	b.value = (b.value << 32) | uint64(low)
 	b.bitsRead -= 32
-	b.off -= 4
 }
 
 // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
 func (b *bitReader) fillFastStart() {
-	// Do single re-slice to avoid bounds checks.
-	b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+	v := b.in[len(b.in)-8:]
+	b.in = b.in[:len(b.in)-8]
+	b.value = binary.LittleEndian.Uint64(v)
 	b.bitsRead = 0
-	b.off -= 8
 }
 
 // fill() will make sure at least 32 bits are available.
@@ -90,25 +87,25 @@ func (b *bitReader) fill() {
 	if b.bitsRead < 32 {
 		return
 	}
-	if b.off >= 4 {
-		v := b.in[b.off-4:]
-		v = v[:4]
+	if len(b.in) >= 4 {
+		v := b.in[len(b.in)-4:]
+		b.in = b.in[:len(b.in)-4]
 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 		b.value = (b.value << 32) | uint64(low)
 		b.bitsRead -= 32
-		b.off -= 4
 		return
 	}
-	for b.off > 0 {
-		b.value = (b.value << 8) | uint64(b.in[b.off-1])
-		b.bitsRead -= 8
-		b.off--
+
+	b.bitsRead -= uint8(8 * len(b.in))
+	for len(b.in) > 0 {
+		b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
+		b.in = b.in[:len(b.in)-1]
 	}
 }
 
 // finished returns true if all bits have been read from the bit stream.
 func (b *bitReader) finished() bool {
-	return b.off == 0 && b.bitsRead >= 64
+	return len(b.in) == 0 && b.bitsRead >= 64
 }
 
 // overread returns true if more bits have been requested than is on the stream.
@@ -118,13 +115,16 @@ func (b *bitReader) overread() bool {
 
 // remain returns the number of bits remaining.
 func (b *bitReader) remain() uint {
-	return b.off*8 + 64 - uint(b.bitsRead)
+	return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
 }
 
 // close the bitstream and returns an error if out-of-buffer reads occurred.
 func (b *bitReader) close() error {
 	// Release reference.
 	b.in = nil
+	if !b.finished() {
+		return fmt.Errorf("%d extra bits on block, should be 0", b.remain())
+	}
 	if b.bitsRead > 64 {
 		return io.ErrUnexpectedEOF
 	}
diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
index 303ae90f944..1952f175b0d 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
@@ -5,8 +5,6 @@
 
 package zstd
 
-import "fmt"
-
 // bitWriter will write bits.
 // First bit will be LSB of the first byte of output.
 type bitWriter struct {
@@ -38,7 +36,7 @@ func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
 	b.nBits += bits
 }
 
-// addBits32NC will add up to 32 bits.
+// addBits32NC will add up to 31 bits.
 // It will not check if there is space for them,
 // so the caller must ensure that it has flushed recently.
 func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
@@ -46,6 +44,26 @@ func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
 	b.nBits += bits
 }
 
+// addBits64NC will add up to 64 bits.
+// There must be space for 32 bits.
+func (b *bitWriter) addBits64NC(value uint64, bits uint8) {
+	if bits <= 31 {
+		b.addBits32Clean(uint32(value), bits)
+		return
+	}
+	b.addBits32Clean(uint32(value), 32)
+	b.flush32()
+	b.addBits32Clean(uint32(value>>32), bits-32)
+}
+
+// addBits32Clean will add up to 32 bits.
+// It will not check if there is space for them.
+// The input must not contain more bits than specified.
+func (b *bitWriter) addBits32Clean(value uint32, bits uint8) {
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
 // addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
 // It will not check if there is space for them, so the caller must ensure that it has flushed recently.
 func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -53,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
 	b.nBits += bits
 }
 
-// flush will flush all pending full bytes.
-// There will be at least 56 bits available for writing when this has been called.
-// Using flush32 is faster, but leaves less space for writing.
-func (b *bitWriter) flush() {
-	v := b.nBits >> 3
-	switch v {
-	case 0:
-	case 1:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-		)
-	case 2:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-		)
-	case 3:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-		)
-	case 4:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-		)
-	case 5:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-		)
-	case 6:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-			byte(b.bitContainer>>40),
-		)
-	case 7:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-			byte(b.bitContainer>>40),
-			byte(b.bitContainer>>48),
-		)
-	case 8:
-		b.out = append(b.out,
-			byte(b.bitContainer),
-			byte(b.bitContainer>>8),
-			byte(b.bitContainer>>16),
-			byte(b.bitContainer>>24),
-			byte(b.bitContainer>>32),
-			byte(b.bitContainer>>40),
-			byte(b.bitContainer>>48),
-			byte(b.bitContainer>>56),
-		)
-	default:
-		panic(fmt.Errorf("bits (%d) > 64", b.nBits))
-	}
-	b.bitContainer >>= v << 3
-	b.nBits &= 7
-}
-
 // flush32 will flush out, so there are at least 32 bits available for writing.
 func (b *bitWriter) flush32() {
 	if b.nBits < 32 {
@@ -153,12 +97,11 @@ func (b *bitWriter) flushAlign() {
 
 // close will write the alignment bit and write the final byte(s)
 // to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
 	// End mark
 	b.addBits16Clean(1, 1)
 	// flush until next byte.
 	b.flushAlign()
-	return nil
 }
 
 // reset and continue writing by appending to out.
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 8a98c4562e0..9f17ce601ff 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -5,9 +5,14 @@
 package zstd
 
 import (
+	"bytes"
+	"encoding/binary"
 	"errors"
 	"fmt"
+	"hash/crc32"
 	"io"
+	"os"
+	"path/filepath"
 	"sync"
 
 	"github.com/klauspost/compress/huff0"
@@ -38,14 +43,14 @@ const (
 	// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
 	maxCompressedBlockSize = 128 << 10
 
+	compressedBlockOverAlloc    = 16
+	maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
+
 	// Maximum possible block size (all Raw+Uncompressed).
 	maxBlockSize = (1 << 21) - 1
 
-	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
-	maxCompressedLiteralSize = 1 << 18
-	maxRLELiteralSize        = 1 << 20
-	maxMatchLen              = 131074
-	maxSequences             = 0x7f00 + 0xffff
+	maxMatchLen  = 131074
+	maxSequences = 0x7f00 + 0xffff
 
 	// We support slightly less than the reference decoder to be able to
 	// use ints on 32 bit archs.
@@ -76,20 +81,28 @@ type blockDec struct {
 	// Window size of the block.
 	WindowSize uint64
 
-	history     chan *history
-	input       chan struct{}
-	result      chan decodeOutput
-	sequenceBuf []seq
-	err         error
-	decWG       sync.WaitGroup
+	err error
+
+	// Check against this crc, if hasCRC is true.
+	checkCRC uint32
+	hasCRC   bool
 
 	// Frame to use for singlethreaded decoding.
 	// Should not be used by the decoder itself since parent may be another frame.
 	localFrame *frameDec
 
+	sequence []seqVals
+
+	async struct {
+		newHist  *history
+		literals []byte
+		seqData  []byte
+		seqSize  int // Size of uncompressed sequences
+		fcs      uint64
+	}
+
 	// Block is RLE, this is the size.
 	RLESize uint32
-	tmp     [4]byte
 
 	Type blockType
 
@@ -109,13 +122,8 @@ func (b *blockDec) String() string {
 
 func newBlockDec(lowMem bool) *blockDec {
 	b := blockDec{
-		lowMem:  lowMem,
-		result:  make(chan decodeOutput, 1),
-		input:   make(chan struct{}, 1),
-		history: make(chan *history, 1),
+		lowMem: lowMem,
 	}
-	b.decWG.Add(1)
-	go b.startDecoder()
 	return &b
 }
 
@@ -133,11 +141,17 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 	b.Type = blockType((bh >> 1) & 3)
 	// find size.
 	cSize := int(bh >> 3)
-	maxSize := maxBlockSize
+	maxSize := maxCompressedBlockSizeAlloc
 	switch b.Type {
 	case blockTypeReserved:
 		return ErrReservedBlockType
 	case blockTypeRLE:
+		if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
+			if debugDecoder {
+				printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
+			}
+			return ErrWindowSizeExceeded
+		}
 		b.RLESize = uint32(cSize)
 		if b.lowMem {
 			maxSize = cSize
@@ -148,9 +162,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 			println("Data size on stream:", cSize)
 		}
 		b.RLESize = 0
-		maxSize = maxCompressedBlockSize
+		maxSize = maxCompressedBlockSizeAlloc
 		if windowSize < maxCompressedBlockSize && b.lowMem {
-			maxSize = int(windowSize)
+			maxSize = int(windowSize) + compressedBlockOverAlloc
 		}
 		if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
 			if debugDecoder {
@@ -158,7 +172,19 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 			}
 			return ErrCompressedSizeTooBig
 		}
+		// Empty compressed blocks must at least be 2 bytes
+		// for Literals_Block_Type and one for Sequences_Section_Header.
+		if cSize < 2 {
+			return ErrBlockTooSmall
+		}
 	case blockTypeRaw:
+		if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
+			if debugDecoder {
+				printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
+			}
+			return ErrWindowSizeExceeded
+		}
+
 		b.RLESize = 0
 		// We do not need a destination for raw blocks.
 		maxSize = -1
@@ -167,16 +193,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 	}
 
 	// Read block data.
-	if cap(b.dataStorage) < cSize {
+	if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
+		// byteBuf doesn't need a destination buffer.
 		if b.lowMem || cSize > maxCompressedBlockSize {
-			b.dataStorage = make([]byte, 0, cSize)
+			b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
 		} else {
-			b.dataStorage = make([]byte, 0, maxCompressedBlockSize)
+			b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
 		}
 	}
-	if cap(b.dst) <= maxSize {
-		b.dst = make([]byte, 0, maxSize+1)
-	}
 	b.data, err = br.readBig(cSize, b.dataStorage)
 	if err != nil {
 		if debugDecoder {
@@ -185,6 +209,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 		}
 		return err
 	}
+	if cap(b.dst) <= maxSize {
+		b.dst = make([]byte, 0, maxSize+1)
+	}
 	return nil
 }
 
@@ -193,85 +220,14 @@ func (b *blockDec) sendErr(err error) {
 	b.Last = true
 	b.Type = blockTypeReserved
 	b.err = err
-	b.input <- struct{}{}
 }
 
 // Close will release resources.
 // Closed blockDec cannot be reset.
 func (b *blockDec) Close() {
-	close(b.input)
-	close(b.history)
-	close(b.result)
-	b.decWG.Wait()
 }
 
-// decodeAsync will prepare decoding the block when it receives input.
-// This will separate output and history.
-func (b *blockDec) startDecoder() {
-	defer b.decWG.Done()
-	for range b.input {
-		//println("blockDec: Got block input")
-		switch b.Type {
-		case blockTypeRLE:
-			if cap(b.dst) < int(b.RLESize) {
-				if b.lowMem {
-					b.dst = make([]byte, b.RLESize)
-				} else {
-					b.dst = make([]byte, maxBlockSize)
-				}
-			}
-			o := decodeOutput{
-				d:   b,
-				b:   b.dst[:b.RLESize],
-				err: nil,
-			}
-			v := b.data[0]
-			for i := range o.b {
-				o.b[i] = v
-			}
-			hist := <-b.history
-			hist.append(o.b)
-			b.result <- o
-		case blockTypeRaw:
-			o := decodeOutput{
-				d:   b,
-				b:   b.data,
-				err: nil,
-			}
-			hist := <-b.history
-			hist.append(o.b)
-			b.result <- o
-		case blockTypeCompressed:
-			b.dst = b.dst[:0]
-			err := b.decodeCompressed(nil)
-			o := decodeOutput{
-				d:   b,
-				b:   b.dst,
-				err: err,
-			}
-			if debugDecoder {
-				println("Decompressed to", len(b.dst), "bytes, error:", err)
-			}
-			b.result <- o
-		case blockTypeReserved:
-			// Used for returning errors.
-			<-b.history
-			b.result <- decodeOutput{
-				d:   b,
-				b:   nil,
-				err: b.err,
-			}
-		default:
-			panic("Invalid block type")
-		}
-		if debugDecoder {
-			println("blockDec: Finished block")
-		}
-	}
-}
-
-// decodeAsync will prepare decoding the block when it receives the history.
-// If history is provided, it will not fetch it from the channel.
+// decodeBuf
 func (b *blockDec) decodeBuf(hist *history) error {
 	switch b.Type {
 	case blockTypeRLE:
@@ -279,7 +235,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
 			if b.lowMem {
 				b.dst = make([]byte, b.RLESize)
 			} else {
-				b.dst = make([]byte, maxBlockSize)
+				b.dst = make([]byte, maxCompressedBlockSize)
 			}
 		}
 		b.dst = b.dst[:b.RLESize]
@@ -294,14 +250,23 @@ func (b *blockDec) decodeBuf(hist *history) error {
 		return nil
 	case blockTypeCompressed:
 		saved := b.dst
-		b.dst = hist.b
-		hist.b = nil
+		// Append directly to history
+		if hist.ignoreBuffer == 0 {
+			b.dst = hist.b
+			hist.b = nil
+		} else {
+			b.dst = b.dst[:0]
+		}
 		err := b.decodeCompressed(hist)
 		if debugDecoder {
 			println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
 		}
-		hist.b = b.dst
-		b.dst = saved
+		if hist.ignoreBuffer == 0 {
+			hist.b = b.dst
+			b.dst = saved
+		} else {
+			hist.appendKeep(b.dst)
+		}
 		return err
 	case blockTypeReserved:
 		// Used for returning errors.
@@ -311,30 +276,18 @@ func (b *blockDec) decodeBuf(hist *history) error {
 	}
 }
 
-// decodeCompressed will start decompressing a block.
-// If no history is supplied the decoder will decodeAsync as much as possible
-// before fetching from blockDec.history
-func (b *blockDec) decodeCompressed(hist *history) error {
-	in := b.data
-	delayedHistory := hist == nil
-
-	if delayedHistory {
-		// We must always grab history.
-		defer func() {
-			if hist == nil {
-				<-b.history
-			}
-		}()
-	}
+func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) {
 	// There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
 	if len(in) < 2 {
-		return ErrBlockTooSmall
+		return in, ErrBlockTooSmall
 	}
+
 	litType := literalsBlockType(in[0] & 3)
 	var litRegenSize int
 	var litCompSize int
 	sizeFormat := (in[0] >> 2) & 3
 	var fourStreams bool
+	var literals []byte
 	switch litType {
 	case literalsBlockRaw, literalsBlockRLE:
 		switch sizeFormat {
@@ -350,7 +303,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			//  Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
 			if len(in) < 3 {
 				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
-				return ErrBlockTooSmall
+				return in, ErrBlockTooSmall
 			}
 			litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
 			in = in[3:]
@@ -361,7 +314,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			// Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
 			if len(in) < 3 {
 				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
-				return ErrBlockTooSmall
+				return in, ErrBlockTooSmall
 			}
 			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
 			litRegenSize = int(n & 1023)
@@ -372,7 +325,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			fourStreams = true
 			if len(in) < 4 {
 				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
-				return ErrBlockTooSmall
+				return in, ErrBlockTooSmall
 			}
 			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
 			litRegenSize = int(n & 16383)
@@ -382,7 +335,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			fourStreams = true
 			if len(in) < 5 {
 				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
-				return ErrBlockTooSmall
+				return in, ErrBlockTooSmall
 			}
 			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
 			litRegenSize = int(n & 262143)
@@ -393,13 +346,15 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 	if debugDecoder {
 		println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
 	}
-	var literals []byte
-	var huff *huff0.Scratch
+	if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize {
+		return in, ErrWindowSizeExceeded
+	}
+
 	switch litType {
 	case literalsBlockRaw:
 		if len(in) < litRegenSize {
 			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
-			return ErrBlockTooSmall
+			return in, ErrBlockTooSmall
 		}
 		literals = in[:litRegenSize]
 		in = in[litRegenSize:]
@@ -407,19 +362,13 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 	case literalsBlockRLE:
 		if len(in) < 1 {
 			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
-			return ErrBlockTooSmall
+			return in, ErrBlockTooSmall
 		}
 		if cap(b.literalBuf) < litRegenSize {
 			if b.lowMem {
-				b.literalBuf = make([]byte, litRegenSize)
+				b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
 			} else {
-				if litRegenSize > maxCompressedLiteralSize {
-					// Exceptional
-					b.literalBuf = make([]byte, litRegenSize)
-				} else {
-					b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
-
-				}
+				b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
 			}
 		}
 		literals = b.literalBuf[:litRegenSize]
@@ -434,7 +383,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 	case literalsBlockTreeless:
 		if len(in) < litCompSize {
 			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
-			return ErrBlockTooSmall
+			return in, ErrBlockTooSmall
 		}
 		// Store compressed literals, so we defer decoding until we get history.
 		literals = in[:litCompSize]
@@ -442,31 +391,68 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		if debugDecoder {
 			printf("Found %d compressed literals\n", litCompSize)
 		}
+		huff := hist.huffTree
+		if huff == nil {
+			return in, errors.New("literal block was treeless, but no history was defined")
+		}
+		// Ensure we have space to store it.
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
+			} else {
+				b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
+			}
+		}
+		var err error
+		// Use our out buffer.
+		huff.MaxDecodedSize = litRegenSize
+		if fourStreams {
+			literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
+		} else {
+			literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
+		}
+		// Make sure we don't leak our literals buffer
+		if err != nil {
+			println("decompressing literals:", err)
+			return in, err
+		}
+		if len(literals) != litRegenSize {
+			return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
+		}
+
 	case literalsBlockCompressed:
 		if len(in) < litCompSize {
 			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
-			return ErrBlockTooSmall
+			return in, ErrBlockTooSmall
 		}
 		literals = in[:litCompSize]
 		in = in[litCompSize:]
-		huff = huffDecoderPool.Get().(*huff0.Scratch)
-		var err error
 		// Ensure we have space to store it.
 		if cap(b.literalBuf) < litRegenSize {
 			if b.lowMem {
-				b.literalBuf = make([]byte, 0, litRegenSize)
+				b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
 			} else {
-				b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
+				b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
 			}
 		}
-		if huff == nil {
-			huff = &huff0.Scratch{}
+		huff := hist.huffTree
+		if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) {
+			huff = huffDecoderPool.Get().(*huff0.Scratch)
+			if huff == nil {
+				huff = &huff0.Scratch{}
+			}
+		}
+		var err error
+		if debugDecoder {
+			println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
 		}
 		huff, literals, err = huff0.ReadTable(literals, huff)
 		if err != nil {
 			println("reading huffman table:", err)
-			return err
+			return in, err
 		}
+		hist.huffTree = huff
+		huff.MaxDecodedSize = litRegenSize
 		// Use our out buffer.
 		if fourStreams {
 			literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
@@ -475,27 +461,63 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		}
 		if err != nil {
 			println("decoding compressed literals:", err)
-			return err
+			return in, err
 		}
 		// Make sure we don't leak our literals buffer
 		if len(literals) != litRegenSize {
-			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
+			return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
 		}
+		// Re-cap to get extra size.
+		literals = b.literalBuf[:len(literals)]
 		if debugDecoder {
 			printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
 		}
 	}
+	hist.decoders.literals = literals
+	return in, nil
+}
+
+// decodeCompressed will start decompressing a block.
+func (b *blockDec) decodeCompressed(hist *history) error {
+	in := b.data
+	in, err := b.decodeLiterals(in, hist)
+	if err != nil {
+		return err
+	}
+	err = b.prepareSequences(in, hist)
+	if err != nil {
+		return err
+	}
+	if hist.decoders.nSeqs == 0 {
+		b.dst = append(b.dst, hist.decoders.literals...)
+		return nil
+	}
+	before := len(hist.decoders.out)
+	err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
+	if err != nil {
+		return err
+	}
+	if hist.decoders.maxSyncLen > 0 {
+		hist.decoders.maxSyncLen += uint64(before)
+		hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
+	}
+	b.dst = hist.decoders.out
+	hist.recentOffsets = hist.decoders.prevOffset
+	return nil
+}
 
+func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
+	if debugDecoder {
+		printf("prepareSequences: %d byte(s) input\n", len(in))
+	}
 	// Decode Sequences
 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
 	if len(in) < 1 {
 		return ErrBlockTooSmall
 	}
+	var nSeqs int
 	seqHeader := in[0]
-	nSeqs := 0
 	switch {
-	case seqHeader == 0:
-		in = in[1:]
 	case seqHeader < 128:
 		nSeqs = int(seqHeader)
 		in = in[1:]
@@ -512,19 +534,16 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
 		in = in[3:]
 	}
-	// Allocate sequences
-	if cap(b.sequenceBuf) < nSeqs {
-		if b.lowMem {
-			b.sequenceBuf = make([]seq, nSeqs)
-		} else {
-			// Allocate max
-			b.sequenceBuf = make([]seq, nSeqs, maxSequences)
+	if nSeqs == 0 && len(in) != 0 {
+		// When no sequences, there should not be any more data...
+		if debugDecoder {
+			printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
 		}
-	} else {
-		// Reuse buffer
-		b.sequenceBuf = b.sequenceBuf[:nSeqs]
+		return ErrUnexpectedBlockSize
 	}
-	var seqs = &sequenceDecs{}
+
+	var seqs = &hist.decoders
+	seqs.nSeqs = nSeqs
 	if nSeqs > 0 {
 		if len(in) < 1 {
 			return ErrBlockTooSmall
@@ -553,6 +572,9 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			}
 			switch mode {
 			case compModePredefined:
+				if seq.fse != nil && !seq.fse.preDefined {
+					fseDecoderPool.Put(seq.fse)
+				}
 				seq.fse = &fsePredef[i]
 			case compModeRLE:
 				if br.remain() < 1 {
@@ -560,34 +582,36 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 				}
 				v := br.Uint8()
 				br.advance(1)
-				dec := fseDecoderPool.Get().(*fseDecoder)
+				if seq.fse == nil || seq.fse.preDefined {
+					seq.fse = fseDecoderPool.Get().(*fseDecoder)
+				}
 				symb, err := decSymbolValue(v, symbolTableX[i])
 				if err != nil {
 					printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
 					return err
 				}
-				dec.setRLE(symb)
-				seq.fse = dec
+				seq.fse.setRLE(symb)
 				if debugDecoder {
-					printf("RLE set to %+v, code: %v", symb, v)
+					printf("RLE set to 0x%x, code: %v", symb, v)
 				}
 			case compModeFSE:
 				println("Reading table for", tableIndex(i))
-				dec := fseDecoderPool.Get().(*fseDecoder)
-				err := dec.readNCount(&br, uint16(maxTableSymbol[i]))
+				if seq.fse == nil || seq.fse.preDefined {
+					seq.fse = fseDecoderPool.Get().(*fseDecoder)
+				}
+				err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i]))
 				if err != nil {
 					println("Read table error:", err)
 					return err
 				}
-				err = dec.transform(symbolTableX[i])
+				err = seq.fse.transform(symbolTableX[i])
 				if err != nil {
 					println("Transform table error:", err)
 					return err
 				}
 				if debugDecoder {
-					println("Read table ok", "symbolLen:", dec.symbolLen)
+					println("Read table ok", "symbolLen:", seq.fse.symbolLen)
 				}
-				seq.fse = dec
 			case compModeRepeat:
 				seq.repeat = true
 			}
@@ -597,140 +621,106 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		}
 		in = br.unread()
 	}
-
-	// Wait for history.
-	// All time spent after this is critical since it is strictly sequential.
-	if hist == nil {
-		hist = <-b.history
-		if hist.error {
-			return ErrDecoderClosed
-		}
-	}
-
-	// Decode treeless literal block.
-	if litType == literalsBlockTreeless {
-		// TODO: We could send the history early WITHOUT the stream history.
-		//   This would allow decoding treeless literals before the byte history is available.
-		//   Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
-		//   So not much obvious gain here.
-
-		if hist.huffTree == nil {
-			return errors.New("literal block was treeless, but no history was defined")
-		}
-		// Ensure we have space to store it.
-		if cap(b.literalBuf) < litRegenSize {
-			if b.lowMem {
-				b.literalBuf = make([]byte, 0, litRegenSize)
-			} else {
-				b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
-			}
-		}
-		var err error
-		// Use our out buffer.
-		huff = hist.huffTree
-		if fourStreams {
-			literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
-		} else {
-			literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
-		}
-		// Make sure we don't leak our literals buffer
-		if err != nil {
-			println("decompressing literals:", err)
-			return err
-		}
-		if len(literals) != litRegenSize {
-			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
-		}
-	} else {
-		if hist.huffTree != nil && huff != nil {
-			if hist.dict == nil || hist.dict.litEnc != hist.huffTree {
-				huffDecoderPool.Put(hist.huffTree)
-			}
-			hist.huffTree = nil
-		}
-	}
-	if huff != nil {
-		hist.huffTree = huff
-	}
 	if debugDecoder {
-		println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
+		println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.")
 	}
 
 	if nSeqs == 0 {
-		// Decompressed content is defined entirely as Literals Section content.
-		b.dst = append(b.dst, literals...)
-		if delayedHistory {
-			hist.append(literals)
+		if len(b.sequence) > 0 {
+			b.sequence = b.sequence[:0]
 		}
 		return nil
 	}
+	br := seqs.br
+	if br == nil {
+		br = &bitReader{}
+	}
+	if err := br.init(in); err != nil {
+		return err
+	}
 
-	seqs, err := seqs.mergeHistory(&hist.decoders)
-	if err != nil {
+	if err := seqs.initialize(br, hist, b.dst); err != nil {
+		println("initializing sequences:", err)
 		return err
 	}
-	if debugDecoder {
-		println("History merged ok")
+	// Extract blocks...
+	if false && hist.dict == nil {
+		fatalErr := func(err error) {
+			if err != nil {
+				panic(err)
+			}
+		}
+		fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
+		var buf bytes.Buffer
+		fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
+		fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
+		fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
+		buf.Write(in)
+		os.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
 	}
-	br := &bitReader{}
-	if err := br.init(in); err != nil {
-		return err
+
+	return nil
+}
+
+func (b *blockDec) decodeSequences(hist *history) error {
+	if cap(b.sequence) < hist.decoders.nSeqs {
+		if b.lowMem {
+			b.sequence = make([]seqVals, 0, hist.decoders.nSeqs)
+		} else {
+			b.sequence = make([]seqVals, 0, 0x7F00+0xffff)
+		}
+	}
+	b.sequence = b.sequence[:hist.decoders.nSeqs]
+	if hist.decoders.nSeqs == 0 {
+		hist.decoders.seqSize = len(hist.decoders.literals)
+		return nil
 	}
+	hist.decoders.windowSize = hist.windowSize
+	hist.decoders.prevOffset = hist.recentOffsets
 
-	// TODO: Investigate if sending history without decoders are faster.
-	//   This would allow the sequences to be decoded async and only have to construct stream history.
-	//   If only recent offsets were not transferred, this would be an obvious win.
-	// 	 Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded.
+	err := hist.decoders.decode(b.sequence)
+	hist.recentOffsets = hist.decoders.prevOffset
+	return err
+}
 
+func (b *blockDec) executeSequences(hist *history) error {
 	hbytes := hist.b
 	if len(hbytes) > hist.windowSize {
 		hbytes = hbytes[len(hbytes)-hist.windowSize:]
-		// We do not need history any more.
+		// We do not need history anymore.
 		if hist.dict != nil {
 			hist.dict.content = nil
 		}
 	}
-
-	if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
-		println("initializing sequences:", err)
-		return err
-	}
-
-	err = seqs.decode(nSeqs, br, hbytes)
+	hist.decoders.windowSize = hist.windowSize
+	hist.decoders.out = b.dst[:0]
+	err := hist.decoders.execute(b.sequence, hbytes)
 	if err != nil {
 		return err
 	}
-	if !br.finished() {
-		return fmt.Errorf("%d extra bits on block, should be 0", br.remain())
-	}
+	return b.updateHistory(hist)
+}
 
-	err = br.close()
-	if err != nil {
-		printf("Closing sequences: %v, %+v\n", err, *br)
-	}
+func (b *blockDec) updateHistory(hist *history) error {
 	if len(b.data) > maxCompressedBlockSize {
 		return fmt.Errorf("compressed block size too large (%d)", len(b.data))
 	}
 	// Set output and release references.
-	b.dst = seqs.out
-	seqs.out, seqs.literals, seqs.hist = nil, nil, nil
+	b.dst = hist.decoders.out
+	hist.recentOffsets = hist.decoders.prevOffset
 
-	if !delayedHistory {
-		// If we don't have delayed history, no need to update.
-		hist.recentOffsets = seqs.prevOffset
-		return nil
-	}
 	if b.Last {
 		// if last block we don't care about history.
 		println("Last block, no history returned")
 		hist.b = hist.b[:0]
 		return nil
+	} else {
+		hist.append(b.dst)
+		if debugDecoder {
+			println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b))
+		}
 	}
-	hist.append(b.dst)
-	hist.recentOffsets = seqs.prevOffset
-	if debugDecoder {
-		println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
-	}
+	hist.decoders.out, hist.decoders.literals = nil, nil
 
 	return nil
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index 3df185ee465..2cfe925ade5 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -51,7 +51,7 @@ func (b *blockEnc) init() {
 		if cap(b.literals) < maxCompressedBlockSize {
 			b.literals = make([]byte, 0, maxCompressedBlockSize)
 		}
-		const defSeqs = 200
+		const defSeqs = 2000
 		if cap(b.sequences) < defSeqs {
 			b.sequences = make([]seq, 0, defSeqs)
 		}
@@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
 	if len(lits) >= 1024 {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
-	} else if len(lits) > 32 {
+	} else if len(lits) > 16 {
 		// Use 1 stream
 		single = true
 		out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
 	} else {
 		err = huff0.ErrIncompressible
 	}
-
+	if err == nil && len(out)+5 > len(lits) {
+		// If we are close, we may still be worse or equal to raw.
+		var lh literalsHeader
+		lh.setSizes(len(out), len(lits), single)
+		if len(out)+lh.size() >= len(lits) {
+			err = huff0.ErrIncompressible
+		}
+	}
 	switch err {
 	case huff0.ErrIncompressible:
 		if debugEncoder {
@@ -426,7 +433,7 @@ func fuzzFseEncoder(data []byte) int {
 		return 0
 	}
 	enc := fseEncoder{}
-	hist := enc.Histogram()[:256]
+	hist := enc.Histogram()
 	maxSym := uint8(0)
 	for i, v := range data {
 		v = v & 63
@@ -473,7 +480,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		return b.encodeLits(b.literals, rawAllLits)
 	}
 	// We want some difference to at least account for the headers.
-	saved := b.size - len(b.literals) - (b.size >> 5)
+	saved := b.size - len(b.literals) - (b.size >> 6)
 	if saved < 16 {
 		if org == nil {
 			return errIncompressible
@@ -503,7 +510,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	if len(b.literals) >= 1024 && !raw {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
-	} else if len(b.literals) > 32 && !raw {
+	} else if len(b.literals) > 16 && !raw {
 		// Use 1 stream
 		single = true
 		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +518,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		err = huff0.ErrIncompressible
 	}
 
+	if err == nil && len(out)+5 > len(b.literals) {
+		// If we are close, we may still be worse or equal to raw.
+		var lh literalsHeader
+		lh.setSize(len(b.literals))
+		szRaw := lh.size()
+		lh.setSizes(len(out), len(b.literals), single)
+		szComp := lh.size()
+		if len(out)+szComp >= len(b.literals)+szRaw {
+			err = huff0.ErrIncompressible
+		}
+	}
 	switch err {
 	case huff0.ErrIncompressible:
 		lh.setType(literalsBlockRaw)
@@ -722,66 +740,67 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
 	}
 	seq--
-	if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
-		// No need to flush (common)
-		for seq >= 0 {
-			s = b.sequences[seq]
-			wr.flush32()
-			llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
-			// tabelog max is 8 for all.
-			of.encode(ofB)
-			ml.encode(mlB)
-			ll.encode(llB)
-			wr.flush32()
-
-			// We checked that all can stay within 32 bits
-			wr.addBits32NC(s.litLen, llB.outBits)
-			wr.addBits32NC(s.matchLen, mlB.outBits)
-			wr.addBits32NC(s.offset, ofB.outBits)
-
-			if debugSequences {
-				println("Encoded seq", seq, s)
-			}
-
-			seq--
-		}
-	} else {
-		for seq >= 0 {
-			s = b.sequences[seq]
-			wr.flush32()
-			llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
-			// tabelog max is below 8 for each.
-			of.encode(ofB)
-			ml.encode(mlB)
-			ll.encode(llB)
-			wr.flush32()
-
-			// ml+ll = max 32 bits total
-			wr.addBits32NC(s.litLen, llB.outBits)
-			wr.addBits32NC(s.matchLen, mlB.outBits)
-			wr.flush32()
-			wr.addBits32NC(s.offset, ofB.outBits)
-
-			if debugSequences {
-				println("Encoded seq", seq, s)
-			}
-
-			seq--
-		}
+	// Store sequences in reverse...
+	for seq >= 0 {
+		s = b.sequences[seq]
+
+		ofB := ofTT[s.ofCode]
+		wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
+		//of.encode(ofB)
+		nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
+		dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
+		wr.addBits16NC(of.state, uint8(nbBitsOut))
+		of.state = of.stateTable[dstState]
+
+		// Accumulate extra bits.
+		outBits := ofB.outBits & 31
+		extraBits := uint64(s.offset & bitMask32[outBits])
+		extraBitsN := outBits
+
+		mlB := mlTT[s.mlCode]
+		//ml.encode(mlB)
+		nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
+		dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
+		wr.addBits16NC(ml.state, uint8(nbBitsOut))
+		ml.state = ml.stateTable[dstState]
+
+		outBits = mlB.outBits & 31
+		extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
+		extraBitsN += outBits
+
+		llB := llTT[s.llCode]
+		//ll.encode(llB)
+		nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
+		dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
+		wr.addBits16NC(ll.state, uint8(nbBitsOut))
+		ll.state = ll.stateTable[dstState]
+
+		outBits = llB.outBits & 31
+		extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
+		extraBitsN += outBits
+
+		wr.flush32()
+		wr.addBits64NC(extraBits, extraBitsN)
+
+		if debugSequences {
+			println("Encoded seq", seq, s)
+		}
+
+		seq--
 	}
 	ml.flush(mlEnc.actualTableLog)
 	of.flush(ofEnc.actualTableLog)
 	ll.flush(llEnc.actualTableLog)
-	err = wr.close()
-	if err != nil {
-		return err
-	}
+	wr.close()
 	b.output = wr.out
 
+	// Maybe even add a bigger margin.
 	if len(b.output)-3-bhOffset >= b.size {
-		// Maybe even add a bigger margin.
+		// Discard and encode as raw block.
+		b.output = b.encodeRawTo(b.output[:bhOffset], org)
+		b.popOffsets()
 		b.litEnc.Reuse = huff0.ReusePolicyNone
-		return errIncompressible
+		return nil
 	}
 
 	// Size is output minus block header.
@@ -801,14 +820,13 @@ func (b *blockEnc) genCodes() {
 		// nothing to do
 		return
 	}
-
 	if len(b.sequences) > math.MaxUint16 {
 		panic("can only encode up to 64K sequences")
 	}
 	// No bounds checks after here:
-	llH := b.coders.llEnc.Histogram()[:256]
-	ofH := b.coders.ofEnc.Histogram()[:256]
-	mlH := b.coders.mlEnc.Histogram()[:256]
+	llH := b.coders.llEnc.Histogram()
+	ofH := b.coders.ofEnc.Histogram()
+	mlH := b.coders.mlEnc.Histogram()
 	for i := range llH {
 		llH[i] = 0
 	}
@@ -820,7 +838,8 @@ func (b *blockEnc) genCodes() {
 	}
 
 	var llMax, ofMax, mlMax uint8
-	for i, seq := range b.sequences {
+	for i := range b.sequences {
+		seq := &b.sequences[i]
 		v := llCode(seq.litLen)
 		seq.llCode = v
 		llH[v]++
@@ -844,7 +863,6 @@ func (b *blockEnc) genCodes() {
 				panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
 			}
 		}
-		b.sequences[i] = seq
 	}
 	maxCount := func(a []uint32) int {
 		var max uint32
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index aab71c6cf85..55a388553df 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -7,7 +7,6 @@ package zstd
 import (
 	"fmt"
 	"io"
-	"io/ioutil"
 )
 
 type byteBuffer interface {
@@ -23,7 +22,7 @@ type byteBuffer interface {
 	readByte() (byte, error)
 
 	// Skip n bytes.
-	skipN(n int) error
+	skipN(n int64) error
 }
 
 // in-memory buffer
@@ -52,23 +51,22 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
 	return r, nil
 }
 
-func (b *byteBuf) remain() []byte {
-	return *b
-}
-
 func (b *byteBuf) readByte() (byte, error) {
 	bb := *b
 	if len(bb) < 1 {
-		return 0, nil
+		return 0, io.ErrUnexpectedEOF
 	}
 	r := bb[0]
 	*b = bb[1:]
 	return r, nil
 }
 
-func (b *byteBuf) skipN(n int) error {
+func (b *byteBuf) skipN(n int64) error {
 	bb := *b
-	if len(bb) < n {
+	if n < 0 {
+		return fmt.Errorf("negative skip (%d) requested", n)
+	}
+	if int64(len(bb)) < n {
 		return io.ErrUnexpectedEOF
 	}
 	*b = bb[n:]
@@ -111,8 +109,11 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
 }
 
 func (r *readerWrapper) readByte() (byte, error) {
-	n2, err := r.r.Read(r.tmp[:1])
+	n2, err := io.ReadFull(r.r, r.tmp[:1])
 	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
 		return 0, err
 	}
 	if n2 != 1 {
@@ -121,9 +122,9 @@ func (r *readerWrapper) readByte() (byte, error) {
 	return r.tmp[0], nil
 }
 
-func (r *readerWrapper) skipN(n int) error {
-	n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
-	if n2 != int64(n) {
+func (r *readerWrapper) skipN(n int64) error {
+	n2, err := io.CopyN(io.Discard, r.r, n)
+	if n2 != n {
 		err = io.ErrUnexpectedEOF
 	}
 	return err
diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go
index 2c4fca17fa1..0e59a242d8d 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytereader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@@ -13,12 +13,6 @@ type byteReader struct {
 	off int
 }
 
-// init will initialize the reader and set the input.
-func (b *byteReader) init(in []byte) {
-	b.b = in
-	b.off = 0
-}
-
 // advance the stream b n bytes.
 func (b *byteReader) advance(n uint) {
 	b.off += int(n)
diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
index 69736e8d4bb..6a5a2988b6f 100644
--- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@@ -4,7 +4,7 @@
 package zstd
 
 import (
-	"bytes"
+	"encoding/binary"
 	"errors"
 	"io"
 )
@@ -15,18 +15,50 @@ const HeaderMaxSize = 14 + 3
 
 // Header contains information about the first frame and block within that.
 type Header struct {
-	// Window Size the window of data to keep while decoding.
-	// Will only be set if HasFCS is false.
-	WindowSize uint64
+	// SingleSegment specifies whether the data is to be decompressed into a
+	// single contiguous memory segment.
+	// It implies that WindowSize is invalid and that FrameContentSize is valid.
+	SingleSegment bool
 
-	// Frame content size.
-	// Expected size of the entire frame.
-	FrameContentSize uint64
+	// WindowSize is the window of data to keep while decoding.
+	// Will only be set if SingleSegment is false.
+	WindowSize uint64
 
 	// Dictionary ID.
 	// If 0, no dictionary.
 	DictionaryID uint32
 
+	// HasFCS specifies whether FrameContentSize has a valid value.
+	HasFCS bool
+
+	// FrameContentSize is the expected uncompressed size of the entire frame.
+	FrameContentSize uint64
+
+	// Skippable will be true if the frame is meant to be skipped.
+	// This implies that FirstBlock.OK is false.
+	Skippable bool
+
+	// SkippableID is the user-specific ID for the skippable frame.
+	// Valid values are between 0 to 15, inclusive.
+	SkippableID int
+
+	// SkippableSize is the length of the user data to skip following
+	// the header.
+	SkippableSize uint32
+
+	// HeaderSize is the raw size of the frame header.
+	//
+	// For normal frames, it includes the size of the magic number and
+	// the size of the header (per section 3.1.1.1).
+	// It does not include the size for any data blocks (section 3.1.1.2) nor
+	// the size for the trailing content checksum.
+	//
+	// For skippable frames, this counts the size of the magic number
+	// along with the size of the size field of the payload.
+	// It does not include the size of the skippable payload itself.
+	// The total frame size is the HeaderSize plus the SkippableSize.
+	HeaderSize int
+
 	// First block information.
 	FirstBlock struct {
 		// OK will be set if first block could be decoded.
@@ -51,17 +83,9 @@ type Header struct {
 		CompressedSize int
 	}
 
-	// Skippable will be true if the frame is meant to be skipped.
-	// No other information will be populated.
-	Skippable bool
-
 	// If set there is a checksum present for the block content.
+	// The checksum field at the end is always 4 bytes long.
 	HasCheckSum bool
-
-	// If this is true FrameContentSize will have a valid value
-	HasFCS bool
-
-	SingleSegment bool
 }
 
 // Decode the header from the beginning of the stream.
@@ -71,39 +95,58 @@ type Header struct {
 // If there isn't enough input, io.ErrUnexpectedEOF is returned.
 // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
 func (h *Header) Decode(in []byte) error {
+	_, err := h.DecodeAndStrip(in)
+	return err
+}
+
+// DecodeAndStrip will decode the header from the beginning of the stream
+// and on success return the remaining bytes.
+// This will decode the frame header and the first block header if enough bytes are provided.
+// It is recommended to provide at least HeaderMaxSize bytes.
+// If the frame header cannot be read an error will be returned.
+// If there isn't enough input, io.ErrUnexpectedEOF is returned.
+// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
+func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
+	*h = Header{}
 	if len(in) < 4 {
-		return io.ErrUnexpectedEOF
+		return nil, io.ErrUnexpectedEOF
 	}
+	h.HeaderSize += 4
 	b, in := in[:4], in[4:]
-	if !bytes.Equal(b, frameMagic) {
-		if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
-			return ErrMagicMismatch
+	if string(b) != frameMagic {
+		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
+			return nil, ErrMagicMismatch
+		}
+		if len(in) < 4 {
+			return nil, io.ErrUnexpectedEOF
 		}
-		*h = Header{Skippable: true}
-		return nil
+		h.HeaderSize += 4
+		h.Skippable = true
+		h.SkippableID = int(b[0] & 0xf)
+		h.SkippableSize = binary.LittleEndian.Uint32(in)
+		return in[4:], nil
 	}
+
+	// Read Window_Descriptor
+	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
 	if len(in) < 1 {
-		return io.ErrUnexpectedEOF
+		return nil, io.ErrUnexpectedEOF
 	}
-
-	// Clear output
-	*h = Header{}
 	fhd, in := in[0], in[1:]
+	h.HeaderSize++
 	h.SingleSegment = fhd&(1<<5) != 0
 	h.HasCheckSum = fhd&(1<<2) != 0
-
 	if fhd&(1<<3) != 0 {
-		return errors.New("reserved bit set on frame header")
+		return nil, errors.New("reserved bit set on frame header")
 	}
 
-	// Read Window_Descriptor
-	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
 	if !h.SingleSegment {
 		if len(in) < 1 {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		var wd byte
 		wd, in = in[0], in[1:]
+		h.HeaderSize++
 		windowLog := 10 + (wd >> 3)
 		windowBase := uint64(1) << windowLog
 		windowAdd := (windowBase / 8) * uint64(wd&0x7)
@@ -117,13 +160,11 @@ func (h *Header) Decode(in []byte) error {
 			size = 4
 		}
 		if len(in) < int(size) {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		b, in = in[:size], in[size:]
-		if b == nil {
-			return io.ErrUnexpectedEOF
-		}
-		switch size {
+		h.HeaderSize += int(size)
+		switch len(b) {
 		case 1:
 			h.DictionaryID = uint32(b[0])
 		case 2:
@@ -149,13 +190,11 @@ func (h *Header) Decode(in []byte) error {
 	if fcsSize > 0 {
 		h.HasFCS = true
 		if len(in) < fcsSize {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		b, in = in[:fcsSize], in[fcsSize:]
-		if b == nil {
-			return io.ErrUnexpectedEOF
-		}
-		switch fcsSize {
+		h.HeaderSize += int(fcsSize)
+		switch len(b) {
 		case 1:
 			h.FrameContentSize = uint64(b[0])
 		case 2:
@@ -172,7 +211,7 @@ func (h *Header) Decode(in []byte) error {
 
 	// Frame Header done, we will not fail from now on.
 	if len(in) < 3 {
-		return nil
+		return in, nil
 	}
 	tmp := in[:3]
 	bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@@ -182,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
 	cSize := int(bh >> 3)
 	switch blockType {
 	case blockTypeReserved:
-		return nil
+		return in, nil
 	case blockTypeRLE:
 		h.FirstBlock.Compressed = true
 		h.FirstBlock.DecompressedSize = cSize
@@ -198,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
 	}
 
 	h.FirstBlock.OK = true
-	return nil
+	return in, nil
+}
+
+// AppendTo will append the encoded header to the dst slice.
+// There is no error checking performed on the header values.
+func (h *Header) AppendTo(dst []byte) ([]byte, error) {
+	if h.Skippable {
+		magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
+		magic[0] |= byte(h.SkippableID & 0xf)
+		dst = append(dst, magic[:]...)
+		f := h.SkippableSize
+		return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
+	}
+	f := frameHeader{
+		ContentSize:   h.FrameContentSize,
+		WindowSize:    uint32(h.WindowSize),
+		SingleSegment: h.SingleSegment,
+		Checksum:      h.HasCheckSum,
+		DictID:        h.DictionaryID,
+	}
+	return f.appendTo(dst), nil
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index f430f58b572..f04aaa21eb8 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -5,9 +5,12 @@
 package zstd
 
 import (
-	"errors"
+	"context"
+	"encoding/binary"
 	"io"
 	"sync"
+
+	"github.com/klauspost/compress/zstd/internal/xxhash"
 )
 
 // Decoder provides decoding of zstandard streams.
@@ -22,15 +25,22 @@ type Decoder struct {
 	// Unreferenced decoders, ready for use.
 	decoders chan *blockDec
 
-	// Streams ready to be decoded.
-	stream chan decodeStream
-
 	// Current read position used for Reader functionality.
 	current decoderState
 
+	// sync stream decoding
+	syncStream struct {
+		decodedFrame uint64
+		br           readerWrapper
+		enabled      bool
+		inFrame      bool
+		dstBuf       []byte
+	}
+
+	frame *frameDec
+
 	// Custom dictionaries.
-	// Always uses copies.
-	dicts map[uint32]dict
+	dicts map[uint32]*dict
 
 	// streamWg is the waitgroup for all streams
 	streamWg sync.WaitGroup
@@ -46,7 +56,10 @@ type decoderState struct {
 	output chan decodeOutput
 
 	// cancel remaining output.
-	cancel chan struct{}
+	cancel context.CancelFunc
+
+	// crc of current frame
+	crc *xxhash.Digest
 
 	flushed bool
 }
@@ -81,7 +94,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
 			return nil, err
 		}
 	}
-	d.current.output = make(chan decodeOutput, d.o.concurrent)
+	d.current.crc = xxhash.New()
 	d.current.flushed = true
 
 	if r == nil {
@@ -89,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
 	}
 
 	// Transfer option dicts.
-	d.dicts = make(map[uint32]dict, len(d.o.dicts))
+	d.dicts = make(map[uint32]*dict, len(d.o.dicts))
 	for _, dc := range d.o.dicts {
 		d.dicts[dc.id] = dc
 	}
@@ -130,7 +143,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
 				break
 			}
 			if !d.nextBlock(n == 0) {
-				return n, nil
+				return n, d.current.err
 			}
 		}
 	}
@@ -162,6 +175,7 @@ func (d *Decoder) Reset(r io.Reader) error {
 
 	d.drainOutput()
 
+	d.syncStream.br.r = nil
 	if r == nil {
 		d.current.err = ErrDecoderNilInput
 		if len(d.current.b) > 0 {
@@ -172,21 +186,23 @@ func (d *Decoder) Reset(r io.Reader) error {
 	}
 
 	// If bytes buffer and < 5MB, do sync decoding anyway.
-	if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
+	if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap {
 		bb2 := bb
 		if debugDecoder {
 			println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
 		}
 		b := bb2.Bytes()
 		var dst []byte
-		if cap(d.current.b) > 0 {
-			dst = d.current.b
+		if cap(d.syncStream.dstBuf) > 0 {
+			dst = d.syncStream.dstBuf[:0]
 		}
 
-		dst, err := d.DecodeAll(b, dst[:0])
+		dst, err := d.DecodeAll(b, dst)
 		if err == nil {
 			err = io.EOF
 		}
+		// Save output buffer
+		d.syncStream.dstBuf = dst
 		d.current.b = dst
 		d.current.err = err
 		d.current.flushed = true
@@ -195,33 +211,40 @@ func (d *Decoder) Reset(r io.Reader) error {
 		}
 		return nil
 	}
-
-	if d.stream == nil {
-		d.stream = make(chan decodeStream, 1)
-		d.streamWg.Add(1)
-		go d.startStreamDecoder(d.stream)
-	}
-
 	// Remove current block.
+	d.stashDecoder()
 	d.current.decodeOutput = decodeOutput{}
 	d.current.err = nil
-	d.current.cancel = make(chan struct{})
 	d.current.flushed = false
 	d.current.d = nil
+	d.syncStream.dstBuf = nil
 
-	d.stream <- decodeStream{
-		r:      r,
-		output: d.current.output,
-		cancel: d.current.cancel,
+	// Ensure no-one else is still running...
+	d.streamWg.Wait()
+	if d.frame == nil {
+		d.frame = newFrameDec(d.o)
 	}
+
+	if d.o.concurrent == 1 {
+		return d.startSyncDecoder(r)
+	}
+
+	d.current.output = make(chan decodeOutput, d.o.concurrent)
+	ctx, cancel := context.WithCancel(context.Background())
+	d.current.cancel = cancel
+	d.streamWg.Add(1)
+	go d.startStreamDecoder(ctx, r, d.current.output)
+
 	return nil
 }
 
 // drainOutput will drain the output until errEndOfStream is sent.
 func (d *Decoder) drainOutput() {
 	if d.current.cancel != nil {
-		println("cancelling current")
-		close(d.current.cancel)
+		if debugDecoder {
+			println("cancelling current")
+		}
+		d.current.cancel()
 		d.current.cancel = nil
 	}
 	if d.current.d != nil {
@@ -243,12 +266,9 @@ func (d *Decoder) drainOutput() {
 			}
 			d.decoders <- v.d
 		}
-		if v.err == errEndOfStream {
-			println("current flushed")
-			d.current.flushed = true
-			return
-		}
 	}
+	d.current.output = nil
+	d.current.flushed = true
 }
 
 // WriteTo writes data to w until there's no more data to write or when an error occurs.
@@ -287,19 +307,23 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
 // DecodeAll can be used concurrently.
 // The Decoder concurrency limits will be respected.
 func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
-	if d.current.err == ErrDecoderClosed {
+	if d.decoders == nil {
 		return dst, ErrDecoderClosed
 	}
 
 	// Grab a block decoder and frame decoder.
 	block := <-d.decoders
 	frame := block.localFrame
+	initialSize := len(dst)
 	defer func() {
 		if debugDecoder {
 			printf("re-adding decoder: %p", block)
 		}
 		frame.rawInput = nil
 		frame.bBuf = nil
+		if frame.history.decoders.br != nil {
+			frame.history.decoders.br.in = nil
+		}
 		d.decoders <- block
 	}()
 	frame.bBuf = input
@@ -307,34 +331,45 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 	for {
 		frame.history.reset()
 		err := frame.reset(&frame.bBuf)
-		if err == io.EOF {
-			if debugDecoder {
-				println("frame reset return EOF")
-			}
-			return dst, nil
-		}
-		if frame.DictionaryID != nil {
-			dict, ok := d.dicts[*frame.DictionaryID]
-			if !ok {
-				return nil, ErrUnknownDictionary
-			}
-			frame.history.setDict(&dict)
-		}
 		if err != nil {
+			if err == io.EOF {
+				if debugDecoder {
+					println("frame reset return EOF")
+				}
+				return dst, nil
+			}
 			return dst, err
 		}
-		if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
-			return dst, ErrDecoderSizeExceeded
+		if err = d.setDict(frame); err != nil {
+			return nil, err
 		}
-		if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
-			// Never preallocate moe than 1 GB up front.
+		if frame.WindowSize > d.o.maxWindowSize {
+			if debugDecoder {
+				println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize)
+			}
+			return dst, ErrWindowSizeExceeded
+		}
+		if frame.FrameContentSize != fcsUnknown {
+			if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) {
+				if debugDecoder {
+					println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst))
+				}
+				return dst, ErrDecoderSizeExceeded
+			}
+			if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) {
+				if debugDecoder {
+					println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst))
+				}
+				return dst, ErrDecoderSizeExceeded
+			}
 			if cap(dst)-len(dst) < int(frame.FrameContentSize) {
-				dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
+				dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc)
 				copy(dst2, dst)
 				dst = dst2
 			}
 		}
-		if cap(dst) == 0 {
+
+		if cap(dst) == 0 && !d.o.limitToCap {
 			// Allocate len(input) * 2 by default if nothing is provided
 			// and we didn't get frame content size.
 			size := len(input) * 2
@@ -352,6 +387,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		if err != nil {
 			return dst, err
 		}
+		if uint64(len(dst)-initialSize) > d.o.maxDecodedSize {
+			return dst, ErrDecoderSizeExceeded
+		}
 		if len(frame.bBuf) == 0 {
 			if debugDecoder {
 				println("frame dbuf empty")
@@ -368,33 +406,167 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 // If non-blocking mode is used the returned boolean will be false
 // if no data was available without blocking.
 func (d *Decoder) nextBlock(blocking bool) (ok bool) {
-	if d.current.d != nil {
-		if debugDecoder {
-			printf("re-adding current decoder %p", d.current.d)
-		}
-		d.decoders <- d.current.d
-		d.current.d = nil
-	}
 	if d.current.err != nil {
 		// Keep error state.
-		return blocking
+		return false
 	}
+	d.current.b = d.current.b[:0]
 
+	// SYNC:
+	if d.syncStream.enabled {
+		if !blocking {
+			return false
+		}
+		ok = d.nextBlockSync()
+		if !ok {
+			d.stashDecoder()
+		}
+		return ok
+	}
+
+	//ASYNC:
+	d.stashDecoder()
 	if blocking {
-		d.current.decodeOutput = <-d.current.output
+		d.current.decodeOutput, ok = <-d.current.output
 	} else {
 		select {
-		case d.current.decodeOutput = <-d.current.output:
+		case d.current.decodeOutput, ok = <-d.current.output:
 		default:
 			return false
 		}
 	}
+	if !ok {
+		// This should not happen, so signal error state...
+		d.current.err = io.ErrUnexpectedEOF
+		return false
+	}
+	next := d.current.decodeOutput
+	if next.d != nil && next.d.async.newHist != nil {
+		d.current.crc.Reset()
+	}
 	if debugDecoder {
-		println("got", len(d.current.b), "bytes, error:", d.current.err)
+		var tmp [4]byte
+		binary.LittleEndian.PutUint32(tmp[:], uint32(xxhash.Sum64(next.b)))
+		println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
+	}
+
+	if d.o.ignoreChecksum {
+		return true
+	}
+
+	if len(next.b) > 0 {
+		d.current.crc.Write(next.b)
+	}
+	if next.err == nil && next.d != nil && next.d.hasCRC {
+		got := uint32(d.current.crc.Sum64())
+		if got != next.d.checkCRC {
+			if debugDecoder {
+				printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
+			}
+			d.current.err = ErrCRCMismatch
+		} else {
+			if debugDecoder {
+				printf("CRC ok %08x\n", got)
+			}
+		}
+	}
+
+	return true
+}
+
+func (d *Decoder) nextBlockSync() (ok bool) {
+	if d.current.d == nil {
+		d.current.d = <-d.decoders
+	}
+	for len(d.current.b) == 0 {
+		if !d.syncStream.inFrame {
+			d.frame.history.reset()
+			d.current.err = d.frame.reset(&d.syncStream.br)
+			if d.current.err == nil {
+				d.current.err = d.setDict(d.frame)
+			}
+			if d.current.err != nil {
+				return false
+			}
+			if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
+				d.current.err = ErrDecoderSizeExceeded
+				return false
+			}
+
+			d.syncStream.decodedFrame = 0
+			d.syncStream.inFrame = true
+		}
+		d.current.err = d.frame.next(d.current.d)
+		if d.current.err != nil {
+			return false
+		}
+		d.frame.history.ensureBlock()
+		if debugDecoder {
+			println("History trimmed:", len(d.frame.history.b), "decoded already:", d.syncStream.decodedFrame)
+		}
+		histBefore := len(d.frame.history.b)
+		d.current.err = d.current.d.decodeBuf(&d.frame.history)
+
+		if d.current.err != nil {
+			println("error after:", d.current.err)
+			return false
+		}
+		d.current.b = d.frame.history.b[histBefore:]
+		if debugDecoder {
+			println("history after:", len(d.frame.history.b))
+		}
+
+		// Check frame size (before CRC)
+		d.syncStream.decodedFrame += uint64(len(d.current.b))
+		if d.syncStream.decodedFrame > d.frame.FrameContentSize {
+			if debugDecoder {
+				printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
+			}
+			d.current.err = ErrFrameSizeExceeded
+			return false
+		}
+
+		// Check FCS
+		if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize {
+			if debugDecoder {
+				printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
+			}
+			d.current.err = ErrFrameSizeMismatch
+			return false
+		}
+
+		// Update/Check CRC
+		if d.frame.HasCheckSum {
+			if !d.o.ignoreChecksum {
+				d.frame.crc.Write(d.current.b)
+			}
+			if d.current.d.Last {
+				if !d.o.ignoreChecksum {
+					d.current.err = d.frame.checkCRC()
+				} else {
+					d.current.err = d.frame.consumeCRC()
+				}
+				if d.current.err != nil {
+					println("CRC error:", d.current.err)
+					return false
+				}
+			}
+		}
+		d.syncStream.inFrame = !d.current.d.Last
 	}
 	return true
 }
 
+func (d *Decoder) stashDecoder() {
+	if d.current.d != nil {
+		if debugDecoder {
+			printf("re-adding current decoder %p", d.current.d)
+		}
+		d.decoders <- d.current.d
+		d.current.d = nil
+	}
+}
+
 // Close will release all resources.
 // It is NOT possible to reuse the decoder after this.
 func (d *Decoder) Close() {
@@ -402,10 +574,10 @@ func (d *Decoder) Close() {
 		return
 	}
 	d.drainOutput()
-	if d.stream != nil {
-		close(d.stream)
+	if d.current.cancel != nil {
+		d.current.cancel()
 		d.streamWg.Wait()
-		d.stream = nil
+		d.current.cancel = nil
 	}
 	if d.decoders != nil {
 		close(d.decoders)
@@ -456,100 +628,321 @@ type decodeOutput struct {
 	err error
 }
 
-type decodeStream struct {
-	r io.Reader
-
-	// Blocks ready to be written to output.
-	output chan decodeOutput
-
-	// cancel reading from the input
-	cancel chan struct{}
+func (d *Decoder) startSyncDecoder(r io.Reader) error {
+	d.frame.history.reset()
+	d.syncStream.br = readerWrapper{r: r}
+	d.syncStream.inFrame = false
+	d.syncStream.enabled = true
+	d.syncStream.decodedFrame = 0
+	return nil
 }
 
-// errEndOfStream indicates that everything from the stream was read.
-var errEndOfStream = errors.New("end-of-stream")
-
 // Create Decoder:
-// Spawn n block decoders. These accept tasks to decode a block.
-// Create goroutine that handles stream processing, this will send history to decoders as they are available.
-// Decoders update the history as they decode.
-// When a block is returned:
-// 		a) history is sent to the next decoder,
-// 		b) content written to CRC.
-// 		c) return data to WRITER.
-// 		d) wait for next block to return data.
-// Once WRITTEN, the decoders reused by the writer frame decoder for re-use.
-func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
+// ASYNC:
+// Spawn 3 go routines.
+// 0: Read frames and decode block literals.
+// 1: Decode sequences.
+// 2: Execute sequences, send to output.
+func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
 	defer d.streamWg.Done()
-	frame := newFrameDec(d.o)
-	for stream := range inStream {
-		if debugDecoder {
-			println("got new stream")
+	br := readerWrapper{r: r}
+
+	var seqDecode = make(chan *blockDec, d.o.concurrent)
+	var seqExecute = make(chan *blockDec, d.o.concurrent)
+
+	// Async 1: Decode sequences...
+	go func() {
+		var hist history
+		var hasErr bool
+
+		for block := range seqDecode {
+			if hasErr {
+				if block != nil {
+					seqExecute <- block
+				}
+				continue
+			}
+			if block.async.newHist != nil {
+				if debugDecoder {
+					println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
+				}
+				hist.reset()
+				hist.decoders = block.async.newHist.decoders
+				hist.recentOffsets = block.async.newHist.recentOffsets
+				hist.windowSize = block.async.newHist.windowSize
+				if block.async.newHist.dict != nil {
+					hist.setDict(block.async.newHist.dict)
+				}
+			}
+			if block.err != nil || block.Type != blockTypeCompressed {
+				hasErr = block.err != nil
+				seqExecute <- block
+				continue
+			}
+
+			hist.decoders.literals = block.async.literals
+			block.err = block.prepareSequences(block.async.seqData, &hist)
+			if debugDecoder && block.err != nil {
+				println("prepareSequences returned:", block.err)
+			}
+			hasErr = block.err != nil
+			if block.err == nil {
+				block.err = block.decodeSequences(&hist)
+				if debugDecoder && block.err != nil {
+					println("decodeSequences returned:", block.err)
+				}
+				hasErr = block.err != nil
+				//				block.async.sequence = hist.decoders.seq[:hist.decoders.nSeqs]
+				block.async.seqSize = hist.decoders.seqSize
+			}
+			seqExecute <- block
 		}
-		br := readerWrapper{r: stream.r}
-	decodeStream:
-		for {
-			frame.history.reset()
-			err := frame.reset(&br)
-			if debugDecoder && err != nil {
-				println("Frame decoder returned", err)
+		close(seqExecute)
+		hist.reset()
+	}()
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	// Async 3: Execute sequences...
+	frameHistCache := d.frame.history.b
+	go func() {
+		var hist history
+		var decodedFrame uint64
+		var fcs uint64
+		var hasErr bool
+		for block := range seqExecute {
+			out := decodeOutput{err: block.err, d: block}
+			if block.err != nil || hasErr {
+				hasErr = true
+				output <- out
+				continue
+			}
+			if block.async.newHist != nil {
+				if debugDecoder {
+					println("Async 2: new history")
+				}
+				hist.reset()
+				hist.windowSize = block.async.newHist.windowSize
+				hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
+				if block.async.newHist.dict != nil {
+					hist.setDict(block.async.newHist.dict)
+				}
+
+				if cap(hist.b) < hist.allocFrameBuffer {
+					if cap(frameHistCache) >= hist.allocFrameBuffer {
+						hist.b = frameHistCache
+					} else {
+						hist.b = make([]byte, 0, hist.allocFrameBuffer)
+						println("Alloc history sized", hist.allocFrameBuffer)
+					}
+				}
+				hist.b = hist.b[:0]
+				fcs = block.async.fcs
+				decodedFrame = 0
 			}
-			if err == nil && frame.DictionaryID != nil {
-				dict, ok := d.dicts[*frame.DictionaryID]
-				if !ok {
-					err = ErrUnknownDictionary
+			do := decodeOutput{err: block.err, d: block}
+			switch block.Type {
+			case blockTypeRLE:
+				if debugDecoder {
+					println("add rle block length:", block.RLESize)
+				}
+
+				if cap(block.dst) < int(block.RLESize) {
+					if block.lowMem {
+						block.dst = make([]byte, block.RLESize)
+					} else {
+						block.dst = make([]byte, maxCompressedBlockSize)
+					}
+				}
+				block.dst = block.dst[:block.RLESize]
+				v := block.data[0]
+				for i := range block.dst {
+					block.dst[i] = v
+				}
+				hist.append(block.dst)
+				do.b = block.dst
+			case blockTypeRaw:
+				if debugDecoder {
+					println("add raw block length:", len(block.data))
+				}
+				hist.append(block.data)
+				do.b = block.data
+			case blockTypeCompressed:
+				if debugDecoder {
+					println("execute with history length:", len(hist.b), "window:", hist.windowSize)
+				}
+				hist.decoders.seqSize = block.async.seqSize
+				hist.decoders.literals = block.async.literals
+				do.err = block.executeSequences(&hist)
+				hasErr = do.err != nil
+				if debugDecoder && hasErr {
+					println("executeSequences returned:", do.err)
+				}
+				do.b = block.dst
+			}
+			if !hasErr {
+				decodedFrame += uint64(len(do.b))
+				if decodedFrame > fcs {
+					println("fcs exceeded", block.Last, fcs, decodedFrame)
+					do.err = ErrFrameSizeExceeded
+					hasErr = true
+				} else if block.Last && fcs != fcsUnknown && decodedFrame != fcs {
+					do.err = ErrFrameSizeMismatch
+					hasErr = true
 				} else {
-					frame.history.setDict(&dict)
+					if debugDecoder {
+						println("fcs ok", block.Last, fcs, decodedFrame)
+					}
 				}
 			}
-			if err != nil {
-				stream.output <- decodeOutput{
-					err: err,
+			output <- do
+		}
+		close(output)
+		frameHistCache = hist.b
+		wg.Done()
+		if debugDecoder {
+			println("decoder goroutines finished")
+		}
+		hist.reset()
+	}()
+
+	var hist history
+decodeStream:
+	for {
+		var hasErr bool
+		hist.reset()
+		decodeBlock := func(block *blockDec) {
+			if hasErr {
+				if block != nil {
+					seqDecode <- block
 				}
-				break
+				return
 			}
+			if block.err != nil || block.Type != blockTypeCompressed {
+				hasErr = block.err != nil
+				seqDecode <- block
+				return
+			}
+
+			remain, err := block.decodeLiterals(block.data, &hist)
+			block.err = err
+			hasErr = block.err != nil
+			if err == nil {
+				block.async.literals = hist.decoders.literals
+				block.async.seqData = remain
+			} else if debugDecoder {
+				println("decodeLiterals error:", err)
+			}
+			seqDecode <- block
+		}
+		frame := d.frame
+		if debugDecoder {
+			println("New frame...")
+		}
+		var historySent bool
+		frame.history.reset()
+		err := frame.reset(&br)
+		if debugDecoder && err != nil {
+			println("Frame decoder returned", err)
+		}
+		if err == nil {
+			err = d.setDict(frame)
+		}
+		if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
 			if debugDecoder {
-				println("starting frame decoder")
-			}
-
-			// This goroutine will forward history between frames.
-			frame.frameDone.Add(1)
-			frame.initAsync()
-
-			go frame.startDecoder(stream.output)
-		decodeFrame:
-			// Go through all blocks of the frame.
-			for {
-				dec := <-d.decoders
-				select {
-				case <-stream.cancel:
-					if !frame.sendErr(dec, io.EOF) {
-						// To not let the decoder dangle, send it back.
-						stream.output <- decodeOutput{d: dec}
-					}
-					break decodeStream
-				default:
+				println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize)
+			}
+
+			err = ErrDecoderSizeExceeded
+		}
+		if err != nil {
+			select {
+			case <-ctx.Done():
+			case dec := <-d.decoders:
+				dec.sendErr(err)
+				decodeBlock(dec)
+			}
+			break decodeStream
+		}
+
+		// Go through all blocks of the frame.
+		for {
+			var dec *blockDec
+			select {
+			case <-ctx.Done():
+				break decodeStream
+			case dec = <-d.decoders:
+				// Once we have a decoder, we MUST return it.
+			}
+			err := frame.next(dec)
+			if !historySent {
+				h := frame.history
+				if debugDecoder {
+					println("Alloc History:", h.allocFrameBuffer)
+				}
+				hist.reset()
+				if h.dict != nil {
+					hist.setDict(h.dict)
 				}
-				err := frame.next(dec)
-				switch err {
-				case io.EOF:
-					// End of current frame, no error
-					println("EOF on next block")
-					break decodeFrame
-				case nil:
-					continue
-				default:
-					println("block decoder returned", err)
-					break decodeStream
+				dec.async.newHist = &h
+				dec.async.fcs = frame.FrameContentSize
+				historySent = true
+			} else {
+				dec.async.newHist = nil
+			}
+			if debugDecoder && err != nil {
+				println("next block returned error:", err)
+			}
+			dec.err = err
+			dec.hasCRC = false
+			if dec.Last && frame.HasCheckSum && err == nil {
+				crc, err := frame.rawInput.readSmall(4)
+				if len(crc) < 4 {
+					if err == nil {
+						err = io.ErrUnexpectedEOF
+
+					}
+					println("CRC missing?", err)
+					dec.err = err
+				} else {
+					dec.checkCRC = binary.LittleEndian.Uint32(crc)
+					dec.hasCRC = true
+					if debugDecoder {
+						printf("found crc to check: %08x\n", dec.checkCRC)
+					}
 				}
 			}
-			// All blocks have started decoding, check if there are more frames.
-			println("waiting for done")
-			frame.frameDone.Wait()
-			println("done waiting...")
+			err = dec.err
+			last := dec.Last
+			decodeBlock(dec)
+			if err != nil {
+				break decodeStream
+			}
+			if last {
+				break
+			}
 		}
-		frame.frameDone.Wait()
-		println("Sending EOS")
-		stream.output <- decodeOutput{err: errEndOfStream}
 	}
+	close(seqDecode)
+	wg.Wait()
+	hist.reset()
+	d.frame.history.b = frameHistCache
+}
+
+func (d *Decoder) setDict(frame *frameDec) (err error) {
+	dict, ok := d.dicts[frame.DictionaryID]
+	if ok {
+		if debugDecoder {
+			println("setting dict", frame.DictionaryID)
+		}
+		frame.history.setDict(dict)
+	} else if frame.DictionaryID != 0 {
+		// A zero or missing dictionary id is ambiguous:
+		// either dictionary zero, or no dictionary. In particular,
+		// zstd --patch-from uses this id for the source file,
+		// so only return an error if the dictionary id is not zero.
+		err = ErrUnknownDictionary
+	}
+	return err
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
index 95cc9b8b81f..774c5f00fe4 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -6,6 +6,8 @@ package zstd
 
 import (
 	"errors"
+	"fmt"
+	"math/bits"
 	"runtime"
 )
 
@@ -14,21 +16,28 @@ type DOption func(*decoderOptions) error
 
 // options retains accumulated state of multiple options.
 type decoderOptions struct {
-	lowMem         bool
-	concurrent     int
-	maxDecodedSize uint64
-	maxWindowSize  uint64
-	dicts          []dict
+	lowMem          bool
+	concurrent      int
+	maxDecodedSize  uint64
+	maxWindowSize   uint64
+	dicts           []*dict
+	ignoreChecksum  bool
+	limitToCap      bool
+	decodeBufsBelow int
 }
 
 func (o *decoderOptions) setDefault() {
 	*o = decoderOptions{
 		// use less ram: true for now, but may change.
-		lowMem:        true,
-		concurrent:    runtime.GOMAXPROCS(0),
-		maxWindowSize: MaxWindowSize,
+		lowMem:          true,
+		concurrent:      runtime.GOMAXPROCS(0),
+		maxWindowSize:   MaxWindowSize,
+		decodeBufsBelow: 128 << 10,
 	}
-	o.maxDecodedSize = 1 << 63
+	if o.concurrent > 4 {
+		o.concurrent = 4
+	}
+	o.maxDecodedSize = 64 << 30
 }
 
 // WithDecoderLowmem will set whether to use a lower amount of memory,
@@ -37,16 +46,25 @@ func WithDecoderLowmem(b bool) DOption {
 	return func(o *decoderOptions) error { o.lowMem = b; return nil }
 }
 
-// WithDecoderConcurrency will set the concurrency,
-// meaning the maximum number of decoders to run concurrently.
-// The value supplied must be at least 1.
-// By default this will be set to GOMAXPROCS.
+// WithDecoderConcurrency sets the number of created decoders.
+// When decoding block with DecodeAll, this will limit the number
+// of possible concurrently running decodes.
+// When decoding streams, this will limit the number of
+// inflight blocks.
+// When decoding streams and setting maximum to 1,
+// no async decoding will be done.
+// When a value of 0 is provided GOMAXPROCS will be used.
+// By default this will be set to 4 or GOMAXPROCS, whatever is lower.
 func WithDecoderConcurrency(n int) DOption {
 	return func(o *decoderOptions) error {
-		if n <= 0 {
+		if n < 0 {
 			return errors.New("concurrency must be at least 1")
 		}
-		o.concurrent = n
+		if n == 0 {
+			o.concurrent = runtime.GOMAXPROCS(0)
+		} else {
+			o.concurrent = n
+		}
 		return nil
 	}
 }
@@ -54,7 +72,7 @@ func WithDecoderConcurrency(n int) DOption {
 // WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
 // non-streaming operations or maximum window size for streaming operations.
 // This can be used to control memory usage of potentially hostile content.
-// Maximum and default is 1 << 63 bytes.
+// Maximum is 1 << 63 bytes. Default is 64GiB.
 func WithDecoderMaxMemory(n uint64) DOption {
 	return func(o *decoderOptions) error {
 		if n == 0 {
@@ -69,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption {
 }
 
 // WithDecoderDicts allows to register one or more dictionaries for the decoder.
-// If several dictionaries with the same ID is provided the last one will be used.
+//
+// Each slice in dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
+// If several dictionaries with the same ID are provided, the last one will be used.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
 func WithDecoderDicts(dicts ...[]byte) DOption {
 	return func(o *decoderOptions) error {
 		for _, b := range dicts {
@@ -77,8 +101,20 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
 			if err != nil {
 				return err
 			}
-			o.dicts = append(o.dicts, *d)
+			o.dicts = append(o.dicts, d)
+		}
+		return nil
+	}
+}
+
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
+// The slice content can be arbitrary data.
+func WithDecoderDictRaw(id uint32, content []byte) DOption {
+	return func(o *decoderOptions) error {
+		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
 		}
+		o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
 		return nil
 	}
 }
@@ -100,3 +136,34 @@ func WithDecoderMaxWindow(size uint64) DOption {
 		return nil
 	}
 }
+
+// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes,
+// or any size set in WithDecoderMaxMemory.
+// This can be used to limit decoding to a specific maximum output size.
+// Disabled by default.
+func WithDecodeAllCapLimit(b bool) DOption {
+	return func(o *decoderOptions) error {
+		o.limitToCap = b
+		return nil
+	}
+}
+
+// WithDecodeBuffersBelow will fully decode readers that have a
+// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer.
+// This typically uses less allocations but will have the full decompressed object in memory.
+// Note that DecodeAllCapLimit will disable this, as well as giving a size of 0 or less.
+// Default is 128KiB.
+func WithDecodeBuffersBelow(size int) DOption {
+	return func(o *decoderOptions) error {
+		o.decodeBufsBelow = size
+		return nil
+	}
+}
+
+// IgnoreChecksum allows to forcibly ignore checksum checking.
+func IgnoreChecksum(b bool) DOption {
+	return func(o *decoderOptions) error {
+		o.ignoreChecksum = b
+		return nil
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
index a36ae83ef57..8d5567fe64c 100644
--- a/vendor/github.com/klauspost/compress/zstd/dict.go
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -6,6 +6,8 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
+	"sort"
 
 	"github.com/klauspost/compress/huff0"
 )
@@ -15,12 +17,14 @@ type dict struct {
 
 	litEnc              *huff0.Scratch
 	llDec, ofDec, mlDec sequenceDec
-	//llEnc, ofEnc, mlEnc []*fseEncoder
-	offsets [3]int
-	content []byte
+	offsets             [3]int
+	content             []byte
 }
 
-var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+const dictMagic = "\x37\xa4\x30\xec"
+
+// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
+const dictMaxLength = 1 << 31
 
 // ID returns the dictionary id or 0 if d is nil.
 func (d *dict) ID() uint32 {
@@ -30,14 +34,38 @@ func (d *dict) ID() uint32 {
 	return d.id
 }
 
-// DictContentSize returns the dictionary content size or 0 if d is nil.
-func (d *dict) DictContentSize() int {
+// ContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) ContentSize() int {
 	if d == nil {
 		return 0
 	}
 	return len(d.content)
 }
 
+// Content returns the dictionary content.
+func (d *dict) Content() []byte {
+	if d == nil {
+		return nil
+	}
+	return d.content
+}
+
+// Offsets returns the initial offsets.
+func (d *dict) Offsets() [3]int {
+	if d == nil {
+		return [3]int{}
+	}
+	return d.offsets
+}
+
+// LitEncoder returns the literal encoder.
+func (d *dict) LitEncoder() *huff0.Scratch {
+	if d == nil {
+		return nil
+	}
+	return d.litEnc
+}
+
 // Load a dictionary as described in
 // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 func loadDict(b []byte) (*dict, error) {
@@ -50,7 +78,7 @@ func loadDict(b []byte) (*dict, error) {
 		ofDec: sequenceDec{fse: &fseDecoder{}},
 		mlDec: sequenceDec{fse: &fseDecoder{}},
 	}
-	if !bytes.Equal(b[:4], dictMagic[:]) {
+	if string(b[:4]) != dictMagic {
 		return nil, ErrMagicMismatch
 	}
 	d.id = binary.LittleEndian.Uint32(b[4:8])
@@ -62,7 +90,7 @@ func loadDict(b []byte) (*dict, error) {
 	var err error
 	d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("loading literal table: %w", err)
 	}
 	d.litEnc.Reuse = huff0.ReusePolicyMust
 
@@ -120,3 +148,387 @@ func loadDict(b []byte) (*dict, error) {
 
 	return &d, nil
 }
+
+// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
+func InspectDictionary(b []byte) (interface {
+	ID() uint32
+	ContentSize() int
+	Content() []byte
+	Offsets() [3]int
+	LitEncoder() *huff0.Scratch
+}, error) {
+	initPredefined()
+	d, err := loadDict(b)
+	return d, err
+}
+
+type BuildDictOptions struct {
+	// Dictionary ID.
+	ID uint32
+
+	// Content to use to create dictionary tables.
+	Contents [][]byte
+
+	// History to use for all blocks.
+	History []byte
+
+	// Offsets to use.
+	Offsets [3]int
+
+	// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
+	// See https://github.com/facebook/zstd/issues/3724
+	CompatV155 bool
+
+	// Use the specified encoder level.
+	// The dictionary will be built using the specified encoder level,
+	// which will reflect speed and make the dictionary tailored for that level.
+	// If not set SpeedBestCompression will be used.
+	Level EncoderLevel
+
+	// DebugOut will write stats and other details here if set.
+	DebugOut io.Writer
+}
+
+func BuildDict(o BuildDictOptions) ([]byte, error) {
+	initPredefined()
+	hist := o.History
+	contents := o.Contents
+	debug := o.DebugOut != nil
+	println := func(args ...interface{}) {
+		if o.DebugOut != nil {
+			fmt.Fprintln(o.DebugOut, args...)
+		}
+	}
+	printf := func(s string, args ...interface{}) {
+		if o.DebugOut != nil {
+			fmt.Fprintf(o.DebugOut, s, args...)
+		}
+	}
+	print := func(args ...interface{}) {
+		if o.DebugOut != nil {
+			fmt.Fprint(o.DebugOut, args...)
+		}
+	}
+
+	if int64(len(hist)) > dictMaxLength {
+		return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
+	}
+	if len(hist) < 8 {
+		return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
+	}
+	if len(contents) == 0 {
+		return nil, errors.New("no content provided")
+	}
+	d := dict{
+		id:      o.ID,
+		litEnc:  nil,
+		llDec:   sequenceDec{},
+		ofDec:   sequenceDec{},
+		mlDec:   sequenceDec{},
+		offsets: o.Offsets,
+		content: hist,
+	}
+	block := blockEnc{lowMem: false}
+	block.init()
+	enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
+	if o.Level != 0 {
+		eOpts := encoderOptions{
+			level:      o.Level,
+			blockSize:  maxMatchLen,
+			windowSize: maxMatchLen,
+			dict:       &d,
+			lowMem:     false,
+		}
+		enc = eOpts.encoder()
+	} else {
+		o.Level = SpeedBestCompression
+	}
+	var (
+		remain [256]int
+		ll     [256]int
+		ml     [256]int
+		of     [256]int
+	)
+	addValues := func(dst *[256]int, src []byte) {
+		for _, v := range src {
+			dst[v]++
+		}
+	}
+	addHist := func(dst *[256]int, src *[256]uint32) {
+		for i, v := range src {
+			dst[i] += int(v)
+		}
+	}
+	seqs := 0
+	nUsed := 0
+	litTotal := 0
+	newOffsets := make(map[uint32]int, 1000)
+	for _, b := range contents {
+		block.reset(nil)
+		if len(b) < 8 {
+			continue
+		}
+		nUsed++
+		enc.Reset(&d, true)
+		enc.Encode(&block, b)
+		addValues(&remain, block.literals)
+		litTotal += len(block.literals)
+		seqs += len(block.sequences)
+		block.genCodes()
+		addHist(&ll, block.coders.llEnc.Histogram())
+		addHist(&ml, block.coders.mlEnc.Histogram())
+		addHist(&of, block.coders.ofEnc.Histogram())
+		for i, seq := range block.sequences {
+			if i > 3 {
+				break
+			}
+			offset := seq.offset
+			if offset == 0 {
+				continue
+			}
+			if offset > 3 {
+				newOffsets[offset-3]++
+			} else {
+				newOffsets[uint32(o.Offsets[offset-1])]++
+			}
+		}
+	}
+	// Find most used offsets.
+	var sortedOffsets []uint32
+	for k := range newOffsets {
+		sortedOffsets = append(sortedOffsets, k)
+	}
+	sort.Slice(sortedOffsets, func(i, j int) bool {
+		a, b := sortedOffsets[i], sortedOffsets[j]
+		if a == b {
+			// Prefer the longer offset
+			return sortedOffsets[i] > sortedOffsets[j]
+		}
+		return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
+	})
+	if len(sortedOffsets) > 3 {
+		if debug {
+			print("Offsets:")
+			for i, v := range sortedOffsets {
+				if i > 20 {
+					break
+				}
+				printf("[%d: %d],", v, newOffsets[v])
+			}
+			println("")
+		}
+
+		sortedOffsets = sortedOffsets[:3]
+	}
+	for i, v := range sortedOffsets {
+		o.Offsets[i] = int(v)
+	}
+	if debug {
+		println("New repeat offsets", o.Offsets)
+	}
+
+	if nUsed == 0 || seqs == 0 {
+		return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
+	}
+	if debug {
+		println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
+	}
+	if seqs/nUsed < 512 {
+		// Use 512 as minimum.
+		nUsed = seqs / 512
+	}
+	copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
+		hist := dst.Histogram()
+		var maxSym uint8
+		var maxCount int
+		var fakeLength int
+		for i, v := range src {
+			if v > 0 {
+				v = v / nUsed
+				if v == 0 {
+					v = 1
+				}
+			}
+			if v > maxCount {
+				maxCount = v
+			}
+			if v != 0 {
+				maxSym = uint8(i)
+			}
+			fakeLength += v
+			hist[i] = uint32(v)
+		}
+		dst.HistogramFinished(maxSym, maxCount)
+		dst.reUsed = false
+		dst.useRLE = false
+		err := dst.normalizeCount(fakeLength)
+		if err != nil {
+			return nil, err
+		}
+		if debug {
+			println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
+		}
+		return dst.writeCount(nil)
+	}
+	if debug {
+		print("Literal lengths: ")
+	}
+	llTable, err := copyHist(block.coders.llEnc, &ll)
+	if err != nil {
+		return nil, err
+	}
+	if debug {
+		print("Match lengths: ")
+	}
+	mlTable, err := copyHist(block.coders.mlEnc, &ml)
+	if err != nil {
+		return nil, err
+	}
+	if debug {
+		print("Offsets: ")
+	}
+	ofTable, err := copyHist(block.coders.ofEnc, &of)
+	if err != nil {
+		return nil, err
+	}
+
+	// Literal table
+	avgSize := litTotal
+	if avgSize > huff0.BlockSizeMax/2 {
+		avgSize = huff0.BlockSizeMax / 2
+	}
+	huffBuff := make([]byte, 0, avgSize)
+	// Target size
+	div := litTotal / avgSize
+	if div < 1 {
+		div = 1
+	}
+	if debug {
+		println("Huffman weights:")
+	}
+	for i, n := range remain[:] {
+		if n > 0 {
+			n = n / div
+			// Allow all entries to be represented.
+			if n == 0 {
+				n = 1
+			}
+			huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+			if debug {
+				printf("[%d: %d], ", i, n)
+			}
+		}
+	}
+	if o.CompatV155 && remain[255]/div == 0 {
+		huffBuff = append(huffBuff, 255)
+	}
+	scratch := &huff0.Scratch{TableLog: 11}
+	for tries := 0; tries < 255; tries++ {
+		scratch = &huff0.Scratch{TableLog: 11}
+		_, _, err = huff0.Compress1X(huffBuff, scratch)
+		if err == nil {
+			break
+		}
+		if debug {
+			printf("Try %d: Huffman error: %v\n", tries+1, err)
+		}
+		huffBuff = huffBuff[:0]
+		if tries == 250 {
+			if debug {
+				println("Huffman: Bailing out with predefined table")
+			}
+
+			// Bail out.... Just generate something
+			huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
+			for i := 0; i < 128; i++ {
+				huffBuff = append(huffBuff, byte(i))
+			}
+			continue
+		}
+		if errors.Is(err, huff0.ErrIncompressible) {
+			// Try truncating least common.
+			for i, n := range remain[:] {
+				if n > 0 {
+					n = n / (div * (i + 1))
+					if n > 0 {
+						huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+					}
+				}
+			}
+			if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
+				huffBuff = append(huffBuff, 255)
+			}
+			if len(huffBuff) == 0 {
+				huffBuff = append(huffBuff, 0, 255)
+			}
+		}
+		if errors.Is(err, huff0.ErrUseRLE) {
+			for i, n := range remain[:] {
+				n = n / (div * (i + 1))
+				// Allow all entries to be represented.
+				if n == 0 {
+					n = 1
+				}
+				huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
+			}
+		}
+	}
+
+	var out bytes.Buffer
+	out.Write([]byte(dictMagic))
+	out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
+	out.Write(scratch.OutTable)
+	if debug {
+		println("huff table:", len(scratch.OutTable), "bytes")
+		println("of table:", len(ofTable), "bytes")
+		println("ml table:", len(mlTable), "bytes")
+		println("ll table:", len(llTable), "bytes")
+	}
+	out.Write(ofTable)
+	out.Write(mlTable)
+	out.Write(llTable)
+	out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
+	out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
+	out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
+	out.Write(hist)
+	if debug {
+		_, err := loadDict(out.Bytes())
+		if err != nil {
+			panic(err)
+		}
+		i, err := InspectDictionary(out.Bytes())
+		if err != nil {
+			panic(err)
+		}
+		println("ID:", i.ID())
+		println("Content size:", i.ContentSize())
+		println("Encoder:", i.LitEncoder() != nil)
+		println("Offsets:", i.Offsets())
+		var totalSize int
+		for _, b := range contents {
+			totalSize += len(b)
+		}
+
+		encWith := func(opts ...EOption) int {
+			enc, err := NewWriter(nil, opts...)
+			if err != nil {
+				panic(err)
+			}
+			defer enc.Close()
+			var dst []byte
+			var totalSize int
+			for _, b := range contents {
+				dst = enc.EncodeAll(b, dst[:0])
+				totalSize += len(dst)
+			}
+			return totalSize
+		}
+		plain := encWith(WithEncoderLevel(o.Level))
+		withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
+		println("Input size:", totalSize)
+		println("Plain Compressed:", plain)
+		println("Dict Compressed:", withDict)
+		println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
+	}
+	return out.Bytes(), nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
index 295cd602a42..5ca46038ad9 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -16,6 +16,7 @@ type fastBase struct {
 	cur int32
 	// maximum offset. Should be at least 2x block size.
 	maxMatchOff int32
+	bufferReset int32
 	hist        []byte
 	crc         *xxhash.Digest
 	tmp         [8]byte
@@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
 }
 
 func (e *fastBase) addBlock(src []byte) int32 {
-	if debugAsserts && e.cur > bufferReset {
-		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+	if debugAsserts && e.cur > e.bufferReset {
+		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
 	}
 	// check if we have space already
 	if len(e.hist)+len(src) > cap(e.hist) {
@@ -108,11 +109,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) {
 	e.blk = enc
 }
 
-func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
-	// Extend the match to be as long as possible.
-	return int32(matchLen(src[s:], src[t:]))
-}
-
 func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
 	if debugAsserts {
 		if s < 0 {
@@ -131,8 +127,6 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
 			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
 		}
 	}
-
-	// Extend the match to be as long as possible.
 	return int32(matchLen(src[s:], src[t:]))
 }
 
@@ -150,18 +144,19 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
 	} else {
 		e.crc.Reset()
 	}
+	e.blk.dictLitEnc = nil
 	if d != nil {
 		low := e.lowMem
 		if singleBlock {
 			e.lowMem = true
 		}
-		e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+		e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
 		e.lowMem = low
 	}
 
 	// We offset current position so everything will be out of reach.
 	// If above reset line, history will be purged.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += e.maxMatchOff + int32(len(e.hist))
 	}
 	e.hist = e.hist[:0]
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
index 96028ecd836..87f42879a87 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -34,7 +34,7 @@ type match struct {
 	est    int32
 }
 
-const highScore = 25000
+const highScore = maxMatchLen * 8
 
 // estBits will estimate output bits from predefined tables.
 func (m *match) estBits(bitsPerByte int32) {
@@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
 	if m.rep < 0 {
 		ofc = ofCode(uint32(m.s-m.offset) + 3)
 	} else {
-		ofc = ofCode(uint32(m.rep))
+		ofc = ofCode(uint32(m.rep) & 3)
 	}
 	// Cost, excluding
 	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@@ -84,14 +84,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = prevEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = prevEntry{}
-			}
+			e.table = [bestShortTableSize]prevEntry{}
+			e.longTable = [bestLongTableSize]prevEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -163,7 +159,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
-	cv := load6432(src, s)
 
 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -177,7 +172,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	_ = addLiterals
 
 	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
@@ -192,54 +186,104 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		bestOf := func(a, b match) match {
-			if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
-				return a
-			}
-			return b
-		}
-		const goodEnough = 100
+		const goodEnough = 250
+
+		cv := load6432(src, s)
 
 		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
 		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
 
-		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
-			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
-				return match{s: s, est: highScore}
+		// Set m to a match at offset if it looks like that will improve compression.
+		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
+			delta := s - offset
+			if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
+				return
+			}
+			// Try to quick reject if we already have a long match.
+			if m.length > 16 {
+				left := len(src) - int(m.s+m.length)
+				// If we are too close to the end, keep as is.
+				if left <= 0 {
+					return
+				}
+				checkLen := m.length - (s - m.s) - 8
+				if left > 2 && checkLen > 4 {
+					// Check 4 bytes, 4 bytes from the end of the current match.
+					a := load3232(src, offset+checkLen)
+					b := load3232(src, s+checkLen)
+					if a != b {
+						return
+					}
+				}
+			}
+			l := 4 + e.matchlen(s+4, offset+4, src)
+			if m.rep <= 0 {
+				// Extend candidate match backwards as far as possible.
+				// Do not extend repeats as we can assume they are optimal
+				// and offsets change if s == nextEmit.
+				tMin := s - e.maxMatchOff
+				if tMin < 0 {
+					tMin = 0
+				}
+				for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
+					s--
+					offset--
+					l++
+				}
 			}
 			if debugAsserts {
-				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
-					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+				if offset >= s {
+					panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
 				}
+				if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
+					panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+				}
+			}
+			cand := match{offset: offset, s: s, length: l, rep: rep}
+			cand.estBits(bitsPerByte)
+			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
+				*m = cand
 			}
-			m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
-			m.estBits(bitsPerByte)
-			return m
 		}
 
-		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
-		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
-		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+		best := match{s: s, est: highScore}
+		improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
 
 		if canRepeat && best.length < goodEnough {
-			cv32 := uint32(cv >> 8)
-			spp := s + 1
-			best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
-			best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
-			best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
-			if best.length > 0 {
-				cv32 = uint32(cv >> 24)
-				spp += 2
-				best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
-				best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
-				best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+			if s == nextEmit {
+				// Check repeats straight after a match.
+				improve(&best, s-offset2, s, uint32(cv), 1|4)
+				improve(&best, s-offset3, s, uint32(cv), 2|4)
+				if offset1 > 1 {
+					improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
+				}
+			}
+
+			// If either no match or a non-repeat match, check at + 1
+			if best.rep <= 0 {
+				cv32 := uint32(cv >> 8)
+				spp := s + 1
+				improve(&best, spp-offset1, spp, cv32, 1)
+				improve(&best, spp-offset2, spp, cv32, 2)
+				improve(&best, spp-offset3, spp, cv32, 3)
+				if best.rep < 0 {
+					cv32 = uint32(cv >> 24)
+					spp += 2
+					improve(&best, spp-offset1, spp, cv32, 1)
+					improve(&best, spp-offset2, spp, cv32, 2)
+					improve(&best, spp-offset3, spp, cv32, 3)
+				}
 			}
 		}
 		// Load next and check...
 		e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
 		e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
+		index0 := s + 1
 
 		// Look far ahead, unless we have a really long match already...
 		if best.length < goodEnough {
@@ -249,97 +293,89 @@ encodeLoop:
 				if s >= sLimit {
 					break encodeLoop
 				}
-				cv = load6432(src, s)
 				continue
 			}
 
-			s++
 			candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
-			cv = load6432(src, s)
-			cv2 := load6432(src, s+1)
+			cv = load6432(src, s+1)
+			cv2 := load6432(src, s+2)
 			candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
 			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
 
 			// Short at s+1
-			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+			improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
 			// Long at s+1, s+2
-			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
-			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
-			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
-			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+			improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
+			improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
+			improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
+			improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
 			if false {
 				// Short at s+3.
 				// Too often worse...
-				best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+				improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
 			}
-			// See if we can find a better match by checking where the current best ends.
-			// Use that offset to see if we can find a better full match.
-			if sAt := best.s + best.length; sAt < sLimit {
-				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
-				candidateEnd := e.longTable[nextHashL]
-				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
-					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
-					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
-						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+
+			// Start check at a fixed offset to allow for a few mismatches.
+			// For this compression level 2 yields the best results.
+			// We cannot do this if we have already indexed this position.
+			const skipBeginning = 2
+			if best.s > s-skipBeginning {
+				// See if we can find a better match by checking where the current best ends.
+				// Use that offset to see if we can find a better full match.
+				if sAt := best.s + best.length; sAt < sLimit {
+					nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
+					candidateEnd := e.longTable[nextHashL]
+
+					if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
+						improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+						if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
+							improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+						}
 					}
-					best = bestEnd
 				}
 			}
 		}
 
 		if debugAsserts {
+			if best.offset >= best.s {
+				panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
+			}
+			if best.s < nextEmit {
+				panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
+			}
+			if best.offset < s-e.maxMatchOff {
+				panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
+			}
 			if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
 				panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
 			}
 		}
 
 		// We have a match, we can store the forward value
+		s = best.s
 		if best.rep > 0 {
-			s = best.s
 			var seq seq
 			seq.matchLen = uint32(best.length - zstdMinMatch)
+			addLiterals(&seq, best.s)
 
-			// We might be able to match backwards.
-			// Extend as long as we can.
-			start := best.s
-			// We end the search early, so we don't risk 0 literals
-			// and have to do special offset treatment.
-			startLimit := nextEmit + 1
-
-			tMin := s - e.maxMatchOff
-			if tMin < 0 {
-				tMin = 0
-			}
-			repIndex := best.offset
-			for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
-				repIndex--
-				start--
-				seq.matchLen++
-			}
-			addLiterals(&seq, start)
-
-			// rep 0
-			seq.offset = uint32(best.rep)
+			// Repeat. If bit 4 is set, this is a non-lit repeat.
+			seq.offset = uint32(best.rep & 3)
 			if debugSequences {
-				println("repeat sequence", seq, "next s:", s)
+				println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
 			}
 			blk.sequences = append(blk.sequences, seq)
 
-			// Index match start+1 (long) -> s - 1
-			index0 := s
+			// Index old s + 1 -> s - 1
 			s = best.s + best.length
-
 			nextEmit = s
-			if s >= sLimit {
-				if debugEncoder {
-					println("repeat ended", s, best.length)
 
-				}
-				break encodeLoop
-			}
 			// Index skipped...
+			end := s
+			if s > sLimit+4 {
+				end = sLimit + 4
+			}
 			off := index0 + e.cur
-			for index0 < s-1 {
+			for index0 < end {
 				cv0 := load6432(src, index0)
 				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -348,19 +384,26 @@ encodeLoop:
 				off++
 				index0++
 			}
+
 			switch best.rep {
-			case 2:
+			case 2, 4 | 1:
 				offset1, offset2 = offset2, offset1
-			case 3:
+			case 3, 4 | 2:
 				offset1, offset2, offset3 = offset3, offset1, offset2
+			case 4 | 3:
+				offset1, offset2, offset3 = offset1-1, offset1, offset2
+			}
+			if s >= sLimit {
+				if debugEncoder {
+					println("repeat ended", s, best.length)
+				}
+				break encodeLoop
 			}
-			cv = load6432(src, s)
 			continue
 		}
 
 		// A 4-byte match has been found. Update recent offsets.
 		// We'll later see if more than 4 bytes.
-		s = best.s
 		t := best.offset
 		offset1, offset2, offset3 = s-t, offset1, offset2
 
@@ -372,22 +415,9 @@ encodeLoop:
 			panic("invalid offset")
 		}
 
-		// Extend the n-byte match as long as possible.
-		l := best.length
-
-		// Extend backwards
-		tMin := s - e.maxMatchOff
-		if tMin < 0 {
-			tMin = 0
-		}
-		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
-			s--
-			t--
-			l++
-		}
-
 		// Write our sequence
 		var seq seq
+		l := best.length
 		seq.litLen = uint32(s - nextEmit)
 		seq.matchLen = uint32(l - zstdMinMatch)
 		if seq.litLen > 0 {
@@ -400,65 +430,25 @@ encodeLoop:
 		}
 		blk.sequences = append(blk.sequences, seq)
 		nextEmit = s
-		if s >= sLimit {
-			break encodeLoop
+
+		// Index old s + 1 -> s - 1 or sLimit
+		end := s
+		if s > sLimit-4 {
+			end = sLimit - 4
 		}
 
-		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
-		// every entry
-		for index0 < s-1 {
+		off := index0 + e.cur
+		for index0 < end {
 			cv0 := load6432(src, index0)
 			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 			index0++
+			off++
 		}
-
-		cv = load6432(src, s)
-		if !canRepeat {
-			continue
-		}
-
-		// Check offset 2
-		for {
-			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
-				// Do regular search
-				break
-			}
-
-			// Store this, since we have it.
-			nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
-			nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
-
-			// We have at least 4 byte match.
-			// No need to check backwards. We come straight from a match
-			l := 4 + e.matchlen(s+4, o2+4, src)
-
-			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
-			e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
-			seq.matchLen = uint32(l) - zstdMinMatch
-			seq.litLen = 0
-
-			// Since litlen is always 0, this is offset 1.
-			seq.offset = 1
-			s += l
-			nextEmit = s
-			if debugSequences {
-				println("sequence", seq, "next s:", s)
-			}
-			blk.sequences = append(blk.sequences, seq)
-
-			// Swap offset 1 and 2.
-			offset1, offset2 = offset2, offset1
-			if s >= sLimit {
-				// Finished
-				break encodeLoop
-			}
-			cv = load6432(src, s)
+		if s >= sLimit {
+			break encodeLoop
 		}
 	}
 
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index 602c05ee0c4..20d25b0e052 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = prevEntry{}
-			}
+			e.table = [betterShortTableSize]tableEntry{}
+			e.longTable = [betterLongTableSize]prevEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -149,15 +145,15 @@ encodeLoop:
 		var t int32
 		// We allow the encoder to optionally turn off repeat offsets across blocks
 		canRepeat := len(blk.sequences) > 2
-		var matched int32
+		var matched, index0 int32
 
 		for {
 			if debugAsserts && canRepeat && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
-			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]
 
@@ -166,6 +162,7 @@ encodeLoop:
 			off := s + e.cur
 			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+			index0 = s + 1
 
 			if canRepeat {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@@ -262,7 +259,6 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)
 
-					index0 := s + repOff2
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
@@ -416,15 +412,23 @@ encodeLoop:
 
 		// Try to find a better match by searching for a long match at the end of the current best match
 		if s+matched < sLimit {
+			// Allow some bytes at the beginning to mismatch.
+			// Sweet spot is around 3 bytes, but depends on input.
+			// The skipped bytes are tested in Extend backwards,
+			// and still picked up as part of the match if they do.
+			const skipBeginning = 3
+
 			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
-			cv := load3232(src, s)
+			s2 := s + skipBeginning
+			cv := load3232(src, s2)
 			candidateL := e.longTable[nextHashL]
-			coffsetL := candidateL.offset - e.cur - matched
-			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+			coffsetL := candidateL.offset - e.cur - matched + skipBeginning
+			if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
 				// Found a long match, at least 4 bytes.
-				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+				matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
 				if matchedNext > matched {
 					t = coffsetL
+					s = s2
 					matched = matchedNext
 					if debugMatches {
 						println("long match at end-of-match")
@@ -434,12 +438,13 @@ encodeLoop:
 
 			// Check prev long...
 			if true {
-				coffsetL = candidateL.prev - e.cur - matched
-				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+				coffsetL = candidateL.prev - e.cur - matched + skipBeginning
+				if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
 					// Found a long match, at least 4 bytes.
-					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+					matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
 					if matchedNext > matched {
 						t = coffsetL
+						s = s2
 						matched = matchedNext
 						if debugMatches {
 							println("prev long match at end-of-match")
@@ -493,15 +498,15 @@ encodeLoop:
 		}
 
 		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
+		off := index0 + e.cur
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
 			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			index0 += 2
+			off += 2
 		}
 
 		cv = load6432(src, s)
@@ -518,8 +523,8 @@ encodeLoop:
 			}
 
 			// Store this, since we have it.
-			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@@ -578,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -667,15 +672,15 @@ encodeLoop:
 		var t int32
 		// We allow the encoder to optionally turn off repeat offsets across blocks
 		canRepeat := len(blk.sequences) > 2
-		var matched int32
+		var matched, index0 int32
 
 		for {
 			if debugAsserts && canRepeat && offset1 == 0 {
 				panic("offset0 was 0")
 			}
 
-			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]
 
@@ -686,6 +691,7 @@ encodeLoop:
 			e.markLongShardDirty(nextHashL)
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
 			e.markShortShardDirty(nextHashS)
+			index0 = s + 1
 
 			if canRepeat {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@@ -721,7 +727,6 @@ encodeLoop:
 					blk.sequences = append(blk.sequences, seq)
 
 					// Index match start+1 (long) -> s - 1
-					index0 := s + repOff
 					s += lenght + repOff
 
 					nextEmit = s
@@ -785,7 +790,6 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)
 
-					index0 := s + repOff2
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
@@ -1019,18 +1023,18 @@ encodeLoop:
 		}
 
 		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
+		off := index0 + e.cur
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
 			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.markLongShardDirty(h0)
 			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			e.markShortShardDirty(h1)
 			index0 += 2
+			off += 2
 		}
 
 		cv = load6432(src, s)
@@ -1047,8 +1051,8 @@ encodeLoop:
 			}
 
 			// Store this, since we have it.
-			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
+			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
 
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index d6b3104240b..a154c18f741 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
-			for i := range e.longTable[:] {
-				e.longTable[i] = tableEntry{}
-			}
+			e.table = [dFastShortTableSize]tableEntry{}
+			e.longTable = [dFastLongTableSize]tableEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -127,8 +123,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}
 
-			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]
 
@@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -439,8 +435,8 @@ encodeLoop:
 		var t int32
 		for {
 
-			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]
 
@@ -685,7 +681,7 @@ encodeLoop:
 	}
 
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -785,8 +781,8 @@ encodeLoop:
 				panic("offset0 was 0")
 			}
 
-			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			candidateL := e.longTable[nextHashL]
 			candidateS := e.table[nextHashS]
 
@@ -969,7 +965,7 @@ encodeLoop:
 		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
 		longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
-		longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
+		longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
 		e.longTable[longHash1] = te0
 		e.longTable[longHash2] = te1
 		e.markLongShardDirty(longHash1)
@@ -1002,8 +998,8 @@ encodeLoop:
 			}
 
 			// Store this, since we have it.
-			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
+			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
 
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
@@ -1088,7 +1084,7 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			}
 		}
 		e.lastDictID = d.id
-		e.allDirty = true
+		allDirty = true
 	}
 	// Reset table to initial state
 	e.cur = e.maxMatchOff
@@ -1103,7 +1099,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 	}
 
 	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
-		copy(e.longTable[:], e.dictLongTable)
+		//copy(e.longTable[:], e.dictLongTable)
+		e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
 		for i := range e.longTableShardDirty {
 			e.longTableShardDirty[i] = false
 		}
@@ -1114,7 +1111,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			continue
 		}
 
-		copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+		// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+		*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
+
 		e.longTableShardDirty[i] = false
 	}
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index f2502629bc5..f45a3da7dae 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -6,8 +6,6 @@ package zstd
 
 import (
 	"fmt"
-	"math"
-	"math/bits"
 )
 
 const (
@@ -45,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@@ -87,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	// TEMPLATE
 	const hashLog = tableBits
 	// seems global, but would be nice to tweak.
-	const kSearchStrength = 7
+	const kSearchStrength = 6
 
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
@@ -135,21 +133,7 @@ encodeLoop:
 			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				var length int32
-				// length = 4 + e.matchlen(s+6, repIndex+4, src)
-				{
-					a := src[s+6:]
-					b := src[repIndex+4:]
-					endI := len(a) & (math.MaxInt32 - 7)
-					length = int32(endI) + 4
-					for i := 0; i < endI; i += 8 {
-						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-							break
-						}
-					}
-				}
-
+				length := 4 + e.matchlen(s+6, repIndex+4, src)
 				seq.matchLen = uint32(length - zstdMinMatch)
 
 				// We might be able to match backwards.
@@ -236,20 +220,7 @@ encodeLoop:
 		}
 
 		// Extend the 4-byte match as long as possible.
-		//l := e.matchlen(s+4, t+4, src) + 4
-		var l int32
-		{
-			a := src[s+4:]
-			b := src[t+4:]
-			endI := len(a) & (math.MaxInt32 - 7)
-			l = int32(endI) + 4
-			for i := 0; i < endI; i += 8 {
-				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-					break
-				}
-			}
-		}
+		l := e.matchlen(s+4, t+4, src) + 4
 
 		// Extend backwards
 		tMin := s - e.maxMatchOff
@@ -286,20 +257,7 @@ encodeLoop:
 		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
-			//l := 4 + e.matchlen(s+4, o2+4, src)
-			var l int32
-			{
-				a := src[s+4:]
-				b := src[o2+4:]
-				endI := len(a) & (math.MaxInt32 - 7)
-				l = int32(endI) + 4
-				for i := 0; i < endI; i += 8 {
-					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-						break
-					}
-				}
-			}
+			l := 4 + e.matchlen(s+4, o2+4, src)
 
 			// Store this, since we have it.
 			nextHash := hashLen(cv, hashLog, tableFastHashLen)
@@ -345,13 +303,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
 	if debugEncoder {
-		if len(src) > maxBlockSize {
+		if len(src) > maxCompressedBlockSize {
 			panic("src too big")
 		}
 	}
 
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@@ -375,7 +333,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	// TEMPLATE
 	const hashLog = tableBits
 	// seems global, but would be nice to tweak.
-	const kSearchStrength = 8
+	const kSearchStrength = 6
 
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
@@ -418,21 +376,7 @@ encodeLoop:
 			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				// length := 4 + e.matchlen(s+6, repIndex+4, src)
-				// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
-				var length int32
-				{
-					a := src[s+6:]
-					b := src[repIndex+4:]
-					endI := len(a) & (math.MaxInt32 - 7)
-					length = int32(endI) + 4
-					for i := 0; i < endI; i += 8 {
-						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-							break
-						}
-					}
-				}
+				length := 4 + e.matchlen(s+6, repIndex+4, src)
 
 				seq.matchLen = uint32(length - zstdMinMatch)
 
@@ -522,21 +466,7 @@ encodeLoop:
 			panic(fmt.Sprintf("t (%d) < 0 ", t))
 		}
 		// Extend the 4-byte match as long as possible.
-		//l := e.matchlenNoHist(s+4, t+4, src) + 4
-		// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
-		var l int32
-		{
-			a := src[s+4:]
-			b := src[t+4:]
-			endI := len(a) & (math.MaxInt32 - 7)
-			l = int32(endI) + 4
-			for i := 0; i < endI; i += 8 {
-				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-					break
-				}
-			}
-		}
+		l := e.matchlen(s+4, t+4, src) + 4
 
 		// Extend backwards
 		tMin := s - e.maxMatchOff
@@ -573,21 +503,7 @@ encodeLoop:
 		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
-			//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
-			// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
-			var l int32
-			{
-				a := src[s+4:]
-				b := src[o2+4:]
-				endI := len(a) & (math.MaxInt32 - 7)
-				l = int32(endI) + 4
-				for i := 0; i < endI; i += 8 {
-					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-						break
-					}
-				}
-			}
+			l := 4 + e.matchlen(s+4, o2+4, src)
 
 			// Store this, since we have it.
 			nextHash := hashLen(cv, hashLog, tableFastHashLen)
@@ -621,7 +537,7 @@ encodeLoop:
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@@ -638,11 +554,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		return
 	}
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
-				e.table[i] = tableEntry{}
-			}
+			e.table = [tableSize]tableEntry{}
 			e.cur = e.maxMatchOff
 			break
 		}
@@ -730,20 +644,7 @@ encodeLoop:
 			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				var length int32
-				// length = 4 + e.matchlen(s+6, repIndex+4, src)
-				{
-					a := src[s+6:]
-					b := src[repIndex+4:]
-					endI := len(a) & (math.MaxInt32 - 7)
-					length = int32(endI) + 4
-					for i := 0; i < endI; i += 8 {
-						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-							break
-						}
-					}
-				}
+				length := 4 + e.matchlen(s+6, repIndex+4, src)
 
 				seq.matchLen = uint32(length - zstdMinMatch)
 
@@ -831,20 +732,7 @@ encodeLoop:
 		}
 
 		// Extend the 4-byte match as long as possible.
-		//l := e.matchlen(s+4, t+4, src) + 4
-		var l int32
-		{
-			a := src[s+4:]
-			b := src[t+4:]
-			endI := len(a) & (math.MaxInt32 - 7)
-			l = int32(endI) + 4
-			for i := 0; i < endI; i += 8 {
-				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-					break
-				}
-			}
-		}
+		l := e.matchlen(s+4, t+4, src) + 4
 
 		// Extend backwards
 		tMin := s - e.maxMatchOff
@@ -881,20 +769,7 @@ encodeLoop:
 		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
-			//l := 4 + e.matchlen(s+4, o2+4, src)
-			var l int32
-			{
-				a := src[s+4:]
-				b := src[o2+4:]
-				endI := len(a) & (math.MaxInt32 - 7)
-				l = int32(endI) + 4
-				for i := 0; i < endI; i += 8 {
-					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
-						break
-					}
-				}
-			}
+			l := 4 + e.matchlen(s+4, o2+4, src)
 
 			// Store this, since we have it.
 			nextHash := hashLen(cv, hashLog, tableFastHashLen)
@@ -954,13 +829,12 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 		}
 		if true {
 			end := e.maxMatchOff + int32(len(d.content)) - 8
-			for i := e.maxMatchOff; i < end; i += 3 {
+			for i := e.maxMatchOff; i < end; i += 2 {
 				const hashLog = tableBits
 
 				cv := load6432(d.content, i-e.maxMatchOff)
-				nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
-				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
-				nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
+				nextHash := hashLen(cv, hashLog, tableFastHashLen)     // 0 -> 6
+				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
 				e.dictTable[nextHash] = tableEntry{
 					val:    uint32(cv),
 					offset: i,
@@ -969,10 +843,6 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 					val:    uint32(cv >> 8),
 					offset: i + 1,
 				}
-				e.dictTable[nextHash2] = tableEntry{
-					val:    uint32(cv >> 16),
-					offset: i + 2,
-				}
 			}
 		}
 		e.lastDictID = d.id
@@ -992,7 +862,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 	const shardCnt = tableShardCnt
 	const shardSize = tableShardSize
 	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
-		copy(e.table[:], e.dictTable)
+		//copy(e.table[:], e.dictTable)
+		e.table = *(*[tableSize]tableEntry)(e.dictTable)
 		for i := range e.tableShardDirty {
 			e.tableShardDirty[i] = false
 		}
@@ -1004,7 +875,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 			continue
 		}
 
-		copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+		//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+		*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
 		e.tableShardDirty[i] = false
 	}
 	e.allDirty = false
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index e6e315969b0..72af7ef0fe0 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -8,6 +8,7 @@ import (
 	"crypto/rand"
 	"fmt"
 	"io"
+	"math"
 	rdebug "runtime/debug"
 	"sync"
 
@@ -98,23 +99,25 @@ func (e *Encoder) Reset(w io.Writer) {
 	if cap(s.filling) == 0 {
 		s.filling = make([]byte, 0, e.o.blockSize)
 	}
-	if cap(s.current) == 0 {
-		s.current = make([]byte, 0, e.o.blockSize)
-	}
-	if cap(s.previous) == 0 {
-		s.previous = make([]byte, 0, e.o.blockSize)
+	if e.o.concurrent > 1 {
+		if cap(s.current) == 0 {
+			s.current = make([]byte, 0, e.o.blockSize)
+		}
+		if cap(s.previous) == 0 {
+			s.previous = make([]byte, 0, e.o.blockSize)
+		}
+		s.current = s.current[:0]
+		s.previous = s.previous[:0]
+		if s.writing == nil {
+			s.writing = &blockEnc{lowMem: e.o.lowMem}
+			s.writing.init()
+		}
+		s.writing.initNewEncode()
 	}
 	if s.encoder == nil {
 		s.encoder = e.o.encoder()
 	}
-	if s.writing == nil {
-		s.writing = &blockEnc{lowMem: e.o.lowMem}
-		s.writing.init()
-	}
-	s.writing.initNewEncode()
 	s.filling = s.filling[:0]
-	s.current = s.current[:0]
-	s.previous = s.previous[:0]
 	s.encoder.Reset(e.o.dict, false)
 	s.headerWritten = false
 	s.eofWritten = false
@@ -224,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
 			DictID:        e.o.dict.ID(),
 		}
 
-		dst, err := fh.appendTo(tmp[:0])
-		if err != nil {
-			return err
-		}
+		dst := fh.appendTo(tmp[:0])
 		s.headerWritten = true
 		s.wWg.Wait()
 		var n2 int
@@ -258,6 +258,32 @@ func (e *Encoder) nextBlock(final bool) error {
 		return s.err
 	}
 
+	// SYNC:
+	if e.o.concurrent == 1 {
+		src := s.filling
+		s.nInput += int64(len(s.filling))
+		if debugEncoder {
+			println("Adding sync block,", len(src), "bytes, final:", final)
+		}
+		enc := s.encoder
+		blk := enc.Block()
+		blk.reset(nil)
+		enc.Encode(blk, src)
+		blk.last = final
+		if final {
+			s.eofWritten = true
+		}
+
+		s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+		if s.err != nil {
+			return s.err
+		}
+		_, s.err = s.w.Write(blk.output)
+		s.nWritten += int64(len(blk.output))
+		s.filling = s.filling[:0]
+		return s.err
+	}
+
 	// Move blocks forward.
 	s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
 	s.nInput += int64(len(s.current))
@@ -300,22 +326,8 @@ func (e *Encoder) nextBlock(final bool) error {
 				}
 				s.wWg.Done()
 			}()
-			err := errIncompressible
-			// If we got the exact same number of literals as input,
-			// assume the literals cannot be compressed.
-			if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
-				err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-			}
-			switch err {
-			case errIncompressible:
-				if debugEncoder {
-					println("Storing incompressible block as raw")
-				}
-				blk.encodeRaw(src)
-				// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
-			case nil:
-			default:
-				s.writeErr = err
+			s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+			if s.writeErr != nil {
 				return
 			}
 			_, s.writeErr = s.w.Write(blk.output)
@@ -468,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 				Checksum: false,
 				DictID:   0,
 			}
-			dst, _ = fh.appendTo(dst)
+			dst = fh.appendTo(dst)
 
 			// Write raw block as last one only.
 			var blk blockHeader
@@ -486,8 +498,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		// If a non-single block is needed the encoder will reset again.
 		e.encoders <- enc
 	}()
-	// Use single segments when above minimum window and below 1MB.
-	single := len(src) < 1<<20 && len(src) > MinWindowSize
+	// Use single segments when above minimum window and below window size.
+	single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
 	if e.o.single != nil {
 		single = *e.o.single
 	}
@@ -503,13 +515,10 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
 		dst = make([]byte, 0, len(src))
 	}
-	dst, err := fh.appendTo(dst)
-	if err != nil {
-		panic(err)
-	}
+	dst = fh.appendTo(dst)
 
 	// If we can do everything in one block, prefer that.
-	if len(src) <= maxCompressedBlockSize {
+	if len(src) <= e.o.blockSize {
 		enc.Reset(e.o.dict, true)
 		// Slightly faster with no history and everything in one block.
 		if e.o.crc {
@@ -525,25 +534,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 
 		// If we got the exact same number of literals as input,
 		// assume the literals cannot be compressed.
-		err := errIncompressible
 		oldout := blk.output
-		if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
-			// Output directly to dst
-			blk.output = dst
-			err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-		}
+		// Output directly to dst
+		blk.output = dst
 
-		switch err {
-		case errIncompressible:
-			if debugEncoder {
-				println("Storing incompressible block as raw")
-			}
-			dst = blk.encodeRawTo(dst, src)
-		case nil:
-			dst = blk.output
-		default:
+		err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+		if err != nil {
 			panic(err)
 		}
+		dst = blk.output
 		blk.output = oldout
 	} else {
 		enc.Reset(e.o.dict, false)
@@ -562,25 +561,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 			if len(src) == 0 {
 				blk.last = true
 			}
-			err := errIncompressible
-			// If we got the exact same number of literals as input,
-			// assume the literals cannot be compressed.
-			if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
-				err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
-			}
-
-			switch err {
-			case errIncompressible:
-				if debugEncoder {
-					println("Storing incompressible block as raw")
-				}
-				dst = blk.encodeRawTo(dst, todo)
-				blk.popOffsets()
-			case nil:
-				dst = append(dst, blk.output...)
-			default:
+			err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
+			if err != nil {
 				panic(err)
 			}
+			dst = append(dst, blk.output...)
 			blk.reset(nil)
 		}
 	}
@@ -590,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	// Add padding with content from crypto/rand.Reader
 	if e.o.pad > 0 {
 		add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
+		var err error
 		dst, err = skippableFrame(dst, add, rand.Reader)
 		if err != nil {
 			panic(err)
@@ -597,3 +583,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	}
 	return dst
 }
+
+// MaxEncodedSize returns the expected maximum
+// size of an encoded block or stream.
+func (e *Encoder) MaxEncodedSize(size int) int {
+	frameHeader := 4 + 2 // magic + frame header & window descriptor
+	if e.o.dict != nil {
+		frameHeader += 4
+	}
+	// Frame content size:
+	if size < 256 {
+		frameHeader++
+	} else if size < 65536+256 {
+		frameHeader += 2
+	} else if size < math.MaxInt32 {
+		frameHeader += 4
+	} else {
+		frameHeader += 8
+	}
+	// Final crc
+	if e.o.crc {
+		frameHeader += 4
+	}
+
+	// Max overhead is 3 bytes/block.
+	// There cannot be 0 blocks.
+	blocks := (size + e.o.blockSize) / e.o.blockSize
+
+	// Combine, add padding.
+	maxSz := frameHeader + 3*blocks + size
+	if e.o.pad > 1 {
+		maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
+	}
+	return maxSz
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index 7d29e1d689e..20671dcb91d 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -3,6 +3,8 @@ package zstd
 import (
 	"errors"
 	"fmt"
+	"math"
+	"math/bits"
 	"runtime"
 	"strings"
 )
@@ -24,6 +26,7 @@ type encoderOptions struct {
 	allLitEntropy   bool
 	customWindow    bool
 	customALEntropy bool
+	customBlockSize bool
 	lowMem          bool
 	dict            *dict
 }
@@ -33,10 +36,10 @@ func (o *encoderOptions) setDefault() {
 		concurrent:    runtime.GOMAXPROCS(0),
 		crc:           true,
 		single:        nil,
-		blockSize:     1 << 16,
+		blockSize:     maxCompressedBlockSize,
 		windowSize:    8 << 20,
 		level:         SpeedDefault,
-		allLitEntropy: true,
+		allLitEntropy: false,
 		lowMem:        false,
 	}
 }
@@ -46,22 +49,22 @@ func (o encoderOptions) encoder() encoder {
 	switch o.level {
 	case SpeedFastest:
 		if o.dict != nil {
-			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 
 	case SpeedDefault:
 		if o.dict != nil {
-			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
 		}
-		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 	case SpeedBetterCompression:
 		if o.dict != nil {
-			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	case SpeedBestCompression:
-		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	}
 	panic("unknown compression level")
 }
@@ -75,6 +78,7 @@ func WithEncoderCRC(b bool) EOption {
 // WithEncoderConcurrency will set the concurrency,
 // meaning the maximum number of encoders to run concurrently.
 // The value supplied must be at least 1.
+// For streams, setting a value of 1 will disable async compression.
 // By default this will be set to GOMAXPROCS.
 func WithEncoderConcurrency(n int) EOption {
 	return func(o *encoderOptions) error {
@@ -90,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
 // The value must be a power of two between MinWindowSize and MaxWindowSize.
 // A larger value will enable better compression but allocate more memory and,
 // for above-default values, take considerably longer.
-// The default value is determined by the compression level.
+// The default value is determined by the compression level and max 8MB.
 func WithWindowSize(n int) EOption {
 	return func(o *encoderOptions) error {
 		switch {
@@ -106,6 +110,7 @@ func WithWindowSize(n int) EOption {
 		o.customWindow = true
 		if o.blockSize > o.windowSize {
 			o.blockSize = o.windowSize
+			o.customBlockSize = true
 		}
 		return nil
 	}
@@ -124,7 +129,7 @@ func WithEncoderPadding(n int) EOption {
 		}
 		// No need to waste our time.
 		if n == 1 {
-			o.pad = 0
+			n = 0
 		}
 		if n > 1<<30 {
 			return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
@@ -188,10 +193,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
 		return SpeedDefault
 	case level >= 6 && level < 10:
 		return SpeedBetterCompression
-	case level >= 10:
+	default:
 		return SpeedBestCompression
 	}
-	return SpeedDefault
 }
 
 // String provides a string representation of the compression level.
@@ -222,16 +226,19 @@ func WithEncoderLevel(l EncoderLevel) EOption {
 			switch o.level {
 			case SpeedFastest:
 				o.windowSize = 4 << 20
+				if !o.customBlockSize {
+					o.blockSize = 1 << 16
+				}
 			case SpeedDefault:
 				o.windowSize = 8 << 20
 			case SpeedBetterCompression:
-				o.windowSize = 16 << 20
+				o.windowSize = 8 << 20
 			case SpeedBestCompression:
-				o.windowSize = 32 << 20
+				o.windowSize = 8 << 20
 			}
 		}
 		if !o.customALEntropy {
-			o.allLitEntropy = l > SpeedFastest
+			o.allLitEntropy = l > SpeedDefault
 		}
 
 		return nil
@@ -278,7 +285,7 @@ func WithNoEntropyCompression(b bool) EOption {
 // a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
 // For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
 // This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
-// If this is not specified, block encodes will automatically choose this based on the input size.
+// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
 // This setting has no effect on streamed encodes.
 func WithSingleSegment(b bool) EOption {
 	return func(o *encoderOptions) error {
@@ -299,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption {
 }
 
 // WithEncoderDict allows to register a dictionary that will be used for the encode.
+//
+// The slice dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
 // The encoder *may* choose to use no dictionary instead for certain payloads.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
 func WithEncoderDict(dict []byte) EOption {
 	return func(o *encoderOptions) error {
 		d, err := loadDict(dict)
@@ -310,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption {
 		return nil
 	}
 }
+
+// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
+//
+// The slice content may contain arbitrary data. It will be used as an initial
+// history.
+func WithEncoderDictRaw(id uint32, content []byte) EOption {
+	return func(o *encoderOptions) error {
+		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+		}
+		o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
+		return nil
+	}
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 989c79f8c31..53e160f7e5a 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -5,26 +5,20 @@
 package zstd
 
 import (
-	"bytes"
+	"encoding/binary"
 	"encoding/hex"
 	"errors"
-	"hash"
 	"io"
-	"sync"
 
 	"github.com/klauspost/compress/zstd/internal/xxhash"
 )
 
 type frameDec struct {
-	o      decoderOptions
-	crc    hash.Hash64
-	offset int64
+	o   decoderOptions
+	crc *xxhash.Digest
 
 	WindowSize uint64
 
-	// In order queue of blocks being decoded.
-	decoding chan *blockDec
-
 	// Frame history passed between blocks
 	history history
 
@@ -34,15 +28,10 @@ type frameDec struct {
 	bBuf byteBuf
 
 	FrameContentSize uint64
-	frameDone        sync.WaitGroup
 
-	DictionaryID  *uint32
+	DictionaryID  uint32
 	HasCheckSum   bool
 	SingleSegment bool
-
-	// asyncRunning indicates whether the async routine processes input on 'decoding'.
-	asyncRunningMu sync.Mutex
-	asyncRunning   bool
 }
 
 const (
@@ -54,9 +43,9 @@ const (
 	MaxWindowSize = 1 << 29
 )
 
-var (
-	frameMagic          = []byte{0x28, 0xb5, 0x2f, 0xfd}
-	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
+const (
+	frameMagic          = "\x28\xb5\x2f\xfd"
+	skippableFrameMagic = "\x2a\x4d\x18"
 )
 
 func newFrameDec(o decoderOptions) *frameDec {
@@ -84,25 +73,25 @@ func (d *frameDec) reset(br byteBuffer) error {
 		switch err {
 		case io.EOF, io.ErrUnexpectedEOF:
 			return io.EOF
-		default:
-			return err
 		case nil:
 			signature[0] = b[0]
+		default:
+			return err
 		}
 		// Read the rest, don't allow io.ErrUnexpectedEOF
 		b, err = br.readSmall(3)
 		switch err {
 		case io.EOF:
 			return io.EOF
-		default:
-			return err
 		case nil:
 			copy(signature[1:], b)
+		default:
+			return err
 		}
 
-		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+		if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
 			if debugDecoder {
-				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
+				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
 			}
 			// Break if not skippable frame.
 			break
@@ -117,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 		}
 		n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
 		println("Skipping frame with", n, "bytes.")
-		err = br.skipN(int(n))
+		err = br.skipN(int64(n))
 		if err != nil {
 			if debugDecoder {
 				println("Reading discarded frame", err)
@@ -125,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 	}
-	if !bytes.Equal(signature[:], frameMagic) {
+	if string(signature[:]) != frameMagic {
 		if debugDecoder {
-			println("Got magic numbers: ", signature, "want:", frameMagic)
+			println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
 		}
 		return ErrMagicMismatch
 	}
@@ -166,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 
 	// Read Dictionary_ID
 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
-	d.DictionaryID = nil
+	d.DictionaryID = 0
 	if size := fhd & 3; size != 0 {
 		if size == 3 {
 			size = 4
@@ -178,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 		var id uint32
-		switch size {
+		switch len(b) {
 		case 1:
 			id = uint32(b[0])
 		case 2:
@@ -189,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 		if debugDecoder {
 			println("Dict size", size, "ID:", id)
 		}
-		if id > 0 {
-			// ID 0 means "sorry, no dictionary anyway".
-			// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
-			d.DictionaryID = &id
-		}
+		d.DictionaryID = id
 	}
 
 	// Read Frame_Content_Size
@@ -208,14 +193,14 @@ func (d *frameDec) reset(br byteBuffer) error {
 	default:
 		fcsSize = 1 << v
 	}
-	d.FrameContentSize = 0
+	d.FrameContentSize = fcsUnknown
 	if fcsSize > 0 {
 		b, err := br.readSmall(fcsSize)
 		if err != nil {
 			println("Reading Frame content", err)
 			return err
 		}
-		switch fcsSize {
+		switch len(b) {
 		case 1:
 			d.FrameContentSize = uint64(b[0])
 		case 2:
@@ -229,9 +214,10 @@ func (d *frameDec) reset(br byteBuffer) error {
 			d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
 		}
 		if debugDecoder {
-			println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
+			println("Read FCS:", d.FrameContentSize)
 		}
 	}
+
 	// Move this to shared.
 	d.HasCheckSum = fhd&(1<<2) != 0
 	if d.HasCheckSum {
@@ -241,20 +227,27 @@ func (d *frameDec) reset(br byteBuffer) error {
 		d.crc.Reset()
 	}
 
+	if d.WindowSize > d.o.maxWindowSize {
+		if debugDecoder {
+			printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+		}
+		return ErrWindowSizeExceeded
+	}
+
 	if d.WindowSize == 0 && d.SingleSegment {
 		// We may not need window in this case.
 		d.WindowSize = d.FrameContentSize
 		if d.WindowSize < MinWindowSize {
 			d.WindowSize = MinWindowSize
 		}
-	}
-
-	if d.WindowSize > uint64(d.o.maxWindowSize) {
-		if debugDecoder {
-			printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+		if d.WindowSize > d.o.maxDecodedSize {
+			if debugDecoder {
+				printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+			}
+			return ErrDecoderSizeExceeded
 		}
-		return ErrWindowSizeExceeded
 	}
+
 	// The minimum Window_Size is 1 KB.
 	if d.WindowSize < MinWindowSize {
 		if debugDecoder {
@@ -263,11 +256,23 @@ func (d *frameDec) reset(br byteBuffer) error {
 		return ErrWindowSizeTooSmall
 	}
 	d.history.windowSize = int(d.WindowSize)
-	if d.o.lowMem && d.history.windowSize < maxBlockSize {
-		d.history.maxSize = d.history.windowSize * 2
+	if !d.o.lowMem || d.history.windowSize < maxBlockSize {
+		// Alloc 2x window size if not low-mem, or window size below 2MB.
+		d.history.allocFrameBuffer = d.history.windowSize * 2
 	} else {
-		d.history.maxSize = d.history.windowSize + maxBlockSize
+		if d.o.lowMem {
+			// Alloc with 1MB extra.
+			d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
+		} else {
+			// Alloc with 2MB extra.
+			d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
+		}
 	}
+
+	if debugDecoder {
+		println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
+	}
+
 	// history contains input - maybe we do something
 	d.rawInput = br
 	return nil
@@ -276,209 +281,85 @@ func (d *frameDec) reset(br byteBuffer) error {
 // next will start decoding the next block from stream.
 func (d *frameDec) next(block *blockDec) error {
 	if debugDecoder {
-		printf("decoding new block %p:%p", block, block.data)
+		println("decoding new block")
 	}
 	err := block.reset(d.rawInput, d.WindowSize)
 	if err != nil {
 		println("block error:", err)
 		// Signal the frame decoder we have a problem.
-		d.sendErr(block, err)
+		block.sendErr(err)
 		return err
 	}
-	block.input <- struct{}{}
-	if debugDecoder {
-		println("next block:", block)
-	}
-	d.asyncRunningMu.Lock()
-	defer d.asyncRunningMu.Unlock()
-	if !d.asyncRunning {
-		return nil
-	}
-	if block.Last {
-		// We indicate the frame is done by sending io.EOF
-		d.decoding <- block
-		return io.EOF
-	}
-	d.decoding <- block
 	return nil
 }
 
-// sendEOF will queue an error block on the frame.
-// This will cause the frame decoder to return when it encounters the block.
-// Returns true if the decoder was added.
-func (d *frameDec) sendErr(block *blockDec, err error) bool {
-	d.asyncRunningMu.Lock()
-	defer d.asyncRunningMu.Unlock()
-	if !d.asyncRunning {
-		return false
-	}
-
-	println("sending error", err.Error())
-	block.sendErr(err)
-	d.decoding <- block
-	return true
-}
-
-// checkCRC will check the checksum if the frame has one.
+// checkCRC will check the checksum, assuming the frame has one.
 // Will return ErrCRCMismatch if crc check failed, otherwise nil.
 func (d *frameDec) checkCRC() error {
-	if !d.HasCheckSum {
-		return nil
-	}
-	var tmp [4]byte
-	got := d.crc.Sum64()
-	// Flip to match file order.
-	tmp[0] = byte(got >> 0)
-	tmp[1] = byte(got >> 8)
-	tmp[2] = byte(got >> 16)
-	tmp[3] = byte(got >> 24)
-
 	// We can overwrite upper tmp now
-	want, err := d.rawInput.readSmall(4)
+	buf, err := d.rawInput.readSmall(4)
 	if err != nil {
 		println("CRC missing?", err)
 		return err
 	}
 
-	if !bytes.Equal(tmp[:], want) {
+	want := binary.LittleEndian.Uint32(buf[:4])
+	got := uint32(d.crc.Sum64())
+
+	if got != want {
 		if debugDecoder {
-			println("CRC Check Failed:", tmp[:], "!=", want)
+			printf("CRC check failed: got %08x, want %08x\n", got, want)
 		}
 		return ErrCRCMismatch
 	}
 	if debugDecoder {
-		println("CRC ok", tmp[:])
+		printf("CRC ok %08x\n", got)
 	}
 	return nil
 }
 
-func (d *frameDec) initAsync() {
-	if !d.o.lowMem && !d.SingleSegment {
-		// set max extra size history to 2MB.
-		d.history.maxSize = d.history.windowSize + maxBlockSize
-	}
-	// re-alloc if more than one extra block size.
-	if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize {
-		d.history.b = make([]byte, 0, d.history.maxSize)
-	}
-	if cap(d.history.b) < d.history.maxSize {
-		d.history.b = make([]byte, 0, d.history.maxSize)
-	}
-	if cap(d.decoding) < d.o.concurrent {
-		d.decoding = make(chan *blockDec, d.o.concurrent)
-	}
-	if debugDecoder {
-		h := d.history
-		printf("history init. len: %d, cap: %d", len(h.b), cap(h.b))
-	}
-	d.asyncRunningMu.Lock()
-	d.asyncRunning = true
-	d.asyncRunningMu.Unlock()
-}
-
-// startDecoder will start decoding blocks and write them to the writer.
-// The decoder will stop as soon as an error occurs or at end of frame.
-// When the frame has finished decoding the *bufio.Reader
-// containing the remaining input will be sent on frameDec.frameDone.
-func (d *frameDec) startDecoder(output chan decodeOutput) {
-	written := int64(0)
-
-	defer func() {
-		d.asyncRunningMu.Lock()
-		d.asyncRunning = false
-		d.asyncRunningMu.Unlock()
-
-		// Drain the currently decoding.
-		d.history.error = true
-	flushdone:
-		for {
-			select {
-			case b := <-d.decoding:
-				b.history <- &d.history
-				output <- <-b.result
-			default:
-				break flushdone
-			}
-		}
-		println("frame decoder done, signalling done")
-		d.frameDone.Done()
-	}()
-	// Get decoder for first block.
-	block := <-d.decoding
-	block.history <- &d.history
-	for {
-		var next *blockDec
-		// Get result
-		r := <-block.result
-		if r.err != nil {
-			println("Result contained error", r.err)
-			output <- r
-			return
-		}
-		if debugDecoder {
-			println("got result, from ", d.offset, "to", d.offset+int64(len(r.b)))
-			d.offset += int64(len(r.b))
-		}
-		if !block.Last {
-			// Send history to next block
-			select {
-			case next = <-d.decoding:
-				if debugDecoder {
-					println("Sending ", len(d.history.b), "bytes as history")
-				}
-				next.history <- &d.history
-			default:
-				// Wait until we have sent the block, so
-				// other decoders can potentially get the decoder.
-				next = nil
-			}
-		}
-
-		// Add checksum, async to decoding.
-		if d.HasCheckSum {
-			n, err := d.crc.Write(r.b)
-			if err != nil {
-				r.err = err
-				if n != len(r.b) {
-					r.err = io.ErrShortWrite
-				}
-				output <- r
-				return
-			}
-		}
-		written += int64(len(r.b))
-		if d.SingleSegment && uint64(written) > d.FrameContentSize {
-			println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize)
-			r.err = ErrFrameSizeExceeded
-			output <- r
-			return
-		}
-		if block.Last {
-			r.err = d.checkCRC()
-			output <- r
-			return
-		}
-		output <- r
-		if next == nil {
-			// There was no decoder available, we wait for one now that we have sent to the writer.
-			if debugDecoder {
-				println("Sending ", len(d.history.b), " bytes as history")
-			}
-			next = <-d.decoding
-			next.history <- &d.history
-		}
-		block = next
+// consumeCRC skips over the checksum, assuming the frame has one.
+func (d *frameDec) consumeCRC() error {
+	_, err := d.rawInput.readSmall(4)
+	if err != nil {
+		println("CRC missing?", err)
 	}
+	return err
 }
 
-// runDecoder will create a sync decoder that will decode a block of data.
+// runDecoder will run the decoder for the remainder of the frame.
 func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 	saved := d.history.b
 
 	// We use the history for output to avoid copying it.
 	d.history.b = dst
+	d.history.ignoreBuffer = len(dst)
 	// Store input length, so we only check new data.
 	crcStart := len(dst)
+	d.history.decoders.maxSyncLen = 0
+	if d.o.limitToCap {
+		d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
+	}
+	if d.FrameContentSize != fcsUnknown {
+		if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
+			d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
+		}
+		if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
+			if debugDecoder {
+				println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
+			}
+			return dst, ErrDecoderSizeExceeded
+		}
+		if debugDecoder {
+			println("maxSyncLen:", d.history.decoders.maxSyncLen)
+		}
+		if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
+			// Alloc for output
+			dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
+			copy(dst2, dst)
+			dst = dst2
+		}
+	}
 	var err error
 	for {
 		err = dec.reset(d.rawInput, d.WindowSize)
@@ -489,30 +370,41 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 			println("next block:", dec)
 		}
 		err = dec.decodeBuf(&d.history)
-		if err != nil || dec.Last {
+		if err != nil {
+			break
+		}
+		if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
+			println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
+			err = ErrDecoderSizeExceeded
 			break
 		}
-		if uint64(len(d.history.b)) > d.o.maxDecodedSize {
+		if d.o.limitToCap && len(d.history.b) > cap(dst) {
+			println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
 			err = ErrDecoderSizeExceeded
 			break
 		}
-		if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
-			println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
+		if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
+			println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
 			err = ErrFrameSizeExceeded
 			break
 		}
+		if dec.Last {
+			break
+		}
+		if debugDecoder {
+			println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
+		}
 	}
 	dst = d.history.b
 	if err == nil {
-		if d.HasCheckSum {
-			var n int
-			n, err = d.crc.Write(dst[crcStart:])
-			if err == nil {
-				if n != len(dst)-crcStart {
-					err = io.ErrShortWrite
-				} else {
-					err = d.checkCRC()
-				}
+		if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
+			err = ErrFrameSizeMismatch
+		} else if d.HasCheckSum {
+			if d.o.ignoreChecksum {
+				err = d.consumeCRC()
+			} else {
+				d.crc.Write(dst[crcStart:])
+				err = d.checkCRC()
 			}
 		}
 	}
diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go
index 4ef7f5a3e3d..667ca06794e 100644
--- a/vendor/github.com/klauspost/compress/zstd/frameenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@@ -22,7 +22,7 @@ type frameHeader struct {
 
 const maxHeaderSize = 14
 
-func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
+func (f frameHeader) appendTo(dst []byte) []byte {
 	dst = append(dst, frameMagic...)
 	var fhd uint8
 	if f.Checksum {
@@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
 		if f.SingleSegment {
 			dst = append(dst, uint8(f.ContentSize))
 		}
-		// Unless SingleSegment is set, framessizes < 256 are nto stored.
+		// Unless SingleSegment is set, framessizes < 256 are not stored.
 	case 1:
 		f.ContentSize -= 256
 		dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
@@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
 	default:
 		panic("invalid fcs")
 	}
-	return dst, nil
+	return dst
 }
 
 const skippableFrameHeader = 4 + 4
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index e6d3d49b39c..2f8860a722b 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -5,8 +5,10 @@
 package zstd
 
 import (
+	"encoding/binary"
 	"errors"
 	"fmt"
+	"io"
 )
 
 const (
@@ -178,10 +180,32 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
 		return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
 	}
 	b.advance((bitCount + 7) >> 3)
-	// println(s.norm[:s.symbolLen], s.symbolLen)
 	return s.buildDtable()
 }
 
+func (s *fseDecoder) mustReadFrom(r io.Reader) {
+	fatalErr := func(err error) {
+		if err != nil {
+			panic(err)
+		}
+	}
+	// 	dt             [maxTablesize]decSymbol // Decompression table.
+	//	symbolLen      uint16                  // Length of active part of the symbol table.
+	//	actualTableLog uint8                   // Selected tablelog.
+	//	maxBits        uint8                   // Maximum number of additional bits
+	//	// used for table creation to avoid allocations.
+	//	stateTable [256]uint16
+	//	norm       [maxSymbolValue + 1]int16
+	//	preDefined bool
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.dt))
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen))
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog))
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits))
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable))
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.norm))
+	fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined))
+}
+
 // decSymbol contains information about a state entry,
 // Including the state offset base, the output symbol and
 // the number of bits to read for the low part of the destination state.
@@ -204,18 +228,10 @@ func (d decSymbol) newState() uint16 {
 	return uint16(d >> 16)
 }
 
-func (d decSymbol) baseline() uint32 {
-	return uint32(d >> 32)
-}
-
 func (d decSymbol) baselineInt() int {
 	return int(d >> 32)
 }
 
-func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
-	*d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
-}
-
 func (d *decSymbol) setNBits(nBits uint8) {
 	const mask = 0xffffffffffffff00
 	*d = (*d & mask) | decSymbol(nBits)
@@ -231,11 +247,6 @@ func (d *decSymbol) setNewState(state uint16) {
 	*d = (*d & mask) | decSymbol(state)<<16
 }
 
-func (d *decSymbol) setBaseline(baseline uint32) {
-	const mask = 0xffffffff
-	*d = (*d & mask) | decSymbol(baseline)<<32
-}
-
 func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
 	const mask = 0xffff00ff
 	*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
@@ -257,68 +268,6 @@ func (s *fseDecoder) setRLE(symbol decSymbol) {
 	s.dt[0] = symbol
 }
 
-// buildDtable will build the decoding table.
-func (s *fseDecoder) buildDtable() error {
-	tableSize := uint32(1 << s.actualTableLog)
-	highThreshold := tableSize - 1
-	symbolNext := s.stateTable[:256]
-
-	// Init, lay down lowprob symbols
-	{
-		for i, v := range s.norm[:s.symbolLen] {
-			if v == -1 {
-				s.dt[highThreshold].setAddBits(uint8(i))
-				highThreshold--
-				symbolNext[i] = 1
-			} else {
-				symbolNext[i] = uint16(v)
-			}
-		}
-	}
-	// Spread symbols
-	{
-		tableMask := tableSize - 1
-		step := tableStep(tableSize)
-		position := uint32(0)
-		for ss, v := range s.norm[:s.symbolLen] {
-			for i := 0; i < int(v); i++ {
-				s.dt[position].setAddBits(uint8(ss))
-				position = (position + step) & tableMask
-				for position > highThreshold {
-					// lowprob area
-					position = (position + step) & tableMask
-				}
-			}
-		}
-		if position != 0 {
-			// position must reach all cells once, otherwise normalizedCounter is incorrect
-			return errors.New("corrupted input (position != 0)")
-		}
-	}
-
-	// Build Decoding table
-	{
-		tableSize := uint16(1 << s.actualTableLog)
-		for u, v := range s.dt[:tableSize] {
-			symbol := v.addBits()
-			nextState := symbolNext[symbol]
-			symbolNext[symbol] = nextState + 1
-			nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
-			s.dt[u&maxTableMask].setNBits(nBits)
-			newState := (nextState << nBits) - tableSize
-			if newState > tableSize {
-				return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
-			}
-			if newState == uint16(u) && nBits == 0 {
-				// Seems weird that this is possible with nbits > 0.
-				return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
-			}
-			s.dt[u&maxTableMask].setNewState(newState)
-		}
-	}
-	return nil
-}
-
 // transform will transform the decoder table into a table usable for
 // decoding without having to apply the transformation while decoding.
 // The state will contain the base value and the number of bits to read.
@@ -352,34 +301,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
 	s.state = dt[br.getBits(tableLog)]
 }
 
-// next returns the current symbol and sets the next state.
-// At least tablelog bits must be available in the bit reader.
-func (s *fseState) next(br *bitReader) {
-	lowBits := uint16(br.getBits(s.state.nbBits()))
-	s.state = s.dt[s.state.newState()+lowBits]
-}
-
-// finished returns true if all bits have been read from the bitstream
-// and the next state would require reading bits from the input.
-func (s *fseState) finished(br *bitReader) bool {
-	return br.finished() && s.state.nbBits() > 0
-}
-
-// final returns the current state symbol without decoding the next.
-func (s *fseState) final() (int, uint8) {
-	return s.state.baselineInt(), s.state.addBits()
-}
-
 // final returns the current state symbol without decoding the next.
 func (s decSymbol) final() (int, uint8) {
 	return s.baselineInt(), s.addBits()
 }
-
-// nextFast returns the next symbol and sets the next state.
-// This can only be used if no symbols are 0 bits.
-// At least tablelog bits must be available in the bit reader.
-func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
-	lowBits := uint16(br.getBitsFast(s.state.nbBits()))
-	s.state = s.dt[s.state.newState()+lowBits]
-	return s.state.baseline(), s.state.addBits()
-}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
new file mode 100644
index 00000000000..d04a829b0a0
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
@@ -0,0 +1,65 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+package zstd
+
+import (
+	"fmt"
+)
+
+type buildDtableAsmContext struct {
+	// inputs
+	stateTable *uint16
+	norm       *int16
+	dt         *uint64
+
+	// outputs --- set by the procedure in the case of error;
+	// for interpretation please see the error handling part below
+	errParam1 uint64
+	errParam2 uint64
+}
+
+// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
+// Function returns non-zero exit code on error.
+//
+//go:noescape
+func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
+
+// please keep in sync with _generate/gen_fse.go
+const (
+	errorCorruptedNormalizedCounter = 1
+	errorNewStateTooBig             = 2
+	errorNewStateNoBits             = 3
+)
+
+// buildDtable will build the decoding table.
+func (s *fseDecoder) buildDtable() error {
+	ctx := buildDtableAsmContext{
+		stateTable: &s.stateTable[0],
+		norm:       &s.norm[0],
+		dt:         (*uint64)(&s.dt[0]),
+	}
+	code := buildDtable_asm(s, &ctx)
+
+	if code != 0 {
+		switch code {
+		case errorCorruptedNormalizedCounter:
+			position := ctx.errParam1
+			return fmt.Errorf("corrupted input (position=%d, expected 0)", position)
+
+		case errorNewStateTooBig:
+			newState := decSymbol(ctx.errParam1)
+			size := ctx.errParam2
+			return fmt.Errorf("newState (%d) outside table size (%d)", newState, size)
+
+		case errorNewStateNoBits:
+			newState := decSymbol(ctx.errParam1)
+			oldState := decSymbol(ctx.errParam2)
+			return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, oldState)
+
+		default:
+			return fmt.Errorf("buildDtable_asm returned unhandled nonzero code = %d", code)
+		}
+	}
+	return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
new file mode 100644
index 00000000000..bcde3986953
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
@@ -0,0 +1,126 @@
+// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
+TEXT ·buildDtable_asm(SB), $0-24
+	MOVQ ctx+8(FP), CX
+	MOVQ s+0(FP), DI
+
+	// Load values
+	MOVBQZX 4098(DI), DX
+	XORQ    AX, AX
+	BTSQ    DX, AX
+	MOVQ    (CX), BX
+	MOVQ    16(CX), SI
+	LEAQ    -1(AX), R8
+	MOVQ    8(CX), CX
+	MOVWQZX 4096(DI), DI
+
+	// End load values
+	// Init, lay down lowprob symbols
+	XORQ R9, R9
+	JMP  init_main_loop_condition
+
+init_main_loop:
+	MOVWQSX (CX)(R9*2), R10
+	CMPW    R10, $-1
+	JNE     do_not_update_high_threshold
+	MOVB    R9, 1(SI)(R8*8)
+	DECQ    R8
+	MOVQ    $0x0000000000000001, R10
+
+do_not_update_high_threshold:
+	MOVW R10, (BX)(R9*2)
+	INCQ R9
+
+init_main_loop_condition:
+	CMPQ R9, DI
+	JL   init_main_loop
+
+	// Spread symbols
+	// Calculate table step
+	MOVQ AX, R9
+	SHRQ $0x01, R9
+	MOVQ AX, R10
+	SHRQ $0x03, R10
+	LEAQ 3(R9)(R10*1), R9
+
+	// Fill add bits values
+	LEAQ -1(AX), R10
+	XORQ R11, R11
+	XORQ R12, R12
+	JMP  spread_main_loop_condition
+
+spread_main_loop:
+	XORQ    R13, R13
+	MOVWQSX (CX)(R12*2), R14
+	JMP     spread_inner_loop_condition
+
+spread_inner_loop:
+	MOVB R12, 1(SI)(R11*8)
+
+adjust_position:
+	ADDQ R9, R11
+	ANDQ R10, R11
+	CMPQ R11, R8
+	JG   adjust_position
+	INCQ R13
+
+spread_inner_loop_condition:
+	CMPQ R13, R14
+	JL   spread_inner_loop
+	INCQ R12
+
+spread_main_loop_condition:
+	CMPQ  R12, DI
+	JL    spread_main_loop
+	TESTQ R11, R11
+	JZ    spread_check_ok
+	MOVQ  ctx+8(FP), AX
+	MOVQ  R11, 24(AX)
+	MOVQ  $+1, ret+16(FP)
+	RET
+
+spread_check_ok:
+	// Build Decoding table
+	XORQ DI, DI
+
+build_table_main_table:
+	MOVBQZX 1(SI)(DI*8), CX
+	MOVWQZX (BX)(CX*2), R8
+	LEAQ    1(R8), R9
+	MOVW    R9, (BX)(CX*2)
+	MOVQ    R8, R9
+	BSRQ    R9, R9
+	MOVQ    DX, CX
+	SUBQ    R9, CX
+	SHLQ    CL, R8
+	SUBQ    AX, R8
+	MOVB    CL, (SI)(DI*8)
+	MOVW    R8, 2(SI)(DI*8)
+	CMPQ    R8, AX
+	JLE     build_table_check1_ok
+	MOVQ    ctx+8(FP), CX
+	MOVQ    R8, 24(CX)
+	MOVQ    AX, 32(CX)
+	MOVQ    $+2, ret+16(FP)
+	RET
+
+build_table_check1_ok:
+	TESTB CL, CL
+	JNZ   build_table_check2_ok
+	CMPW  R8, DI
+	JNE   build_table_check2_ok
+	MOVQ  ctx+8(FP), AX
+	MOVQ  R8, 24(AX)
+	MOVQ  DI, 32(AX)
+	MOVQ  $+3, ret+16(FP)
+	RET
+
+build_table_check2_ok:
+	INCQ DI
+	CMPQ DI, AX
+	JL   build_table_main_table
+	MOVQ $+0, ret+16(FP)
+	RET
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
new file mode 100644
index 00000000000..8adfebb0297
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
@@ -0,0 +1,73 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+package zstd
+
+import (
+	"errors"
+	"fmt"
+)
+
+// buildDtable will build the decoding table.
+func (s *fseDecoder) buildDtable() error {
+	tableSize := uint32(1 << s.actualTableLog)
+	highThreshold := tableSize - 1
+	symbolNext := s.stateTable[:256]
+
+	// Init, lay down lowprob symbols
+	{
+		for i, v := range s.norm[:s.symbolLen] {
+			if v == -1 {
+				s.dt[highThreshold].setAddBits(uint8(i))
+				highThreshold--
+				v = 1
+			}
+			symbolNext[i] = uint16(v)
+		}
+	}
+
+	// Spread symbols
+	{
+		tableMask := tableSize - 1
+		step := tableStep(tableSize)
+		position := uint32(0)
+		for ss, v := range s.norm[:s.symbolLen] {
+			for i := 0; i < int(v); i++ {
+				s.dt[position].setAddBits(uint8(ss))
+				for {
+					// lowprob area
+					position = (position + step) & tableMask
+					if position <= highThreshold {
+						break
+					}
+				}
+			}
+		}
+		if position != 0 {
+			// position must reach all cells once, otherwise normalizedCounter is incorrect
+			return errors.New("corrupted input (position != 0)")
+		}
+	}
+
+	// Build Decoding table
+	{
+		tableSize := uint16(1 << s.actualTableLog)
+		for u, v := range s.dt[:tableSize] {
+			symbol := v.addBits()
+			nextState := symbolNext[symbol]
+			symbolNext[symbol] = nextState + 1
+			nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
+			s.dt[u&maxTableMask].setNBits(nBits)
+			newState := (nextState << nBits) - tableSize
+			if newState > tableSize {
+				return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
+			}
+			if newState == uint16(u) && nBits == 0 {
+				// Seems weird that this is possible with nbits > 0.
+				return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
+			}
+			s.dt[u&maxTableMask].setNewState(newState)
+		}
+	}
+	return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
index b4757ee3f03..ab26326a8ff 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -62,9 +62,8 @@ func (s symbolTransform) String() string {
 // To indicate that you have populated the histogram call HistogramFinished
 // with the value of the highest populated symbol, as well as the number of entries
 // in the most populated entry. These are accepted at face value.
-// The returned slice will always be length 256.
-func (s *fseEncoder) Histogram() []uint32 {
-	return s.count[:]
+func (s *fseEncoder) Histogram() *[256]uint32 {
+	return &s.count
 }
 
 // HistogramFinished can be called to indicate that the histogram has been populated.
@@ -77,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
 	s.clearCount = maxCount != 0
 }
 
-// prepare will prepare and allocate scratch tables used for both compression and decompression.
-func (s *fseEncoder) prepare() (*fseEncoder, error) {
-	if s == nil {
-		s = &fseEncoder{}
-	}
-	s.useRLE = false
-	if s.clearCount && s.maxCount == 0 {
-		for i := range s.count {
-			s.count[i] = 0
-		}
-		s.clearCount = false
-	}
-	return s, nil
-}
-
 // allocCtable will allocate tables needed for compression.
 // If existing tables a re big enough, they are simply re-used.
 func (s *fseEncoder) allocCtable() {
@@ -710,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
 	c.state = c.stateTable[lu]
 }
 
-// encode the output symbol provided and write it to the bitstream.
-func (c *cState) encode(symbolTT symbolTransform) {
-	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
-	dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
-	c.bw.addBits16NC(c.state, uint8(nbBitsOut))
-	c.state = c.stateTable[dstState]
-}
-
 // flush will write the tablelog to the output and flush the remaining full bytes.
 func (c *cState) flush(tableLog uint8) {
 	c.bw.flush32()
diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go
index cf33f29a1b4..5d73c21ebdd 100644
--- a/vendor/github.com/klauspost/compress/zstd/hash.go
+++ b/vendor/github.com/klauspost/compress/zstd/hash.go
@@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 {
 		return (uint32(u) * prime4bytes) >> (32 - length)
 	}
 }
-
-// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash3(u uint32, h uint8) uint32 {
-	return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
-}
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
index f783e32d251..09164856d22 100644
--- a/vendor/github.com/klauspost/compress/zstd/history.go
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -10,40 +10,48 @@ import (
 
 // history contains the information transferred between blocks.
 type history struct {
-	b             []byte
-	huffTree      *huff0.Scratch
-	recentOffsets [3]int
+	// Literal decompression
+	huffTree *huff0.Scratch
+
+	// Sequence decompression
 	decoders      sequenceDecs
-	windowSize    int
-	maxSize       int
-	error         bool
-	dict          *dict
+	recentOffsets [3]int
+
+	// History buffer...
+	b []byte
+
+	// ignoreBuffer is meant to ignore a number of bytes
+	// when checking for matches in history
+	ignoreBuffer int
+
+	windowSize       int
+	allocFrameBuffer int // needed?
+	error            bool
+	dict             *dict
 }
 
 // reset will reset the history to initial state of a frame.
 // The history must already have been initialized to the desired size.
 func (h *history) reset() {
 	h.b = h.b[:0]
+	h.ignoreBuffer = 0
 	h.error = false
 	h.recentOffsets = [3]int{1, 4, 8}
-	if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
-		fseDecoderPool.Put(f)
-	}
-	if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
-		fseDecoderPool.Put(f)
-	}
-	if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
-		fseDecoderPool.Put(f)
-	}
-	h.decoders = sequenceDecs{}
+	h.decoders.freeDecoders()
+	h.decoders = sequenceDecs{br: h.decoders.br}
+	h.freeHuffDecoder()
+	h.huffTree = nil
+	h.dict = nil
+	//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
+}
+
+func (h *history) freeHuffDecoder() {
 	if h.huffTree != nil {
 		if h.dict == nil || h.dict.litEnc != h.huffTree {
 			huffDecoderPool.Put(h.huffTree)
+			h.huffTree = nil
 		}
 	}
-	h.huffTree = nil
-	h.dict = nil
-	//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
 }
 
 func (h *history) setDict(dict *dict) {
@@ -54,6 +62,7 @@ func (h *history) setDict(dict *dict) {
 	h.decoders.litLengths = dict.llDec
 	h.decoders.offsets = dict.ofDec
 	h.decoders.matchLengths = dict.mlDec
+	h.decoders.dict = dict.content
 	h.recentOffsets = dict.offsets
 	h.huffTree = dict.litEnc
 }
@@ -83,6 +92,24 @@ func (h *history) append(b []byte) {
 	copy(h.b[h.windowSize-len(b):], b)
 }
 
+// ensureBlock will ensure there is space for at least one block...
+func (h *history) ensureBlock() {
+	if cap(h.b) < h.allocFrameBuffer {
+		h.b = make([]byte, 0, h.allocFrameBuffer)
+		return
+	}
+
+	avail := cap(h.b) - len(h.b)
+	if avail >= h.windowSize || avail > maxCompressedBlockSize {
+		return
+	}
+	// Move data down so we only have window size left.
+	// We know we have less than window size in b at this point.
+	discard := len(h.b) - h.windowSize
+	copy(h.b, h.b[discard:])
+	h.b = h.b[:h.windowSize]
+}
+
 // append bytes to history without ever discarding anything.
 func (h *history) appendKeep(b []byte) {
 	h.b = append(h.b, b...)
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
index 69aa3bb587c..777290d44ce 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
@@ -2,12 +2,7 @@
 
 VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
 
-
-[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
-[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
-
-xxhash is a Go implementation of the 64-bit
-[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
 high-quality hashing algorithm that is much faster than anything in the Go
 standard library.
 
@@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
 func (*Digest) Sum64() uint64
 ```
 
-This implementation provides a fast pure-Go implementation and an even faster
-assembly implementation for amd64.
+The package is written with optimized pure Go and also contains even faster
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
+opts into using the Go code even on those architectures.
+
+[xxHash]: http://cyan4973.github.io/xxHash/
+
+## Compatibility
+
+This package is in a module and the latest code is in version 2 of the module.
+You need a version of Go with at least "minimal module compatibility" to use
+github.com/cespare/xxhash/v2:
+
+* 1.9.7+ for Go 1.9
+* 1.10.3+ for Go 1.10
+* Go 1.11 or later
+
+I recommend using the latest release of Go.
 
 ## Benchmarks
 
 Here are some quick benchmarks comparing the pure-Go and assembly
 implementations of Sum64.
 
-| input size | purego | asm |
-| --- | --- | --- |
-| 5 B   |  979.66 MB/s |  1291.17 MB/s  |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s  |
-| 4 KB  | 17573.46 MB/s | 17602.65 MB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| input size | purego    | asm       |
+| ---------- | --------- | --------- |
+| 4 B        |  1.3 GB/s |  1.2 GB/s |
+| 16 B       |  2.9 GB/s |  3.5 GB/s |
+| 100 B      |  6.9 GB/s |  8.1 GB/s |
+| 4 KB       | 11.7 GB/s | 16.7 GB/s |
+| 10 MB      | 12.0 GB/s | 17.3 GB/s |
 
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
-the following commands under Go 1.11.2:
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
+CPU using the following commands under Go 1.19.2:
 
 ```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
 ```
 
 ## Projects using this package
 
 - [InfluxDB](https://github.com/influxdata/influxdb)
 - [Prometheus](https://github.com/prometheus/prometheus)
+- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
 - [FreeCache](https://github.com/coocood/freecache)
+- [FastCache](https://github.com/VictoriaMetrics/fastcache)
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
index 2c112a0ab1c..fc40c820016 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
@@ -18,19 +18,11 @@ const (
 	prime5 uint64 = 2870177450012600261
 )
 
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
-// possible in the Go code is worth a small (but measurable) performance boost
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
-// convenience in the Go code in a few places where we need to intentionally
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
-// result overflows a uint64).
-var (
-	prime1v = prime1
-	prime2v = prime2
-	prime3v = prime3
-	prime4v = prime4
-	prime5v = prime5
-)
+// Store the primes in an array as well.
+//
+// The consts are used when possible in Go code to avoid MOVs but we need a
+// contiguous array of the assembly code.
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
 
 // Digest implements hash.Hash64.
 type Digest struct {
@@ -52,10 +44,10 @@ func New() *Digest {
 
 // Reset clears the Digest's state so that it can be reused.
 func (d *Digest) Reset() {
-	d.v1 = prime1v + prime2
+	d.v1 = primes[0] + prime2
 	d.v2 = prime2
 	d.v3 = 0
-	d.v4 = -prime1v
+	d.v4 = -primes[0]
 	d.total = 0
 	d.n = 0
 }
@@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
 	n = len(b)
 	d.total += uint64(n)
 
+	memleft := d.mem[d.n&(len(d.mem)-1):]
+
 	if d.n+n < 32 {
 		// This new data doesn't even fill the current block.
-		copy(d.mem[d.n:], b)
+		copy(memleft, b)
 		d.n += n
 		return
 	}
 
 	if d.n > 0 {
 		// Finish off the partial block.
-		copy(d.mem[d.n:], b)
+		c := copy(memleft, b)
 		d.v1 = round(d.v1, u64(d.mem[0:8]))
 		d.v2 = round(d.v2, u64(d.mem[8:16]))
 		d.v3 = round(d.v3, u64(d.mem[16:24]))
 		d.v4 = round(d.v4, u64(d.mem[24:32]))
-		b = b[32-d.n:]
+		b = b[c:]
 		d.n = 0
 	}
 
@@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
 
 	h += d.total
 
-	i, end := 0, d.n
-	for ; i+8 <= end; i += 8 {
-		k1 := round(0, u64(d.mem[i:i+8]))
+	b := d.mem[:d.n&(len(d.mem)-1)]
+	for ; len(b) >= 8; b = b[8:] {
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
-		h ^= uint64(u32(d.mem[i:i+4])) * prime1
+	if len(b) >= 4 {
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for i < end {
-		h ^= uint64(d.mem[i]) * prime5
+	for ; len(b) > 0; b = b[1:] {
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
-		i++
 	}
 
 	h ^= h >> 33
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
deleted file mode 100644
index 0ae847f75b0..00000000000
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build !appengine && gc && !purego
-// +build !appengine,gc,!purego
-
-package xxhash
-
-// Sum64 computes the 64-bit xxHash digest of b.
-//
-//go:noescape
-func Sum64(b []byte) uint64
-
-//go:noescape
-func writeBlocks(d *Digest, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
index be8db5bf796..ddb63aa91b1 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@@ -1,215 +1,210 @@
+//go:build !appengine && gc && !purego && !noasm
 // +build !appengine
 // +build gc
 // +build !purego
+// +build !noasm
 
 #include "textflag.h"
 
-// Register allocation:
-// AX	h
-// SI	pointer to advance through b
-// DX	n
-// BX	loop end
-// R8	v1, k1
-// R9	v2
-// R10	v3
-// R11	v4
-// R12	tmp
-// R13	prime1v
-// R14	prime2v
-// DI	prime4v
-
-// round reads from and advances the buffer pointer in SI.
-// It assumes that R13 has prime1v and R14 has prime2v.
-#define round(r) \
-	MOVQ  (SI), R12 \
-	ADDQ  $8, SI    \
-	IMULQ R14, R12  \
-	ADDQ  R12, r    \
-	ROLQ  $31, r    \
-	IMULQ R13, r
-
-// mergeRound applies a merge round on the two registers acc and val.
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
-#define mergeRound(acc, val) \
-	IMULQ R14, val \
-	ROLQ  $31, val \
-	IMULQ R13, val \
-	XORQ  val, acc \
-	IMULQ R13, acc \
-	ADDQ  DI, acc
+// Registers:
+#define h      AX
+#define d      AX
+#define p      SI // pointer to advance through b
+#define n      DX
+#define end    BX // loop end
+#define v1     R8
+#define v2     R9
+#define v3     R10
+#define v4     R11
+#define x      R12
+#define prime1 R13
+#define prime2 R14
+#define prime4 DI
+
+#define round(acc, x) \
+	IMULQ prime2, x   \
+	ADDQ  x, acc      \
+	ROLQ  $31, acc    \
+	IMULQ prime1, acc
+
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+	IMULQ prime2, x \
+	ROLQ  $31, x    \
+	IMULQ prime1, x
+
+// mergeRound applies a merge round on the two registers acc and x.
+// It assumes that prime1, prime2, and prime4 have been loaded.
+#define mergeRound(acc, x) \
+	round0(x)         \
+	XORQ  x, acc      \
+	IMULQ prime1, acc \
+	ADDQ  prime4, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that there is at least one block
+// to process.
+#define blockLoop() \
+loop:  \
+	MOVQ +0(p), x  \
+	round(v1, x)   \
+	MOVQ +8(p), x  \
+	round(v2, x)   \
+	MOVQ +16(p), x \
+	round(v3, x)   \
+	MOVQ +24(p), x \
+	round(v4, x)   \
+	ADDQ $32, p    \
+	CMPQ p, end    \
+	JLE  loop
 
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
 	// Load fixed primes.
-	MOVQ ·prime1v(SB), R13
-	MOVQ ·prime2v(SB), R14
-	MOVQ ·prime4v(SB), DI
+	MOVQ ·primes+0(SB), prime1
+	MOVQ ·primes+8(SB), prime2
+	MOVQ ·primes+24(SB), prime4
 
 	// Load slice.
-	MOVQ b_base+0(FP), SI
-	MOVQ b_len+8(FP), DX
-	LEAQ (SI)(DX*1), BX
+	MOVQ b_base+0(FP), p
+	MOVQ b_len+8(FP), n
+	LEAQ (p)(n*1), end
 
 	// The first loop limit will be len(b)-32.
-	SUBQ $32, BX
+	SUBQ $32, end
 
 	// Check whether we have at least one block.
-	CMPQ DX, $32
+	CMPQ n, $32
 	JLT  noBlocks
 
 	// Set up initial state (v1, v2, v3, v4).
-	MOVQ R13, R8
-	ADDQ R14, R8
-	MOVQ R14, R9
-	XORQ R10, R10
-	XORQ R11, R11
-	SUBQ R13, R11
-
-	// Loop until SI > BX.
-blockLoop:
-	round(R8)
-	round(R9)
-	round(R10)
-	round(R11)
-
-	CMPQ SI, BX
-	JLE  blockLoop
-
-	MOVQ R8, AX
-	ROLQ $1, AX
-	MOVQ R9, R12
-	ROLQ $7, R12
-	ADDQ R12, AX
-	MOVQ R10, R12
-	ROLQ $12, R12
-	ADDQ R12, AX
-	MOVQ R11, R12
-	ROLQ $18, R12
-	ADDQ R12, AX
-
-	mergeRound(AX, R8)
-	mergeRound(AX, R9)
-	mergeRound(AX, R10)
-	mergeRound(AX, R11)
+	MOVQ prime1, v1
+	ADDQ prime2, v1
+	MOVQ prime2, v2
+	XORQ v3, v3
+	XORQ v4, v4
+	SUBQ prime1, v4
+
+	blockLoop()
+
+	MOVQ v1, h
+	ROLQ $1, h
+	MOVQ v2, x
+	ROLQ $7, x
+	ADDQ x, h
+	MOVQ v3, x
+	ROLQ $12, x
+	ADDQ x, h
+	MOVQ v4, x
+	ROLQ $18, x
+	ADDQ x, h
+
+	mergeRound(h, v1)
+	mergeRound(h, v2)
+	mergeRound(h, v3)
+	mergeRound(h, v4)
 
 	JMP afterBlocks
 
 noBlocks:
-	MOVQ ·prime5v(SB), AX
+	MOVQ ·primes+32(SB), h
 
 afterBlocks:
-	ADDQ DX, AX
-
-	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
-	ADDQ $24, BX
-
-	CMPQ SI, BX
-	JG   fourByte
-
-wordLoop:
-	// Calculate k1.
-	MOVQ  (SI), R8
-	ADDQ  $8, SI
-	IMULQ R14, R8
-	ROLQ  $31, R8
-	IMULQ R13, R8
-
-	XORQ  R8, AX
-	ROLQ  $27, AX
-	IMULQ R13, AX
-	ADDQ  DI, AX
-
-	CMPQ SI, BX
-	JLE  wordLoop
-
-fourByte:
-	ADDQ $4, BX
-	CMPQ SI, BX
-	JG   singles
-
-	MOVL  (SI), R8
-	ADDQ  $4, SI
-	IMULQ R13, R8
-	XORQ  R8, AX
-
-	ROLQ  $23, AX
-	IMULQ R14, AX
-	ADDQ  ·prime3v(SB), AX
-
-singles:
-	ADDQ $4, BX
-	CMPQ SI, BX
+	ADDQ n, h
+
+	ADDQ $24, end
+	CMPQ p, end
+	JG   try4
+
+loop8:
+	MOVQ  (p), x
+	ADDQ  $8, p
+	round0(x)
+	XORQ  x, h
+	ROLQ  $27, h
+	IMULQ prime1, h
+	ADDQ  prime4, h
+
+	CMPQ p, end
+	JLE  loop8
+
+try4:
+	ADDQ $4, end
+	CMPQ p, end
+	JG   try1
+
+	MOVL  (p), x
+	ADDQ  $4, p
+	IMULQ prime1, x
+	XORQ  x, h
+
+	ROLQ  $23, h
+	IMULQ prime2, h
+	ADDQ  ·primes+16(SB), h
+
+try1:
+	ADDQ $4, end
+	CMPQ p, end
 	JGE  finalize
 
-singlesLoop:
-	MOVBQZX (SI), R12
-	ADDQ    $1, SI
-	IMULQ   ·prime5v(SB), R12
-	XORQ    R12, AX
+loop1:
+	MOVBQZX (p), x
+	ADDQ    $1, p
+	IMULQ   ·primes+32(SB), x
+	XORQ    x, h
+	ROLQ    $11, h
+	IMULQ   prime1, h
 
-	ROLQ  $11, AX
-	IMULQ R13, AX
-
-	CMPQ SI, BX
-	JL   singlesLoop
+	CMPQ p, end
+	JL   loop1
 
 finalize:
-	MOVQ  AX, R12
-	SHRQ  $33, R12
-	XORQ  R12, AX
-	IMULQ R14, AX
-	MOVQ  AX, R12
-	SHRQ  $29, R12
-	XORQ  R12, AX
-	IMULQ ·prime3v(SB), AX
-	MOVQ  AX, R12
-	SHRQ  $32, R12
-	XORQ  R12, AX
-
-	MOVQ AX, ret+24(FP)
+	MOVQ  h, x
+	SHRQ  $33, x
+	XORQ  x, h
+	IMULQ prime2, h
+	MOVQ  h, x
+	SHRQ  $29, x
+	XORQ  x, h
+	IMULQ ·primes+16(SB), h
+	MOVQ  h, x
+	SHRQ  $32, x
+	XORQ  x, h
+
+	MOVQ h, ret+24(FP)
 	RET
 
-// writeBlocks uses the same registers as above except that it uses AX to store
-// the d pointer.
-
 // func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
 	// Load fixed primes needed for round.
-	MOVQ ·prime1v(SB), R13
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+0(SB), prime1
+	MOVQ ·primes+8(SB), prime2
 
 	// Load slice.
-	MOVQ b_base+8(FP), SI
-	MOVQ b_len+16(FP), DX
-	LEAQ (SI)(DX*1), BX
-	SUBQ $32, BX
+	MOVQ b_base+8(FP), p
+	MOVQ b_len+16(FP), n
+	LEAQ (p)(n*1), end
+	SUBQ $32, end
 
 	// Load vN from d.
-	MOVQ d+0(FP), AX
-	MOVQ 0(AX), R8   // v1
-	MOVQ 8(AX), R9   // v2
-	MOVQ 16(AX), R10 // v3
-	MOVQ 24(AX), R11 // v4
+	MOVQ s+0(FP), d
+	MOVQ 0(d), v1
+	MOVQ 8(d), v2
+	MOVQ 16(d), v3
+	MOVQ 24(d), v4
 
 	// We don't need to check the loop condition here; this function is
 	// always called with at least one block of data to process.
-blockLoop:
-	round(R8)
-	round(R9)
-	round(R10)
-	round(R11)
-
-	CMPQ SI, BX
-	JLE  blockLoop
+	blockLoop()
 
 	// Copy vN back to d.
-	MOVQ R8, 0(AX)
-	MOVQ R9, 8(AX)
-	MOVQ R10, 16(AX)
-	MOVQ R11, 24(AX)
-
-	// The number of bytes written is SI minus the old base pointer.
-	SUBQ b_base+8(FP), SI
-	MOVQ SI, ret+32(FP)
+	MOVQ v1, 0(d)
+	MOVQ v2, 8(d)
+	MOVQ v3, 16(d)
+	MOVQ v4, 24(d)
+
+	// The number of bytes written is p minus the old base pointer.
+	SUBQ b_base+8(FP), p
+	MOVQ p, ret+32(FP)
 
 	RET
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
new file mode 100644
index 00000000000..17901e08040
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
@@ -0,0 +1,184 @@
+//go:build !appengine && gc && !purego && !noasm
+// +build !appengine
+// +build gc
+// +build !purego
+// +build !noasm
+
+#include "textflag.h"
+
+// Registers:
+#define digest	R1
+#define h	R2 // return value
+#define p	R3 // input pointer
+#define n	R4 // input length
+#define nblocks	R5 // n / 32
+#define prime1	R7
+#define prime2	R8
+#define prime3	R9
+#define prime4	R10
+#define prime5	R11
+#define v1	R12
+#define v2	R13
+#define v3	R14
+#define v4	R15
+#define x1	R20
+#define x2	R21
+#define x3	R22
+#define x4	R23
+
+#define round(acc, x) \
+	MADD prime2, acc, x, acc \
+	ROR  $64-31, acc         \
+	MUL  prime1, acc
+
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+	MUL prime2, x \
+	ROR $64-31, x \
+	MUL prime1, x
+
+#define mergeRound(acc, x) \
+	round0(x)                     \
+	EOR  x, acc                   \
+	MADD acc, prime4, prime1, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
+#define blockLoop() \
+	LSR     $5, n, nblocks  \
+	PCALIGN $16             \
+	loop:                   \
+	LDP.P   16(p), (x1, x2) \
+	LDP.P   16(p), (x3, x4) \
+	round(v1, x1)           \
+	round(v2, x2)           \
+	round(v3, x3)           \
+	round(v4, x4)           \
+	SUB     $1, nblocks     \
+	CBNZ    nblocks, loop
+
+// func Sum64(b []byte) uint64
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
+	LDP b_base+0(FP), (p, n)
+
+	LDP  ·primes+0(SB), (prime1, prime2)
+	LDP  ·primes+16(SB), (prime3, prime4)
+	MOVD ·primes+32(SB), prime5
+
+	CMP  $32, n
+	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
+	BLT  afterLoop
+
+	ADD  prime1, prime2, v1
+	MOVD prime2, v2
+	MOVD $0, v3
+	NEG  prime1, v4
+
+	blockLoop()
+
+	ROR $64-1, v1, x1
+	ROR $64-7, v2, x2
+	ADD x1, x2
+	ROR $64-12, v3, x3
+	ROR $64-18, v4, x4
+	ADD x3, x4
+	ADD x2, x4, h
+
+	mergeRound(h, v1)
+	mergeRound(h, v2)
+	mergeRound(h, v3)
+	mergeRound(h, v4)
+
+afterLoop:
+	ADD n, h
+
+	TBZ   $4, n, try8
+	LDP.P 16(p), (x1, x2)
+
+	round0(x1)
+
+	// NOTE: here and below, sequencing the EOR after the ROR (using a
+	// rotated register) is worth a small but measurable speedup for small
+	// inputs.
+	ROR  $64-27, h
+	EOR  x1 @> 64-27, h, h
+	MADD h, prime4, prime1, h
+
+	round0(x2)
+	ROR  $64-27, h
+	EOR  x2 @> 64-27, h, h
+	MADD h, prime4, prime1, h
+
+try8:
+	TBZ    $3, n, try4
+	MOVD.P 8(p), x1
+
+	round0(x1)
+	ROR  $64-27, h
+	EOR  x1 @> 64-27, h, h
+	MADD h, prime4, prime1, h
+
+try4:
+	TBZ     $2, n, try2
+	MOVWU.P 4(p), x2
+
+	MUL  prime1, x2
+	ROR  $64-23, h
+	EOR  x2 @> 64-23, h, h
+	MADD h, prime3, prime2, h
+
+try2:
+	TBZ     $1, n, try1
+	MOVHU.P 2(p), x3
+	AND     $255, x3, x1
+	LSR     $8, x3, x2
+
+	MUL prime5, x1
+	ROR $64-11, h
+	EOR x1 @> 64-11, h, h
+	MUL prime1, h
+
+	MUL prime5, x2
+	ROR $64-11, h
+	EOR x2 @> 64-11, h, h
+	MUL prime1, h
+
+try1:
+	TBZ   $0, n, finalize
+	MOVBU (p), x4
+
+	MUL prime5, x4
+	ROR $64-11, h
+	EOR x4 @> 64-11, h, h
+	MUL prime1, h
+
+finalize:
+	EOR h >> 33, h
+	MUL prime2, h
+	EOR h >> 29, h
+	MUL prime3, h
+	EOR h >> 32, h
+
+	MOVD h, ret+24(FP)
+	RET
+
+// func writeBlocks(d *Digest, b []byte) int
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
+	LDP ·primes+0(SB), (prime1, prime2)
+
+	// Load state. Assume v[1-4] are stored contiguously.
+	MOVD d+0(FP), digest
+	LDP  0(digest), (v1, v2)
+	LDP  16(digest), (v3, v4)
+
+	LDP b_base+8(FP), (p, n)
+
+	blockLoop()
+
+	// Store updated state.
+	STP (v1, v2), 0(digest)
+	STP (v3, v4), 16(digest)
+
+	BIC  $31, n
+	MOVD n, ret+32(FP)
+	RET
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
new file mode 100644
index 00000000000..d4221edf4fd
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
@@ -0,0 +1,16 @@
+//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm
+// +build amd64 arm64
+// +build !appengine
+// +build gc
+// +build !purego
+// +build !noasm
+
+package xxhash
+
+// Sum64 computes the 64-bit xxHash digest of b.
+//
+//go:noescape
+func Sum64(b []byte) uint64
+
+//go:noescape
+func writeBlocks(s *Digest, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
index 1f52f296e71..0be16cefc7f 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
@@ -1,5 +1,5 @@
-//go:build !amd64 || appengine || !gc || purego
-// +build !amd64 appengine !gc purego
+//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm
+// +build !amd64,!arm64 appengine !gc purego noasm
 
 package xxhash
 
@@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
 	var h uint64
 
 	if n >= 32 {
-		v1 := prime1v + prime2
+		v1 := primes[0] + prime2
 		v2 := prime2
 		v3 := uint64(0)
-		v4 := -prime1v
+		v4 := -primes[0]
 		for len(b) >= 32 {
 			v1 = round(v1, u64(b[0:8:len(b)]))
 			v2 = round(v2, u64(b[8:16:len(b)]))
@@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 {
 
 	h += uint64(n)
 
-	i, end := 0, len(b)
-	for ; i+8 <= end; i += 8 {
-		k1 := round(0, u64(b[i:i+8:len(b)]))
+	for ; len(b) >= 8; b = b[8:] {
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
-		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+	if len(b) >= 4 {
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for ; i < end; i++ {
-		h ^= uint64(b[i]) * prime5
+	for ; len(b) > 0; b = b[1:] {
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 	}
 
diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
new file mode 100644
index 00000000000..f41932b7a4f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
@@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+// matchLen returns how many bytes match in a and b
+//
+// It assumes that:
+//
+//	len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
new file mode 100644
index 00000000000..9a7655c0f76
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
@@ -0,0 +1,68 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+// Requires: BMI
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+	MOVQ a_base+0(FP), AX
+	MOVQ b_base+24(FP), CX
+	MOVQ a_len+8(FP), DX
+
+	// matchLen
+	XORL SI, SI
+	CMPL DX, $0x08
+	JB   matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+	MOVQ  (AX)(SI*1), BX
+	XORQ  (CX)(SI*1), BX
+	TESTQ BX, BX
+	JZ    matchlen_loop_standalone
+
+#ifdef GOAMD64_v3
+	TZCNTQ BX, BX
+#else
+	BSFQ BX, BX
+#endif
+	SARQ $0x03, BX
+	LEAL (SI)(BX*1), SI
+	JMP  gen_match_len_end
+
+matchlen_loop_standalone:
+	LEAL -8(DX), DX
+	LEAL 8(SI), SI
+	CMPL DX, $0x08
+	JAE  matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+	CMPL DX, $0x04
+	JB   matchlen_match2_standalone
+	MOVL (AX)(SI*1), BX
+	CMPL (CX)(SI*1), BX
+	JNE  matchlen_match2_standalone
+	LEAL -4(DX), DX
+	LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+	CMPL DX, $0x02
+	JB   matchlen_match1_standalone
+	MOVW (AX)(SI*1), BX
+	CMPW (CX)(SI*1), BX
+	JNE  matchlen_match1_standalone
+	LEAL -2(DX), DX
+	LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+	CMPL DX, $0x01
+	JB   gen_match_len_end
+	MOVB (AX)(SI*1), BL
+	CMPB (CX)(SI*1), BL
+	JNE  gen_match_len_end
+	INCL SI
+
+gen_match_len_end:
+	MOVQ SI, ret+48(FP)
+	RET
diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
new file mode 100644
index 00000000000..57b9c31c027
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
@@ -0,0 +1,33 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+	"encoding/binary"
+	"math/bits"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shortest of the two.
+func matchLen(a, b []byte) (n int) {
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+		if diff != 0 {
+			return n + bits.TrailingZeros64(diff)>>3
+		}
+		n += 8
+	}
+
+	for i := range a {
+		if a[i] != b[i] {
+			break
+		}
+		n++
+	}
+	return n
+
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index 1dd39e63b7e..d7fe6d82d93 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -20,6 +20,10 @@ type seq struct {
 	llCode, mlCode, ofCode uint8
 }
 
+type seqVals struct {
+	ll, ml, mo int
+}
+
 func (s seq) String() string {
 	if s.offset <= 3 {
 		if s.offset == 0 {
@@ -61,16 +65,19 @@ type sequenceDecs struct {
 	offsets      sequenceDec
 	matchLengths sequenceDec
 	prevOffset   [3]int
-	hist         []byte
 	dict         []byte
 	literals     []byte
 	out          []byte
+	nSeqs        int
+	br           *bitReader
+	seqSize      int
 	windowSize   int
 	maxBits      uint8
+	maxSyncLen   uint64
 }
 
 // initialize all 3 decoders from the stream input.
-func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []byte) error {
+func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error {
 	if err := s.litLengths.init(br); err != nil {
 		return errors.New("litLengths:" + err.Error())
 	}
@@ -80,8 +87,7 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
 	if err := s.matchLengths.init(br); err != nil {
 		return errors.New("matchLengths:" + err.Error())
 	}
-	s.literals = literals
-	s.hist = hist.b
+	s.br = br
 	s.prevOffset = hist.recentOffsets
 	s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
 	s.windowSize = hist.windowSize
@@ -93,20 +99,153 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
 	return nil
 }
 
+func (s *sequenceDecs) freeDecoders() {
+	if f := s.litLengths.fse; f != nil && !f.preDefined {
+		fseDecoderPool.Put(f)
+		s.litLengths.fse = nil
+	}
+	if f := s.offsets.fse; f != nil && !f.preDefined {
+		fseDecoderPool.Put(f)
+		s.offsets.fse = nil
+	}
+	if f := s.matchLengths.fse; f != nil && !f.preDefined {
+		fseDecoderPool.Put(f)
+		s.matchLengths.fse = nil
+	}
+}
+
+// execute will execute the decoded sequence with the provided history.
+// The sequence must be evaluated before being sent.
+func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
+	if len(s.dict) == 0 {
+		return s.executeSimple(seqs, hist)
+	}
+
+	// Ensure we have enough output size...
+	if len(s.out)+s.seqSize > cap(s.out) {
+		addBytes := s.seqSize + len(s.out)
+		s.out = append(s.out, make([]byte, addBytes)...)
+		s.out = s.out[:len(s.out)-addBytes]
+	}
+
+	if debugDecoder {
+		printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize)
+	}
+
+	var t = len(s.out)
+	out := s.out[:t+s.seqSize]
+
+	for _, seq := range seqs {
+		// Add literals
+		copy(out[t:], s.literals[:seq.ll])
+		t += seq.ll
+		s.literals = s.literals[seq.ll:]
+
+		// Copy from dictionary...
+		if seq.mo > t+len(hist) || seq.mo > s.windowSize {
+			if len(s.dict) == 0 {
+				return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
+			}
+
+			// we may be in dictionary.
+			dictO := len(s.dict) - (seq.mo - (t + len(hist)))
+			if dictO < 0 || dictO >= len(s.dict) {
+				return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict))
+			}
+			end := dictO + seq.ml
+			if end > len(s.dict) {
+				n := len(s.dict) - dictO
+				copy(out[t:], s.dict[dictO:])
+				t += n
+				seq.ml -= n
+			} else {
+				copy(out[t:], s.dict[dictO:end])
+				t += end - dictO
+				continue
+			}
+		}
+
+		// Copy from history.
+		if v := seq.mo - t; v > 0 {
+			// v is the start position in history from end.
+			start := len(hist) - v
+			if seq.ml > v {
+				// Some goes into current block.
+				// Copy remainder of history
+				copy(out[t:], hist[start:])
+				t += v
+				seq.ml -= v
+			} else {
+				copy(out[t:], hist[start:start+seq.ml])
+				t += seq.ml
+				continue
+			}
+		}
+		// We must be in current buffer now
+		if seq.ml > 0 {
+			start := t - seq.mo
+			if seq.ml <= t-start {
+				// No overlap
+				copy(out[t:], out[start:start+seq.ml])
+				t += seq.ml
+				continue
+			} else {
+				// Overlapping copy
+				// Extend destination slice and copy one byte at the time.
+				src := out[start : start+seq.ml]
+				dst := out[t:]
+				dst = dst[:len(src)]
+				t += len(src)
+				// Destination is the space we just added.
+				for i := range src {
+					dst[i] = src[i]
+				}
+			}
+		}
+	}
+
+	// Add final literals
+	copy(out[t:], s.literals)
+	if debugDecoder {
+		t += len(s.literals)
+		if t != len(out) {
+			panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
+		}
+	}
+	s.out = out
+
+	return nil
+}
+
 // decode sequences from the stream with the provided history.
-func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
+func (s *sequenceDecs) decodeSync(hist []byte) error {
+	supported, err := s.decodeSyncSimple(hist)
+	if supported {
+		return err
+	}
+
+	br := s.br
+	seqs := s.nSeqs
 	startSize := len(s.out)
 	// Grab full sizes tables, to avoid bounds checks.
 	llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
 	llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
+	out := s.out
+	maxBlockSize := maxCompressedBlockSize
+	if s.windowSize < maxBlockSize {
+		maxBlockSize = s.windowSize
+	}
 
+	if debugDecoder {
+		println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
+	}
 	for i := seqs - 1; i >= 0; i-- {
 		if br.overread() {
-			printf("reading sequence %d, exceeded available data\n", seqs-i)
+			printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain())
 			return io.ErrUnexpectedEOF
 		}
 		var ll, mo, ml int
-		if br.off > 4+((maxOffsetBits+16+16)>>3) {
+		if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
 			// inlined function:
 			// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
 
@@ -151,7 +290,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 
 					if temp == 0 {
 						// 0 is not valid; input is corrupted; force offset to 1
-						println("temp was 0")
+						println("WARNING: temp was 0")
 						temp = 1
 					}
 
@@ -176,51 +315,49 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 		if ll > len(s.literals) {
 			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
 		}
-		size := ll + ml + len(s.out)
+		size := ll + ml + len(out)
 		if size-startSize > maxBlockSize {
-			return fmt.Errorf("output (%d) bigger than max block size", size)
+			return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 		}
-		if size > cap(s.out) {
+		if size > cap(out) {
 			// Not enough size, which can happen under high volume block streaming conditions
 			// but could be if destination slice is too small for sync operations.
 			// over-allocating here can create a large amount of GC pressure so we try to keep
 			// it as contained as possible
-			used := len(s.out) - startSize
+			used := len(out) - startSize
 			addBytes := 256 + ll + ml + used>>2
 			// Clamp to max block size.
 			if used+addBytes > maxBlockSize {
 				addBytes = maxBlockSize - used
 			}
-			s.out = append(s.out, make([]byte, addBytes)...)
-			s.out = s.out[:len(s.out)-addBytes]
+			out = append(out, make([]byte, addBytes)...)
+			out = out[:len(out)-addBytes]
 		}
 		if ml > maxMatchLen {
 			return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
 		}
 
 		// Add literals
-		s.out = append(s.out, s.literals[:ll]...)
+		out = append(out, s.literals[:ll]...)
 		s.literals = s.literals[ll:]
-		out := s.out
 
 		if mo == 0 && ml > 0 {
 			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
 		}
 
-		if mo > len(s.out)+len(hist) || mo > s.windowSize {
+		if mo > len(out)+len(hist) || mo > s.windowSize {
 			if len(s.dict) == 0 {
-				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
 			}
 
 			// we may be in dictionary.
-			dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
+			dictO := len(s.dict) - (mo - (len(out) + len(hist)))
 			if dictO < 0 || dictO >= len(s.dict) {
-				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
 			}
 			end := dictO + ml
 			if end > len(s.dict) {
 				out = append(out, s.dict[dictO:]...)
-				mo -= len(s.dict) - dictO
 				ml -= len(s.dict) - dictO
 			} else {
 				out = append(out, s.dict[dictO:end]...)
@@ -231,26 +368,25 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 
 		// Copy from history.
 		// TODO: Blocks without history could be made to ignore this completely.
-		if v := mo - len(s.out); v > 0 {
+		if v := mo - len(out); v > 0 {
 			// v is the start position in history from end.
-			start := len(s.hist) - v
+			start := len(hist) - v
 			if ml > v {
 				// Some goes into current block.
 				// Copy remainder of history
-				out = append(out, s.hist[start:]...)
-				mo -= v
+				out = append(out, hist[start:]...)
 				ml -= v
 			} else {
-				out = append(out, s.hist[start:start+ml]...)
+				out = append(out, hist[start:start+ml]...)
 				ml = 0
 			}
 		}
 		// We must be in current buffer now
 		if ml > 0 {
-			start := len(s.out) - mo
-			if ml <= len(s.out)-start {
+			start := len(out) - mo
+			if ml <= len(out)-start {
 				// No overlap
-				out = append(out, s.out[start:start+ml]...)
+				out = append(out, out[start:start+ml]...)
 			} else {
 				// Overlapping copy
 				// Extend destination slice and copy one byte at the time.
@@ -264,7 +400,6 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 				}
 			}
 		}
-		s.out = out
 		if i == 0 {
 			// This is the last sequence, so we shouldn't update state.
 			break
@@ -278,7 +413,8 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 			mlState = mlTable[mlState.newState()&maxTableMask]
 			ofState = ofTable[ofState.newState()&maxTableMask]
 		} else {
-			bits := br.getBitsFast(nBits)
+			bits := br.get32BitsFast(nBits)
+
 			lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
 			llState = llTable[(llState.newState()+lowBits)&maxTableMask]
 
@@ -291,19 +427,13 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 		}
 	}
 
-	// Add final literals
-	s.out = append(s.out, s.literals...)
-	return nil
-}
+	if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
+		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+	}
 
-// update states, at least 27 bits must be available.
-func (s *sequenceDecs) update(br *bitReader) {
-	// Max 8 bits
-	s.litLengths.state.next(br)
-	// Max 9 bits
-	s.matchLengths.state.next(br)
-	// Max 8 bits
-	s.offsets.state.next(br)
+	// Add final literals
+	s.out = append(out, s.literals...)
+	return br.close()
 }
 
 var bitMask [16]uint16
@@ -314,107 +444,21 @@ func init() {
 	}
 }
 
-// update states, at least 27 bits must be available.
-func (s *sequenceDecs) updateAlt(br *bitReader) {
-	// Update all 3 states at once. Approx 20% faster.
-	a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
-
-	nBits := a.nbBits() + b.nbBits() + c.nbBits()
-	if nBits == 0 {
-		s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
-		s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
-		s.offsets.state.state = s.offsets.state.dt[c.newState()]
-		return
-	}
-	bits := br.getBitsFast(nBits)
-	lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
-	s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
-
-	lowBits = uint16(bits >> (c.nbBits() & 31))
-	lowBits &= bitMask[b.nbBits()&15]
-	s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
-
-	lowBits = uint16(bits) & bitMask[c.nbBits()&15]
-	s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
-}
-
-// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
-func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
+func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
 	// Final will not read from stream.
 	ll, llB := llState.final()
 	ml, mlB := mlState.final()
 	mo, moB := ofState.final()
 
 	// extra bits are stored in reverse order.
-	br.fillFast()
+	br.fill()
 	mo += br.getBits(moB)
 	if s.maxBits > 32 {
-		br.fillFast()
+		br.fill()
 	}
+	// matchlength+literal length, max 32 bits
 	ml += br.getBits(mlB)
 	ll += br.getBits(llB)
-
-	if moB > 1 {
-		s.prevOffset[2] = s.prevOffset[1]
-		s.prevOffset[1] = s.prevOffset[0]
-		s.prevOffset[0] = mo
-		return
-	}
-	// mo = s.adjustOffset(mo, ll, moB)
-	// Inlined for rather big speedup
-	if ll == 0 {
-		// There is an exception though, when current sequence's literals_length = 0.
-		// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
-		// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
-		mo++
-	}
-
-	if mo == 0 {
-		mo = s.prevOffset[0]
-		return
-	}
-	var temp int
-	if mo == 3 {
-		temp = s.prevOffset[0] - 1
-	} else {
-		temp = s.prevOffset[mo]
-	}
-
-	if temp == 0 {
-		// 0 is not valid; input is corrupted; force offset to 1
-		println("temp was 0")
-		temp = 1
-	}
-
-	if mo != 1 {
-		s.prevOffset[2] = s.prevOffset[1]
-	}
-	s.prevOffset[1] = s.prevOffset[0]
-	s.prevOffset[0] = temp
-	mo = temp
-	return
-}
-
-func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
-	// Final will not read from stream.
-	ll, llB := llState.final()
-	ml, mlB := mlState.final()
-	mo, moB := ofState.final()
-
-	// extra bits are stored in reverse order.
-	br.fill()
-	if s.maxBits <= 32 {
-		mo += br.getBits(moB)
-		ml += br.getBits(mlB)
-		ll += br.getBits(llB)
-	} else {
-		mo += br.getBits(moB)
-		br.fill()
-		// matchlength+literal length, max 32 bits
-		ml += br.getBits(mlB)
-		ll += br.getBits(llB)
-
-	}
 	mo = s.adjustOffset(mo, ll, moB)
 	return
 }
@@ -457,36 +501,3 @@ func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
 	s.prevOffset[0] = temp
 	return temp
 }
-
-// mergeHistory will merge history.
-func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) {
-	for i := uint(0); i < 3; i++ {
-		var sNew, sHist *sequenceDec
-		switch i {
-		default:
-			// same as "case 0":
-			sNew = &s.litLengths
-			sHist = &hist.litLengths
-		case 1:
-			sNew = &s.offsets
-			sHist = &hist.offsets
-		case 2:
-			sNew = &s.matchLengths
-			sHist = &hist.matchLengths
-		}
-		if sNew.repeat {
-			if sHist.fse == nil {
-				return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i)
-			}
-			continue
-		}
-		if sNew.fse == nil {
-			return nil, fmt.Errorf("sequence stream %d, no fse found", i)
-		}
-		if sHist.fse != nil && !sHist.fse.preDefined {
-			fseDecoderPool.Put(sHist.fse)
-		}
-		sHist.fse = sNew.fse
-	}
-	return hist, nil
-}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
new file mode 100644
index 00000000000..8adabd82877
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -0,0 +1,394 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+package zstd
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/klauspost/compress/internal/cpuinfo"
+)
+
+type decodeSyncAsmContext struct {
+	llTable     []decSymbol
+	mlTable     []decSymbol
+	ofTable     []decSymbol
+	llState     uint64
+	mlState     uint64
+	ofState     uint64
+	iteration   int
+	litRemain   int
+	out         []byte
+	outPosition int
+	literals    []byte
+	litPosition int
+	history     []byte
+	windowSize  int
+	ll          int // set on error (not for all errors, please refer to _generate/gen.go)
+	ml          int // set on error (not for all errors, please refer to _generate/gen.go)
+	mo          int // set on error (not for all errors, please refer to _generate/gen.go)
+}
+
+// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//
+//go:noescape
+func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
+//
+//go:noescape
+func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
+//
+//go:noescape
+func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
+//
+//go:noescape
+func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// decode sequences from the stream with the provided history but without a dictionary.
+func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
+	if len(s.dict) > 0 {
+		return false, nil
+	}
+	if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
+		return false, nil
+	}
+
+	// FIXME: Using unsafe memory copies leads to rare, random crashes
+	// with fuzz testing. It is therefore disabled for now.
+	const useSafe = true
+	/*
+		useSafe := false
+		if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
+			useSafe = true
+		}
+		if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
+			useSafe = true
+		}
+		if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
+			useSafe = true
+		}
+	*/
+
+	br := s.br
+
+	maxBlockSize := maxCompressedBlockSize
+	if s.windowSize < maxBlockSize {
+		maxBlockSize = s.windowSize
+	}
+
+	ctx := decodeSyncAsmContext{
+		llTable:     s.litLengths.fse.dt[:maxTablesize],
+		mlTable:     s.matchLengths.fse.dt[:maxTablesize],
+		ofTable:     s.offsets.fse.dt[:maxTablesize],
+		llState:     uint64(s.litLengths.state.state),
+		mlState:     uint64(s.matchLengths.state.state),
+		ofState:     uint64(s.offsets.state.state),
+		iteration:   s.nSeqs - 1,
+		litRemain:   len(s.literals),
+		out:         s.out,
+		outPosition: len(s.out),
+		literals:    s.literals,
+		windowSize:  s.windowSize,
+		history:     hist,
+	}
+
+	s.seqSize = 0
+	startSize := len(s.out)
+
+	var errCode int
+	if cpuinfo.HasBMI2() {
+		if useSafe {
+			errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
+		} else {
+			errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
+		}
+	} else {
+		if useSafe {
+			errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
+		} else {
+			errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
+		}
+	}
+	switch errCode {
+	case noError:
+		break
+
+	case errorMatchLenOfsMismatch:
+		return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
+
+	case errorMatchLenTooBig:
+		return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
+
+	case errorMatchOffTooBig:
+		return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
+			ctx.mo, ctx.outPosition+len(hist)-startSize)
+
+	case errorNotEnoughLiterals:
+		return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
+			ctx.ll, ctx.litRemain+ctx.ll)
+
+	case errorOverread:
+		return true, io.ErrUnexpectedEOF
+
+	case errorNotEnoughSpace:
+		size := ctx.outPosition + ctx.ll + ctx.ml
+		if debugDecoder {
+			println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
+		}
+		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+
+	default:
+		return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
+	}
+
+	s.seqSize += ctx.litRemain
+	if s.seqSize > maxBlockSize {
+		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+	}
+	err := br.close()
+	if err != nil {
+		printf("Closing sequences: %v, %+v\n", err, *br)
+		return true, err
+	}
+
+	s.literals = s.literals[ctx.litPosition:]
+	t := ctx.outPosition
+	s.out = s.out[:t]
+
+	// Add final literals
+	s.out = append(s.out, s.literals...)
+	if debugDecoder {
+		t += len(s.literals)
+		if t != len(s.out) {
+			panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
+		}
+	}
+
+	return true, nil
+}
+
+// --------------------------------------------------------------------------------
+
+type decodeAsmContext struct {
+	llTable   []decSymbol
+	mlTable   []decSymbol
+	ofTable   []decSymbol
+	llState   uint64
+	mlState   uint64
+	ofState   uint64
+	iteration int
+	seqs      []seqVals
+	litRemain int
+}
+
+const noError = 0
+
+// error reported when mo == 0 && ml > 0
+const errorMatchLenOfsMismatch = 1
+
+// error reported when ml > maxMatchLen
+const errorMatchLenTooBig = 2
+
+// error reported when mo > available history or mo > s.windowSize
+const errorMatchOffTooBig = 3
+
+// error reported when the sum of literal lengths exeeceds the literal buffer size
+const errorNotEnoughLiterals = 4
+
+// error reported when capacity of `out` is too small
+const errorNotEnoughSpace = 5
+
+// error reported when bits are overread.
+const errorOverread = 6
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//
+//go:noescape
+func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//
+//go:noescape
+func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//
+//go:noescape
+func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//
+//go:noescape
+func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// decode sequences from the stream without the provided history.
+func (s *sequenceDecs) decode(seqs []seqVals) error {
+	br := s.br
+
+	maxBlockSize := maxCompressedBlockSize
+	if s.windowSize < maxBlockSize {
+		maxBlockSize = s.windowSize
+	}
+
+	ctx := decodeAsmContext{
+		llTable:   s.litLengths.fse.dt[:maxTablesize],
+		mlTable:   s.matchLengths.fse.dt[:maxTablesize],
+		ofTable:   s.offsets.fse.dt[:maxTablesize],
+		llState:   uint64(s.litLengths.state.state),
+		mlState:   uint64(s.matchLengths.state.state),
+		ofState:   uint64(s.offsets.state.state),
+		seqs:      seqs,
+		iteration: len(seqs) - 1,
+		litRemain: len(s.literals),
+	}
+
+	if debugDecoder {
+		println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
+	}
+
+	s.seqSize = 0
+	lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
+	var errCode int
+	if cpuinfo.HasBMI2() {
+		if lte56bits {
+			errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
+		} else {
+			errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
+		}
+	} else {
+		if lte56bits {
+			errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
+		} else {
+			errCode = sequenceDecs_decode_amd64(s, br, &ctx)
+		}
+	}
+	if errCode != 0 {
+		i := len(seqs) - ctx.iteration - 1
+		switch errCode {
+		case errorMatchLenOfsMismatch:
+			ml := ctx.seqs[i].ml
+			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+
+		case errorMatchLenTooBig:
+			ml := ctx.seqs[i].ml
+			return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
+
+		case errorNotEnoughLiterals:
+			ll := ctx.seqs[i].ll
+			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
+		case errorOverread:
+			return io.ErrUnexpectedEOF
+		}
+
+		return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
+	}
+
+	if ctx.litRemain < 0 {
+		return fmt.Errorf("literal count is too big: total available %d, total requested %d",
+			len(s.literals), len(s.literals)-ctx.litRemain)
+	}
+
+	s.seqSize += ctx.litRemain
+	if s.seqSize > maxBlockSize {
+		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+	}
+	if debugDecoder {
+		println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
+	}
+	err := br.close()
+	if err != nil {
+		printf("Closing sequences: %v, %+v\n", err, *br)
+	}
+	return err
+}
+
+// --------------------------------------------------------------------------------
+
+type executeAsmContext struct {
+	seqs        []seqVals
+	seqIndex    int
+	out         []byte
+	history     []byte
+	literals    []byte
+	outPosition int
+	litPosition int
+	windowSize  int
+}
+
+// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
+//
+// Returns false if a match offset is too big.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//
+//go:noescape
+func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
+
+// Same as above, but with safe memcopies
+//
+//go:noescape
+func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
+
+// executeSimple handles cases when dictionary is not used.
+func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
+	// Ensure we have enough output size...
+	if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
+		addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
+		s.out = append(s.out, make([]byte, addBytes)...)
+		s.out = s.out[:len(s.out)-addBytes]
+	}
+
+	if debugDecoder {
+		printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
+	}
+
+	var t = len(s.out)
+	out := s.out[:t+s.seqSize]
+
+	ctx := executeAsmContext{
+		seqs:        seqs,
+		seqIndex:    0,
+		out:         out,
+		history:     hist,
+		outPosition: t,
+		litPosition: 0,
+		literals:    s.literals,
+		windowSize:  s.windowSize,
+	}
+	var ok bool
+	if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
+		ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
+	} else {
+		ok = sequenceDecs_executeSimple_amd64(&ctx)
+	}
+	if !ok {
+		return fmt.Errorf("match offset (%d) bigger than current history (%d)",
+			seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
+	}
+	s.literals = s.literals[ctx.litPosition:]
+	t = ctx.outPosition
+
+	// Add final literals
+	copy(out[t:], s.literals)
+	if debugDecoder {
+		t += len(s.literals)
+		if t != len(out) {
+			panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
+		}
+	}
+	s.out = out
+
+	return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
new file mode 100644
index 00000000000..5b06174b898
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -0,0 +1,4151 @@
+// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: CMOV
+TEXT ·sequenceDecs_decode_amd64(SB), $8-32
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
+	ADDQ    SI, AX
+	MOVQ    AX, (SP)
+	MOVQ    ctx+16(FP), AX
+	MOVQ    72(AX), DI
+	MOVQ    80(AX), R8
+	MOVQ    88(AX), R9
+	MOVQ    104(AX), R10
+	MOVQ    s+0(FP), AX
+	MOVQ    144(AX), R11
+	MOVQ    152(AX), R12
+	MOVQ    160(AX), R13
+
+sequenceDecs_decode_amd64_main_loop:
+	MOVQ (SP), R14
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decode_amd64_fill_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R14
+	MOVQ (R14), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decode_amd64_fill_end
+
+sequenceDecs_decode_amd64_fill_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decode_amd64_fill_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decode_amd64_fill_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R14
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R14), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decode_amd64_fill_byte_by_byte
+
+sequenceDecs_decode_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decode_amd64_fill_end:
+	// Update offset
+	MOVQ  R9, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R15
+	SHLQ  CL, R15
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decode_amd64_of_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decode_amd64_of_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decode_amd64_of_update_zero
+	NEGQ  CX
+	SHRQ  CL, R15
+	ADDQ  R15, AX
+
+sequenceDecs_decode_amd64_of_update_zero:
+	MOVQ AX, 16(R10)
+
+	// Update match length
+	MOVQ  R8, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R15
+	SHLQ  CL, R15
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decode_amd64_ml_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decode_amd64_ml_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decode_amd64_ml_update_zero
+	NEGQ  CX
+	SHRQ  CL, R15
+	ADDQ  R15, AX
+
+sequenceDecs_decode_amd64_ml_update_zero:
+	MOVQ AX, 8(R10)
+
+	// Fill bitreader to have enough for the remaining
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decode_amd64_fill_2_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R14
+	MOVQ (R14), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decode_amd64_fill_2_end
+
+sequenceDecs_decode_amd64_fill_2_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decode_amd64_fill_2_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decode_amd64_fill_2_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R14
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R14), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decode_amd64_fill_2_byte_by_byte
+
+sequenceDecs_decode_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decode_amd64_fill_2_end:
+	// Update literal length
+	MOVQ  DI, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R15
+	SHLQ  CL, R15
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decode_amd64_ll_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decode_amd64_ll_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decode_amd64_ll_update_zero
+	NEGQ  CX
+	SHRQ  CL, R15
+	ADDQ  R15, AX
+
+sequenceDecs_decode_amd64_ll_update_zero:
+	MOVQ AX, (R10)
+
+	// Fill bitreader for state updates
+	MOVQ    R14, (SP)
+	MOVQ    R9, AX
+	SHRQ    $0x08, AX
+	MOVBQZX AL, AX
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decode_amd64_skip_update
+
+	// Update Literal Length State
+	MOVBQZX DI, R14
+	SHRL    $0x10, DI
+	LEAQ    (BX)(R14*1), CX
+	MOVQ    DX, R15
+	MOVQ    CX, BX
+	ROLQ    CL, R15
+	MOVL    $0x00000001, BP
+	MOVB    R14, CL
+	SHLL    CL, BP
+	DECL    BP
+	ANDQ    BP, R15
+	ADDQ    R15, DI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Match Length State
+	MOVBQZX R8, R14
+	SHRL    $0x10, R8
+	LEAQ    (BX)(R14*1), CX
+	MOVQ    DX, R15
+	MOVQ    CX, BX
+	ROLQ    CL, R15
+	MOVL    $0x00000001, BP
+	MOVB    R14, CL
+	SHLL    CL, BP
+	DECL    BP
+	ANDQ    BP, R15
+	ADDQ    R15, R8
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Offset State
+	MOVBQZX R9, R14
+	SHRL    $0x10, R9
+	LEAQ    (BX)(R14*1), CX
+	MOVQ    DX, R15
+	MOVQ    CX, BX
+	ROLQ    CL, R15
+	MOVL    $0x00000001, BP
+	MOVB    R14, CL
+	SHLL    CL, BP
+	DECL    BP
+	ANDQ    BP, R15
+	ADDQ    R15, R9
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decode_amd64_skip_update:
+	// Adjust offset
+	MOVQ 16(R10), CX
+	CMPQ AX, $0x01
+	JBE  sequenceDecs_decode_amd64_adjust_offsetB_1_or_0
+	MOVQ R12, R13
+	MOVQ R11, R12
+	MOVQ CX, R11
+	JMP  sequenceDecs_decode_amd64_after_adjust
+
+sequenceDecs_decode_amd64_adjust_offsetB_1_or_0:
+	CMPQ (R10), $0x00000000
+	JNE  sequenceDecs_decode_amd64_adjust_offset_maybezero
+	INCQ CX
+	JMP  sequenceDecs_decode_amd64_adjust_offset_nonzero
+
+sequenceDecs_decode_amd64_adjust_offset_maybezero:
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_amd64_adjust_offset_nonzero
+	MOVQ  R11, CX
+	JMP   sequenceDecs_decode_amd64_after_adjust
+
+sequenceDecs_decode_amd64_adjust_offset_nonzero:
+	CMPQ CX, $0x01
+	JB   sequenceDecs_decode_amd64_adjust_zero
+	JEQ  sequenceDecs_decode_amd64_adjust_one
+	CMPQ CX, $0x02
+	JA   sequenceDecs_decode_amd64_adjust_three
+	JMP  sequenceDecs_decode_amd64_adjust_two
+
+sequenceDecs_decode_amd64_adjust_zero:
+	MOVQ R11, AX
+	JMP  sequenceDecs_decode_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_amd64_adjust_one:
+	MOVQ R12, AX
+	JMP  sequenceDecs_decode_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_amd64_adjust_two:
+	MOVQ R13, AX
+	JMP  sequenceDecs_decode_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_amd64_adjust_three:
+	LEAQ -1(R11), AX
+
+sequenceDecs_decode_amd64_adjust_test_temp_valid:
+	TESTQ AX, AX
+	JNZ   sequenceDecs_decode_amd64_adjust_temp_valid
+	MOVQ  $0x00000001, AX
+
+sequenceDecs_decode_amd64_adjust_temp_valid:
+	CMPQ    CX, $0x01
+	CMOVQNE R12, R13
+	MOVQ    R11, R12
+	MOVQ    AX, R11
+	MOVQ    AX, CX
+
+sequenceDecs_decode_amd64_after_adjust:
+	MOVQ CX, 16(R10)
+
+	// Check values
+	MOVQ  8(R10), AX
+	MOVQ  (R10), R14
+	LEAQ  (AX)(R14*1), R15
+	MOVQ  s+0(FP), BP
+	ADDQ  R15, 256(BP)
+	MOVQ  ctx+16(FP), R15
+	SUBQ  R14, 128(R15)
+	JS    error_not_enough_literals
+	CMPQ  AX, $0x00020002
+	JA    sequenceDecs_decode_amd64_error_match_len_too_big
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_amd64_match_len_ofs_ok
+	TESTQ AX, AX
+	JNZ   sequenceDecs_decode_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_amd64_match_len_ofs_ok:
+	ADDQ $0x18, R10
+	MOVQ ctx+16(FP), AX
+	DECQ 96(AX)
+	JNS  sequenceDecs_decode_amd64_main_loop
+	MOVQ s+0(FP), AX
+	MOVQ R11, 144(AX)
+	MOVQ R12, 152(AX)
+	MOVQ R13, 160(AX)
+	MOVQ br+8(FP), AX
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decode_amd64_error_match_len_ofs_mismatch:
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decode_amd64_error_match_len_too_big:
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: CMOV
+TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
+	ADDQ    SI, AX
+	MOVQ    AX, (SP)
+	MOVQ    ctx+16(FP), AX
+	MOVQ    72(AX), DI
+	MOVQ    80(AX), R8
+	MOVQ    88(AX), R9
+	MOVQ    104(AX), R10
+	MOVQ    s+0(FP), AX
+	MOVQ    144(AX), R11
+	MOVQ    152(AX), R12
+	MOVQ    160(AX), R13
+
+sequenceDecs_decode_56_amd64_main_loop:
+	MOVQ (SP), R14
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decode_56_amd64_fill_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R14
+	MOVQ (R14), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decode_56_amd64_fill_end
+
+sequenceDecs_decode_56_amd64_fill_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decode_56_amd64_fill_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decode_56_amd64_fill_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R14
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R14), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decode_56_amd64_fill_byte_by_byte
+
+sequenceDecs_decode_56_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decode_56_amd64_fill_end:
+	// Update offset
+	MOVQ  R9, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R15
+	SHLQ  CL, R15
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decode_56_amd64_of_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decode_56_amd64_of_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decode_56_amd64_of_update_zero
+	NEGQ  CX
+	SHRQ  CL, R15
+	ADDQ  R15, AX
+
+sequenceDecs_decode_56_amd64_of_update_zero:
+	MOVQ AX, 16(R10)
+
+	// Update match length
+	MOVQ  R8, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R15
+	SHLQ  CL, R15
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decode_56_amd64_ml_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decode_56_amd64_ml_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decode_56_amd64_ml_update_zero
+	NEGQ  CX
+	SHRQ  CL, R15
+	ADDQ  R15, AX
+
+sequenceDecs_decode_56_amd64_ml_update_zero:
+	MOVQ AX, 8(R10)
+
+	// Update literal length
+	MOVQ  DI, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R15
+	SHLQ  CL, R15
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decode_56_amd64_ll_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decode_56_amd64_ll_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decode_56_amd64_ll_update_zero
+	NEGQ  CX
+	SHRQ  CL, R15
+	ADDQ  R15, AX
+
+sequenceDecs_decode_56_amd64_ll_update_zero:
+	MOVQ AX, (R10)
+
+	// Fill bitreader for state updates
+	MOVQ    R14, (SP)
+	MOVQ    R9, AX
+	SHRQ    $0x08, AX
+	MOVBQZX AL, AX
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decode_56_amd64_skip_update
+
+	// Update Literal Length State
+	MOVBQZX DI, R14
+	SHRL    $0x10, DI
+	LEAQ    (BX)(R14*1), CX
+	MOVQ    DX, R15
+	MOVQ    CX, BX
+	ROLQ    CL, R15
+	MOVL    $0x00000001, BP
+	MOVB    R14, CL
+	SHLL    CL, BP
+	DECL    BP
+	ANDQ    BP, R15
+	ADDQ    R15, DI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Match Length State
+	MOVBQZX R8, R14
+	SHRL    $0x10, R8
+	LEAQ    (BX)(R14*1), CX
+	MOVQ    DX, R15
+	MOVQ    CX, BX
+	ROLQ    CL, R15
+	MOVL    $0x00000001, BP
+	MOVB    R14, CL
+	SHLL    CL, BP
+	DECL    BP
+	ANDQ    BP, R15
+	ADDQ    R15, R8
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Offset State
+	MOVBQZX R9, R14
+	SHRL    $0x10, R9
+	LEAQ    (BX)(R14*1), CX
+	MOVQ    DX, R15
+	MOVQ    CX, BX
+	ROLQ    CL, R15
+	MOVL    $0x00000001, BP
+	MOVB    R14, CL
+	SHLL    CL, BP
+	DECL    BP
+	ANDQ    BP, R15
+	ADDQ    R15, R9
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decode_56_amd64_skip_update:
+	// Adjust offset
+	MOVQ 16(R10), CX
+	CMPQ AX, $0x01
+	JBE  sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0
+	MOVQ R12, R13
+	MOVQ R11, R12
+	MOVQ CX, R11
+	JMP  sequenceDecs_decode_56_amd64_after_adjust
+
+sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0:
+	CMPQ (R10), $0x00000000
+	JNE  sequenceDecs_decode_56_amd64_adjust_offset_maybezero
+	INCQ CX
+	JMP  sequenceDecs_decode_56_amd64_adjust_offset_nonzero
+
+sequenceDecs_decode_56_amd64_adjust_offset_maybezero:
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_56_amd64_adjust_offset_nonzero
+	MOVQ  R11, CX
+	JMP   sequenceDecs_decode_56_amd64_after_adjust
+
+sequenceDecs_decode_56_amd64_adjust_offset_nonzero:
+	CMPQ CX, $0x01
+	JB   sequenceDecs_decode_56_amd64_adjust_zero
+	JEQ  sequenceDecs_decode_56_amd64_adjust_one
+	CMPQ CX, $0x02
+	JA   sequenceDecs_decode_56_amd64_adjust_three
+	JMP  sequenceDecs_decode_56_amd64_adjust_two
+
+sequenceDecs_decode_56_amd64_adjust_zero:
+	MOVQ R11, AX
+	JMP  sequenceDecs_decode_56_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_56_amd64_adjust_one:
+	MOVQ R12, AX
+	JMP  sequenceDecs_decode_56_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_56_amd64_adjust_two:
+	MOVQ R13, AX
+	JMP  sequenceDecs_decode_56_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_56_amd64_adjust_three:
+	LEAQ -1(R11), AX
+
+sequenceDecs_decode_56_amd64_adjust_test_temp_valid:
+	TESTQ AX, AX
+	JNZ   sequenceDecs_decode_56_amd64_adjust_temp_valid
+	MOVQ  $0x00000001, AX
+
+sequenceDecs_decode_56_amd64_adjust_temp_valid:
+	CMPQ    CX, $0x01
+	CMOVQNE R12, R13
+	MOVQ    R11, R12
+	MOVQ    AX, R11
+	MOVQ    AX, CX
+
+sequenceDecs_decode_56_amd64_after_adjust:
+	MOVQ CX, 16(R10)
+
+	// Check values
+	MOVQ  8(R10), AX
+	MOVQ  (R10), R14
+	LEAQ  (AX)(R14*1), R15
+	MOVQ  s+0(FP), BP
+	ADDQ  R15, 256(BP)
+	MOVQ  ctx+16(FP), R15
+	SUBQ  R14, 128(R15)
+	JS    error_not_enough_literals
+	CMPQ  AX, $0x00020002
+	JA    sequenceDecs_decode_56_amd64_error_match_len_too_big
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_56_amd64_match_len_ofs_ok
+	TESTQ AX, AX
+	JNZ   sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_56_amd64_match_len_ofs_ok:
+	ADDQ $0x18, R10
+	MOVQ ctx+16(FP), AX
+	DECQ 96(AX)
+	JNS  sequenceDecs_decode_56_amd64_main_loop
+	MOVQ s+0(FP), AX
+	MOVQ R11, 144(AX)
+	MOVQ R12, 152(AX)
+	MOVQ R13, 160(AX)
+	MOVQ br+8(FP), AX
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch:
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decode_56_amd64_error_match_len_too_big:
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: BMI, BMI2, CMOV
+TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
+	ADDQ    BX, CX
+	MOVQ    CX, (SP)
+	MOVQ    ctx+16(FP), CX
+	MOVQ    72(CX), SI
+	MOVQ    80(CX), DI
+	MOVQ    88(CX), R8
+	MOVQ    104(CX), R9
+	MOVQ    s+0(FP), CX
+	MOVQ    144(CX), R10
+	MOVQ    152(CX), R11
+	MOVQ    160(CX), R12
+
+sequenceDecs_decode_bmi2_main_loop:
+	MOVQ (SP), R13
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decode_bmi2_fill_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R13
+	MOVQ (R13), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decode_bmi2_fill_end
+
+sequenceDecs_decode_bmi2_fill_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decode_bmi2_fill_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decode_bmi2_fill_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R13), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decode_bmi2_fill_byte_by_byte
+
+sequenceDecs_decode_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decode_bmi2_fill_end:
+	// Update offset
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, R8, R14
+	MOVQ   AX, R15
+	LEAQ   (DX)(R14*1), CX
+	ROLQ   CL, R15
+	BZHIQ  R14, R15, R15
+	MOVQ   CX, DX
+	MOVQ   R8, CX
+	SHRQ   $0x20, CX
+	ADDQ   R15, CX
+	MOVQ   CX, 16(R9)
+
+	// Update match length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, DI, R14
+	MOVQ   AX, R15
+	LEAQ   (DX)(R14*1), CX
+	ROLQ   CL, R15
+	BZHIQ  R14, R15, R15
+	MOVQ   CX, DX
+	MOVQ   DI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R15, CX
+	MOVQ   CX, 8(R9)
+
+	// Fill bitreader to have enough for the remaining
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decode_bmi2_fill_2_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R13
+	MOVQ (R13), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decode_bmi2_fill_2_end
+
+sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decode_bmi2_fill_2_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decode_bmi2_fill_2_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R13), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decode_bmi2_fill_2_byte_by_byte
+
+sequenceDecs_decode_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decode_bmi2_fill_2_end:
+	// Update literal length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, SI, R14
+	MOVQ   AX, R15
+	LEAQ   (DX)(R14*1), CX
+	ROLQ   CL, R15
+	BZHIQ  R14, R15, R15
+	MOVQ   CX, DX
+	MOVQ   SI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R15, CX
+	MOVQ   CX, (R9)
+
+	// Fill bitreader for state updates
+	MOVQ    R13, (SP)
+	MOVQ    $0x00000808, CX
+	BEXTRQ  CX, R8, R13
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decode_bmi2_skip_update
+	LEAQ    (SI)(DI*1), R14
+	ADDQ    R8, R14
+	MOVBQZX R14, R14
+	LEAQ    (DX)(R14*1), CX
+	MOVQ    AX, R15
+	MOVQ    CX, DX
+	ROLQ    CL, R15
+	BZHIQ   R14, R15, R15
+
+	// Update Offset State
+	BZHIQ R8, R15, CX
+	SHRXQ R8, R15, R15
+	SHRL  $0x10, R8
+	ADDQ  CX, R8
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Match Length State
+	BZHIQ DI, R15, CX
+	SHRXQ DI, R15, R15
+	SHRL  $0x10, DI
+	ADDQ  CX, DI
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Literal Length State
+	BZHIQ SI, R15, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(SI*8), SI
+
+sequenceDecs_decode_bmi2_skip_update:
+	// Adjust offset
+	MOVQ 16(R9), CX
+	CMPQ R13, $0x01
+	JBE  sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0
+	MOVQ R11, R12
+	MOVQ R10, R11
+	MOVQ CX, R10
+	JMP  sequenceDecs_decode_bmi2_after_adjust
+
+sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0:
+	CMPQ (R9), $0x00000000
+	JNE  sequenceDecs_decode_bmi2_adjust_offset_maybezero
+	INCQ CX
+	JMP  sequenceDecs_decode_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decode_bmi2_adjust_offset_maybezero:
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_bmi2_adjust_offset_nonzero
+	MOVQ  R10, CX
+	JMP   sequenceDecs_decode_bmi2_after_adjust
+
+sequenceDecs_decode_bmi2_adjust_offset_nonzero:
+	CMPQ CX, $0x01
+	JB   sequenceDecs_decode_bmi2_adjust_zero
+	JEQ  sequenceDecs_decode_bmi2_adjust_one
+	CMPQ CX, $0x02
+	JA   sequenceDecs_decode_bmi2_adjust_three
+	JMP  sequenceDecs_decode_bmi2_adjust_two
+
+sequenceDecs_decode_bmi2_adjust_zero:
+	MOVQ R10, R13
+	JMP  sequenceDecs_decode_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_bmi2_adjust_one:
+	MOVQ R11, R13
+	JMP  sequenceDecs_decode_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_bmi2_adjust_two:
+	MOVQ R12, R13
+	JMP  sequenceDecs_decode_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_bmi2_adjust_three:
+	LEAQ -1(R10), R13
+
+sequenceDecs_decode_bmi2_adjust_test_temp_valid:
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decode_bmi2_adjust_temp_valid
+	MOVQ  $0x00000001, R13
+
+sequenceDecs_decode_bmi2_adjust_temp_valid:
+	CMPQ    CX, $0x01
+	CMOVQNE R11, R12
+	MOVQ    R10, R11
+	MOVQ    R13, R10
+	MOVQ    R13, CX
+
+sequenceDecs_decode_bmi2_after_adjust:
+	MOVQ CX, 16(R9)
+
+	// Check values
+	MOVQ  8(R9), R13
+	MOVQ  (R9), R14
+	LEAQ  (R13)(R14*1), R15
+	MOVQ  s+0(FP), BP
+	ADDQ  R15, 256(BP)
+	MOVQ  ctx+16(FP), R15
+	SUBQ  R14, 128(R15)
+	JS    error_not_enough_literals
+	CMPQ  R13, $0x00020002
+	JA    sequenceDecs_decode_bmi2_error_match_len_too_big
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_bmi2_match_len_ofs_ok
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_bmi2_match_len_ofs_ok:
+	ADDQ $0x18, R9
+	MOVQ ctx+16(FP), CX
+	DECQ 96(CX)
+	JNS  sequenceDecs_decode_bmi2_main_loop
+	MOVQ s+0(FP), CX
+	MOVQ R10, 144(CX)
+	MOVQ R11, 152(CX)
+	MOVQ R12, 160(CX)
+	MOVQ br+8(FP), CX
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch:
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decode_bmi2_error_match_len_too_big:
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: BMI, BMI2, CMOV
+TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
+	ADDQ    BX, CX
+	MOVQ    CX, (SP)
+	MOVQ    ctx+16(FP), CX
+	MOVQ    72(CX), SI
+	MOVQ    80(CX), DI
+	MOVQ    88(CX), R8
+	MOVQ    104(CX), R9
+	MOVQ    s+0(FP), CX
+	MOVQ    144(CX), R10
+	MOVQ    152(CX), R11
+	MOVQ    160(CX), R12
+
+sequenceDecs_decode_56_bmi2_main_loop:
+	MOVQ (SP), R13
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decode_56_bmi2_fill_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R13
+	MOVQ (R13), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decode_56_bmi2_fill_end
+
+sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decode_56_bmi2_fill_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decode_56_bmi2_fill_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R13), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decode_56_bmi2_fill_byte_by_byte
+
+sequenceDecs_decode_56_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decode_56_bmi2_fill_end:
+	// Update offset
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, R8, R14
+	MOVQ   AX, R15
+	LEAQ   (DX)(R14*1), CX
+	ROLQ   CL, R15
+	BZHIQ  R14, R15, R15
+	MOVQ   CX, DX
+	MOVQ   R8, CX
+	SHRQ   $0x20, CX
+	ADDQ   R15, CX
+	MOVQ   CX, 16(R9)
+
+	// Update match length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, DI, R14
+	MOVQ   AX, R15
+	LEAQ   (DX)(R14*1), CX
+	ROLQ   CL, R15
+	BZHIQ  R14, R15, R15
+	MOVQ   CX, DX
+	MOVQ   DI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R15, CX
+	MOVQ   CX, 8(R9)
+
+	// Update literal length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, SI, R14
+	MOVQ   AX, R15
+	LEAQ   (DX)(R14*1), CX
+	ROLQ   CL, R15
+	BZHIQ  R14, R15, R15
+	MOVQ   CX, DX
+	MOVQ   SI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R15, CX
+	MOVQ   CX, (R9)
+
+	// Fill bitreader for state updates
+	MOVQ    R13, (SP)
+	MOVQ    $0x00000808, CX
+	BEXTRQ  CX, R8, R13
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decode_56_bmi2_skip_update
+	LEAQ    (SI)(DI*1), R14
+	ADDQ    R8, R14
+	MOVBQZX R14, R14
+	LEAQ    (DX)(R14*1), CX
+	MOVQ    AX, R15
+	MOVQ    CX, DX
+	ROLQ    CL, R15
+	BZHIQ   R14, R15, R15
+
+	// Update Offset State
+	BZHIQ R8, R15, CX
+	SHRXQ R8, R15, R15
+	SHRL  $0x10, R8
+	ADDQ  CX, R8
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Match Length State
+	BZHIQ DI, R15, CX
+	SHRXQ DI, R15, R15
+	SHRL  $0x10, DI
+	ADDQ  CX, DI
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Literal Length State
+	BZHIQ SI, R15, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(SI*8), SI
+
+sequenceDecs_decode_56_bmi2_skip_update:
+	// Adjust offset
+	MOVQ 16(R9), CX
+	CMPQ R13, $0x01
+	JBE  sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0
+	MOVQ R11, R12
+	MOVQ R10, R11
+	MOVQ CX, R10
+	JMP  sequenceDecs_decode_56_bmi2_after_adjust
+
+sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0:
+	CMPQ (R9), $0x00000000
+	JNE  sequenceDecs_decode_56_bmi2_adjust_offset_maybezero
+	INCQ CX
+	JMP  sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decode_56_bmi2_adjust_offset_maybezero:
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
+	MOVQ  R10, CX
+	JMP   sequenceDecs_decode_56_bmi2_after_adjust
+
+sequenceDecs_decode_56_bmi2_adjust_offset_nonzero:
+	CMPQ CX, $0x01
+	JB   sequenceDecs_decode_56_bmi2_adjust_zero
+	JEQ  sequenceDecs_decode_56_bmi2_adjust_one
+	CMPQ CX, $0x02
+	JA   sequenceDecs_decode_56_bmi2_adjust_three
+	JMP  sequenceDecs_decode_56_bmi2_adjust_two
+
+sequenceDecs_decode_56_bmi2_adjust_zero:
+	MOVQ R10, R13
+	JMP  sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_56_bmi2_adjust_one:
+	MOVQ R11, R13
+	JMP  sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_56_bmi2_adjust_two:
+	MOVQ R12, R13
+	JMP  sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_56_bmi2_adjust_three:
+	LEAQ -1(R10), R13
+
+sequenceDecs_decode_56_bmi2_adjust_test_temp_valid:
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decode_56_bmi2_adjust_temp_valid
+	MOVQ  $0x00000001, R13
+
+sequenceDecs_decode_56_bmi2_adjust_temp_valid:
+	CMPQ    CX, $0x01
+	CMOVQNE R11, R12
+	MOVQ    R10, R11
+	MOVQ    R13, R10
+	MOVQ    R13, CX
+
+sequenceDecs_decode_56_bmi2_after_adjust:
+	MOVQ CX, 16(R9)
+
+	// Check values
+	MOVQ  8(R9), R13
+	MOVQ  (R9), R14
+	LEAQ  (R13)(R14*1), R15
+	MOVQ  s+0(FP), BP
+	ADDQ  R15, 256(BP)
+	MOVQ  ctx+16(FP), R15
+	SUBQ  R14, 128(R15)
+	JS    error_not_enough_literals
+	CMPQ  R13, $0x00020002
+	JA    sequenceDecs_decode_56_bmi2_error_match_len_too_big
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decode_56_bmi2_match_len_ofs_ok
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
+	ADDQ $0x18, R9
+	MOVQ ctx+16(FP), CX
+	DECQ 96(CX)
+	JNS  sequenceDecs_decode_56_bmi2_main_loop
+	MOVQ s+0(FP), CX
+	MOVQ R10, 144(CX)
+	MOVQ R11, 152(CX)
+	MOVQ R12, 160(CX)
+	MOVQ br+8(FP), CX
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch:
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decode_56_bmi2_error_match_len_too_big:
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
+// Requires: SSE
+TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
+	MOVQ  ctx+0(FP), R10
+	MOVQ  8(R10), CX
+	TESTQ CX, CX
+	JZ    empty_seqs
+	MOVQ  (R10), AX
+	MOVQ  24(R10), DX
+	MOVQ  32(R10), BX
+	MOVQ  80(R10), SI
+	MOVQ  104(R10), DI
+	MOVQ  120(R10), R8
+	MOVQ  56(R10), R9
+	MOVQ  64(R10), R10
+	ADDQ  R10, R9
+
+	// seqsBase += 24 * seqIndex
+	LEAQ (DX)(DX*2), R11
+	SHLQ $0x03, R11
+	ADDQ R11, AX
+
+	// outBase += outPosition
+	ADDQ DI, BX
+
+main_loop:
+	MOVQ (AX), R11
+	MOVQ 16(AX), R12
+	MOVQ 8(AX), R13
+
+	// Copy literals
+	TESTQ R11, R11
+	JZ    check_offset
+	XORQ  R14, R14
+
+copy_1:
+	MOVUPS (SI)(R14*1), X0
+	MOVUPS X0, (BX)(R14*1)
+	ADDQ   $0x10, R14
+	CMPQ   R14, R11
+	JB     copy_1
+	ADDQ   R11, SI
+	ADDQ   R11, BX
+	ADDQ   R11, DI
+
+	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+	LEAQ (DI)(R10*1), R11
+	CMPQ R12, R11
+	JG   error_match_off_too_big
+	CMPQ R12, R8
+	JG   error_match_off_too_big
+
+	// Copy match from history
+	MOVQ R12, R11
+	SUBQ DI, R11
+	JLS  copy_match
+	MOVQ R9, R14
+	SUBQ R11, R14
+	CMPQ R13, R11
+	JG   copy_all_from_history
+	MOVQ R13, R11
+	SUBQ $0x10, R11
+	JB   copy_4_small
+
+copy_4_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (BX)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, BX
+	SUBQ   $0x10, R11
+	JAE    copy_4_loop
+	LEAQ   16(R14)(R11*1), R14
+	LEAQ   16(BX)(R11*1), BX
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(BX)
+	JMP    copy_4_end
+
+copy_4_small:
+	CMPQ R13, $0x03
+	JE   copy_4_move_3
+	CMPQ R13, $0x08
+	JB   copy_4_move_4through7
+	JMP  copy_4_move_8through16
+
+copy_4_move_3:
+	MOVW (R14), R11
+	MOVB 2(R14), R12
+	MOVW R11, (BX)
+	MOVB R12, 2(BX)
+	ADDQ R13, R14
+	ADDQ R13, BX
+	JMP  copy_4_end
+
+copy_4_move_4through7:
+	MOVL (R14), R11
+	MOVL -4(R14)(R13*1), R12
+	MOVL R11, (BX)
+	MOVL R12, -4(BX)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, BX
+	JMP  copy_4_end
+
+copy_4_move_8through16:
+	MOVQ (R14), R11
+	MOVQ -8(R14)(R13*1), R12
+	MOVQ R11, (BX)
+	MOVQ R12, -8(BX)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, BX
+
+copy_4_end:
+	ADDQ R13, DI
+	ADDQ $0x18, AX
+	INCQ DX
+	CMPQ DX, CX
+	JB   main_loop
+	JMP  loop_finished
+
+copy_all_from_history:
+	MOVQ R11, R15
+	SUBQ $0x10, R15
+	JB   copy_5_small
+
+copy_5_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (BX)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, BX
+	SUBQ   $0x10, R15
+	JAE    copy_5_loop
+	LEAQ   16(R14)(R15*1), R14
+	LEAQ   16(BX)(R15*1), BX
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(BX)
+	JMP    copy_5_end
+
+copy_5_small:
+	CMPQ R11, $0x03
+	JE   copy_5_move_3
+	JB   copy_5_move_1or2
+	CMPQ R11, $0x08
+	JB   copy_5_move_4through7
+	JMP  copy_5_move_8through16
+
+copy_5_move_1or2:
+	MOVB (R14), R15
+	MOVB -1(R14)(R11*1), BP
+	MOVB R15, (BX)
+	MOVB BP, -1(BX)(R11*1)
+	ADDQ R11, R14
+	ADDQ R11, BX
+	JMP  copy_5_end
+
+copy_5_move_3:
+	MOVW (R14), R15
+	MOVB 2(R14), BP
+	MOVW R15, (BX)
+	MOVB BP, 2(BX)
+	ADDQ R11, R14
+	ADDQ R11, BX
+	JMP  copy_5_end
+
+copy_5_move_4through7:
+	MOVL (R14), R15
+	MOVL -4(R14)(R11*1), BP
+	MOVL R15, (BX)
+	MOVL BP, -4(BX)(R11*1)
+	ADDQ R11, R14
+	ADDQ R11, BX
+	JMP  copy_5_end
+
+copy_5_move_8through16:
+	MOVQ (R14), R15
+	MOVQ -8(R14)(R11*1), BP
+	MOVQ R15, (BX)
+	MOVQ BP, -8(BX)(R11*1)
+	ADDQ R11, R14
+	ADDQ R11, BX
+
+copy_5_end:
+	ADDQ R11, DI
+	SUBQ R11, R13
+
+	// Copy match from the current buffer
+copy_match:
+	MOVQ BX, R11
+	SUBQ R12, R11
+
+	// ml <= mo
+	CMPQ R13, R12
+	JA   copy_overlapping_match
+
+	// Copy non-overlapping match
+	ADDQ R13, DI
+	MOVQ BX, R12
+	ADDQ R13, BX
+
+copy_2:
+	MOVUPS (R11), X0
+	MOVUPS X0, (R12)
+	ADDQ   $0x10, R11
+	ADDQ   $0x10, R12
+	SUBQ   $0x10, R13
+	JHI    copy_2
+	JMP    handle_loop
+
+	// Copy overlapping match
+copy_overlapping_match:
+	ADDQ R13, DI
+
+copy_slow_3:
+	MOVB (R11), R12
+	MOVB R12, (BX)
+	INCQ R11
+	INCQ BX
+	DECQ R13
+	JNZ  copy_slow_3
+
+handle_loop:
+	ADDQ $0x18, AX
+	INCQ DX
+	CMPQ DX, CX
+	JB   main_loop
+
+loop_finished:
+	// Return value
+	MOVB $0x01, ret+8(FP)
+
+	// Update the context
+	MOVQ ctx+0(FP), AX
+	MOVQ DX, 24(AX)
+	MOVQ DI, 104(AX)
+	SUBQ 80(AX), SI
+	MOVQ SI, 112(AX)
+	RET
+
+error_match_off_too_big:
+	// Return value
+	MOVB $0x00, ret+8(FP)
+
+	// Update the context
+	MOVQ ctx+0(FP), AX
+	MOVQ DX, 24(AX)
+	MOVQ DI, 104(AX)
+	SUBQ 80(AX), SI
+	MOVQ SI, 112(AX)
+	RET
+
+empty_seqs:
+	// Return value
+	MOVB $0x01, ret+8(FP)
+	RET
+
+// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
+// Requires: SSE
+TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9
+	MOVQ  ctx+0(FP), R10
+	MOVQ  8(R10), CX
+	TESTQ CX, CX
+	JZ    empty_seqs
+	MOVQ  (R10), AX
+	MOVQ  24(R10), DX
+	MOVQ  32(R10), BX
+	MOVQ  80(R10), SI
+	MOVQ  104(R10), DI
+	MOVQ  120(R10), R8
+	MOVQ  56(R10), R9
+	MOVQ  64(R10), R10
+	ADDQ  R10, R9
+
+	// seqsBase += 24 * seqIndex
+	LEAQ (DX)(DX*2), R11
+	SHLQ $0x03, R11
+	ADDQ R11, AX
+
+	// outBase += outPosition
+	ADDQ DI, BX
+
+main_loop:
+	MOVQ (AX), R11
+	MOVQ 16(AX), R12
+	MOVQ 8(AX), R13
+
+	// Copy literals
+	TESTQ R11, R11
+	JZ    check_offset
+	MOVQ  R11, R14
+	SUBQ  $0x10, R14
+	JB    copy_1_small
+
+copy_1_loop:
+	MOVUPS (SI), X0
+	MOVUPS X0, (BX)
+	ADDQ   $0x10, SI
+	ADDQ   $0x10, BX
+	SUBQ   $0x10, R14
+	JAE    copy_1_loop
+	LEAQ   16(SI)(R14*1), SI
+	LEAQ   16(BX)(R14*1), BX
+	MOVUPS -16(SI), X0
+	MOVUPS X0, -16(BX)
+	JMP    copy_1_end
+
+copy_1_small:
+	CMPQ R11, $0x03
+	JE   copy_1_move_3
+	JB   copy_1_move_1or2
+	CMPQ R11, $0x08
+	JB   copy_1_move_4through7
+	JMP  copy_1_move_8through16
+
+copy_1_move_1or2:
+	MOVB (SI), R14
+	MOVB -1(SI)(R11*1), R15
+	MOVB R14, (BX)
+	MOVB R15, -1(BX)(R11*1)
+	ADDQ R11, SI
+	ADDQ R11, BX
+	JMP  copy_1_end
+
+copy_1_move_3:
+	MOVW (SI), R14
+	MOVB 2(SI), R15
+	MOVW R14, (BX)
+	MOVB R15, 2(BX)
+	ADDQ R11, SI
+	ADDQ R11, BX
+	JMP  copy_1_end
+
+copy_1_move_4through7:
+	MOVL (SI), R14
+	MOVL -4(SI)(R11*1), R15
+	MOVL R14, (BX)
+	MOVL R15, -4(BX)(R11*1)
+	ADDQ R11, SI
+	ADDQ R11, BX
+	JMP  copy_1_end
+
+copy_1_move_8through16:
+	MOVQ (SI), R14
+	MOVQ -8(SI)(R11*1), R15
+	MOVQ R14, (BX)
+	MOVQ R15, -8(BX)(R11*1)
+	ADDQ R11, SI
+	ADDQ R11, BX
+
+copy_1_end:
+	ADDQ R11, DI
+
+	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+	LEAQ (DI)(R10*1), R11
+	CMPQ R12, R11
+	JG   error_match_off_too_big
+	CMPQ R12, R8
+	JG   error_match_off_too_big
+
+	// Copy match from history
+	MOVQ R12, R11
+	SUBQ DI, R11
+	JLS  copy_match
+	MOVQ R9, R14
+	SUBQ R11, R14
+	CMPQ R13, R11
+	JG   copy_all_from_history
+	MOVQ R13, R11
+	SUBQ $0x10, R11
+	JB   copy_4_small
+
+copy_4_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (BX)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, BX
+	SUBQ   $0x10, R11
+	JAE    copy_4_loop
+	LEAQ   16(R14)(R11*1), R14
+	LEAQ   16(BX)(R11*1), BX
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(BX)
+	JMP    copy_4_end
+
+copy_4_small:
+	CMPQ R13, $0x03
+	JE   copy_4_move_3
+	CMPQ R13, $0x08
+	JB   copy_4_move_4through7
+	JMP  copy_4_move_8through16
+
+copy_4_move_3:
+	MOVW (R14), R11
+	MOVB 2(R14), R12
+	MOVW R11, (BX)
+	MOVB R12, 2(BX)
+	ADDQ R13, R14
+	ADDQ R13, BX
+	JMP  copy_4_end
+
+copy_4_move_4through7:
+	MOVL (R14), R11
+	MOVL -4(R14)(R13*1), R12
+	MOVL R11, (BX)
+	MOVL R12, -4(BX)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, BX
+	JMP  copy_4_end
+
+copy_4_move_8through16:
+	MOVQ (R14), R11
+	MOVQ -8(R14)(R13*1), R12
+	MOVQ R11, (BX)
+	MOVQ R12, -8(BX)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, BX
+
+copy_4_end:
+	ADDQ R13, DI
+	ADDQ $0x18, AX
+	INCQ DX
+	CMPQ DX, CX
+	JB   main_loop
+	JMP  loop_finished
+
+copy_all_from_history:
+	MOVQ R11, R15
+	SUBQ $0x10, R15
+	JB   copy_5_small
+
+copy_5_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (BX)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, BX
+	SUBQ   $0x10, R15
+	JAE    copy_5_loop
+	LEAQ   16(R14)(R15*1), R14
+	LEAQ   16(BX)(R15*1), BX
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(BX)
+	JMP    copy_5_end
+
+copy_5_small:
+	CMPQ R11, $0x03
+	JE   copy_5_move_3
+	JB   copy_5_move_1or2
+	CMPQ R11, $0x08
+	JB   copy_5_move_4through7
+	JMP  copy_5_move_8through16
+
+copy_5_move_1or2:
+	MOVB (R14), R15
+	MOVB -1(R14)(R11*1), BP
+	MOVB R15, (BX)
+	MOVB BP, -1(BX)(R11*1)
+	ADDQ R11, R14
+	ADDQ R11, BX
+	JMP  copy_5_end
+
+copy_5_move_3:
+	MOVW (R14), R15
+	MOVB 2(R14), BP
+	MOVW R15, (BX)
+	MOVB BP, 2(BX)
+	ADDQ R11, R14
+	ADDQ R11, BX
+	JMP  copy_5_end
+
+copy_5_move_4through7:
+	MOVL (R14), R15
+	MOVL -4(R14)(R11*1), BP
+	MOVL R15, (BX)
+	MOVL BP, -4(BX)(R11*1)
+	ADDQ R11, R14
+	ADDQ R11, BX
+	JMP  copy_5_end
+
+copy_5_move_8through16:
+	MOVQ (R14), R15
+	MOVQ -8(R14)(R11*1), BP
+	MOVQ R15, (BX)
+	MOVQ BP, -8(BX)(R11*1)
+	ADDQ R11, R14
+	ADDQ R11, BX
+
+copy_5_end:
+	ADDQ R11, DI
+	SUBQ R11, R13
+
+	// Copy match from the current buffer
+copy_match:
+	MOVQ BX, R11
+	SUBQ R12, R11
+
+	// ml <= mo
+	CMPQ R13, R12
+	JA   copy_overlapping_match
+
+	// Copy non-overlapping match
+	ADDQ R13, DI
+	MOVQ R13, R12
+	SUBQ $0x10, R12
+	JB   copy_2_small
+
+copy_2_loop:
+	MOVUPS (R11), X0
+	MOVUPS X0, (BX)
+	ADDQ   $0x10, R11
+	ADDQ   $0x10, BX
+	SUBQ   $0x10, R12
+	JAE    copy_2_loop
+	LEAQ   16(R11)(R12*1), R11
+	LEAQ   16(BX)(R12*1), BX
+	MOVUPS -16(R11), X0
+	MOVUPS X0, -16(BX)
+	JMP    copy_2_end
+
+copy_2_small:
+	CMPQ R13, $0x03
+	JE   copy_2_move_3
+	JB   copy_2_move_1or2
+	CMPQ R13, $0x08
+	JB   copy_2_move_4through7
+	JMP  copy_2_move_8through16
+
+copy_2_move_1or2:
+	MOVB (R11), R12
+	MOVB -1(R11)(R13*1), R14
+	MOVB R12, (BX)
+	MOVB R14, -1(BX)(R13*1)
+	ADDQ R13, R11
+	ADDQ R13, BX
+	JMP  copy_2_end
+
+copy_2_move_3:
+	MOVW (R11), R12
+	MOVB 2(R11), R14
+	MOVW R12, (BX)
+	MOVB R14, 2(BX)
+	ADDQ R13, R11
+	ADDQ R13, BX
+	JMP  copy_2_end
+
+copy_2_move_4through7:
+	MOVL (R11), R12
+	MOVL -4(R11)(R13*1), R14
+	MOVL R12, (BX)
+	MOVL R14, -4(BX)(R13*1)
+	ADDQ R13, R11
+	ADDQ R13, BX
+	JMP  copy_2_end
+
+copy_2_move_8through16:
+	MOVQ (R11), R12
+	MOVQ -8(R11)(R13*1), R14
+	MOVQ R12, (BX)
+	MOVQ R14, -8(BX)(R13*1)
+	ADDQ R13, R11
+	ADDQ R13, BX
+
+copy_2_end:
+	JMP handle_loop
+
+	// Copy overlapping match
+copy_overlapping_match:
+	ADDQ R13, DI
+
+copy_slow_3:
+	MOVB (R11), R12
+	MOVB R12, (BX)
+	INCQ R11
+	INCQ BX
+	DECQ R13
+	JNZ  copy_slow_3
+
+handle_loop:
+	ADDQ $0x18, AX
+	INCQ DX
+	CMPQ DX, CX
+	JB   main_loop
+
+loop_finished:
+	// Return value
+	MOVB $0x01, ret+8(FP)
+
+	// Update the context
+	MOVQ ctx+0(FP), AX
+	MOVQ DX, 24(AX)
+	MOVQ DI, 104(AX)
+	SUBQ 80(AX), SI
+	MOVQ SI, 112(AX)
+	RET
+
+error_match_off_too_big:
+	// Return value
+	MOVB $0x00, ret+8(FP)
+
+	// Update the context
+	MOVQ ctx+0(FP), AX
+	MOVQ DX, 24(AX)
+	MOVQ DI, 104(AX)
+	SUBQ 80(AX), SI
+	MOVQ SI, 112(AX)
+	RET
+
+empty_seqs:
+	// Return value
+	MOVB $0x01, ret+8(FP)
+	RET
+
+// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
+	ADDQ    SI, AX
+	MOVQ    AX, (SP)
+	MOVQ    ctx+16(FP), AX
+	MOVQ    72(AX), DI
+	MOVQ    80(AX), R8
+	MOVQ    88(AX), R9
+	XORQ    CX, CX
+	MOVQ    CX, 8(SP)
+	MOVQ    CX, 16(SP)
+	MOVQ    CX, 24(SP)
+	MOVQ    112(AX), R10
+	MOVQ    128(AX), CX
+	MOVQ    CX, 32(SP)
+	MOVQ    144(AX), R11
+	MOVQ    136(AX), R12
+	MOVQ    200(AX), CX
+	MOVQ    CX, 56(SP)
+	MOVQ    176(AX), CX
+	MOVQ    CX, 48(SP)
+	MOVQ    184(AX), AX
+	MOVQ    AX, 40(SP)
+	MOVQ    40(SP), AX
+	ADDQ    AX, 48(SP)
+
+	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	ADDQ R10, 32(SP)
+
+	// outBase += outPosition
+	ADDQ R12, R10
+
+sequenceDecs_decodeSync_amd64_main_loop:
+	MOVQ (SP), R13
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decodeSync_amd64_fill_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R13
+	MOVQ (R13), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decodeSync_amd64_fill_end
+
+sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decodeSync_amd64_fill_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decodeSync_amd64_fill_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R13), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decodeSync_amd64_fill_byte_by_byte
+
+sequenceDecs_decodeSync_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_amd64_fill_end:
+	// Update offset
+	MOVQ  R9, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R14
+	SHLQ  CL, R14
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decodeSync_amd64_of_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decodeSync_amd64_of_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decodeSync_amd64_of_update_zero
+	NEGQ  CX
+	SHRQ  CL, R14
+	ADDQ  R14, AX
+
+sequenceDecs_decodeSync_amd64_of_update_zero:
+	MOVQ AX, 8(SP)
+
+	// Update match length
+	MOVQ  R8, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R14
+	SHLQ  CL, R14
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decodeSync_amd64_ml_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decodeSync_amd64_ml_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decodeSync_amd64_ml_update_zero
+	NEGQ  CX
+	SHRQ  CL, R14
+	ADDQ  R14, AX
+
+sequenceDecs_decodeSync_amd64_ml_update_zero:
+	MOVQ AX, 16(SP)
+
+	// Fill bitreader to have enough for the remaining
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R13
+	MOVQ (R13), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decodeSync_amd64_fill_2_end
+
+sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decodeSync_amd64_fill_2_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decodeSync_amd64_fill_2_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R13), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_amd64_fill_2_end:
+	// Update literal length
+	MOVQ  DI, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R14
+	SHLQ  CL, R14
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decodeSync_amd64_ll_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decodeSync_amd64_ll_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decodeSync_amd64_ll_update_zero
+	NEGQ  CX
+	SHRQ  CL, R14
+	ADDQ  R14, AX
+
+sequenceDecs_decodeSync_amd64_ll_update_zero:
+	MOVQ AX, 24(SP)
+
+	// Fill bitreader for state updates
+	MOVQ    R13, (SP)
+	MOVQ    R9, AX
+	SHRQ    $0x08, AX
+	MOVBQZX AL, AX
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decodeSync_amd64_skip_update
+
+	// Update Literal Length State
+	MOVBQZX DI, R13
+	SHRL    $0x10, DI
+	LEAQ    (BX)(R13*1), CX
+	MOVQ    DX, R14
+	MOVQ    CX, BX
+	ROLQ    CL, R14
+	MOVL    $0x00000001, R15
+	MOVB    R13, CL
+	SHLL    CL, R15
+	DECL    R15
+	ANDQ    R15, R14
+	ADDQ    R14, DI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Match Length State
+	MOVBQZX R8, R13
+	SHRL    $0x10, R8
+	LEAQ    (BX)(R13*1), CX
+	MOVQ    DX, R14
+	MOVQ    CX, BX
+	ROLQ    CL, R14
+	MOVL    $0x00000001, R15
+	MOVB    R13, CL
+	SHLL    CL, R15
+	DECL    R15
+	ANDQ    R15, R14
+	ADDQ    R14, R8
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Offset State
+	MOVBQZX R9, R13
+	SHRL    $0x10, R9
+	LEAQ    (BX)(R13*1), CX
+	MOVQ    DX, R14
+	MOVQ    CX, BX
+	ROLQ    CL, R14
+	MOVL    $0x00000001, R15
+	MOVB    R13, CL
+	SHLL    CL, R15
+	DECL    R15
+	ANDQ    R15, R14
+	ADDQ    R14, R9
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decodeSync_amd64_skip_update:
+	// Adjust offset
+	MOVQ   s+0(FP), CX
+	MOVQ   8(SP), R13
+	CMPQ   AX, $0x01
+	JBE    sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0
+	MOVUPS 144(CX), X0
+	MOVQ   R13, 144(CX)
+	MOVUPS X0, 152(CX)
+	JMP    sequenceDecs_decodeSync_amd64_after_adjust
+
+sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0:
+	CMPQ 24(SP), $0x00000000
+	JNE  sequenceDecs_decodeSync_amd64_adjust_offset_maybezero
+	INCQ R13
+	JMP  sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_amd64_adjust_offset_maybezero:
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
+	MOVQ  144(CX), R13
+	JMP   sequenceDecs_decodeSync_amd64_after_adjust
+
+sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
+	MOVQ    R13, AX
+	XORQ    R14, R14
+	MOVQ    $-1, R15
+	CMPQ    R13, $0x03
+	CMOVQEQ R14, AX
+	CMOVQEQ R15, R14
+	ADDQ    144(CX)(AX*8), R14
+	JNZ     sequenceDecs_decodeSync_amd64_adjust_temp_valid
+	MOVQ    $0x00000001, R14
+
+sequenceDecs_decodeSync_amd64_adjust_temp_valid:
+	CMPQ R13, $0x01
+	JZ   sequenceDecs_decodeSync_amd64_adjust_skip
+	MOVQ 152(CX), AX
+	MOVQ AX, 160(CX)
+
+sequenceDecs_decodeSync_amd64_adjust_skip:
+	MOVQ 144(CX), AX
+	MOVQ AX, 152(CX)
+	MOVQ R14, 144(CX)
+	MOVQ R14, R13
+
+sequenceDecs_decodeSync_amd64_after_adjust:
+	MOVQ R13, 8(SP)
+
+	// Check values
+	MOVQ  16(SP), AX
+	MOVQ  24(SP), CX
+	LEAQ  (AX)(CX*1), R14
+	MOVQ  s+0(FP), R15
+	ADDQ  R14, 256(R15)
+	MOVQ  ctx+16(FP), R14
+	SUBQ  CX, 104(R14)
+	JS    error_not_enough_literals
+	CMPQ  AX, $0x00020002
+	JA    sequenceDecs_decodeSync_amd64_error_match_len_too_big
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_amd64_match_len_ofs_ok
+	TESTQ AX, AX
+	JNZ   sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_amd64_match_len_ofs_ok:
+	MOVQ 24(SP), AX
+	MOVQ 8(SP), CX
+	MOVQ 16(SP), R13
+
+	// Check if we have enough space in s.out
+	LEAQ (AX)(R13*1), R14
+	ADDQ R10, R14
+	CMPQ R14, 32(SP)
+	JA   error_not_enough_space
+
+	// Copy literals
+	TESTQ AX, AX
+	JZ    check_offset
+	XORQ  R14, R14
+
+copy_1:
+	MOVUPS (R11)(R14*1), X0
+	MOVUPS X0, (R10)(R14*1)
+	ADDQ   $0x10, R14
+	CMPQ   R14, AX
+	JB     copy_1
+	ADDQ   AX, R11
+	ADDQ   AX, R10
+	ADDQ   AX, R12
+
+	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+	MOVQ R12, AX
+	ADDQ 40(SP), AX
+	CMPQ CX, AX
+	JG   error_match_off_too_big
+	CMPQ CX, 56(SP)
+	JG   error_match_off_too_big
+
+	// Copy match from history
+	MOVQ CX, AX
+	SUBQ R12, AX
+	JLS  copy_match
+	MOVQ 48(SP), R14
+	SUBQ AX, R14
+	CMPQ R13, AX
+	JG   copy_all_from_history
+	MOVQ R13, AX
+	SUBQ $0x10, AX
+	JB   copy_4_small
+
+copy_4_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R10)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R10
+	SUBQ   $0x10, AX
+	JAE    copy_4_loop
+	LEAQ   16(R14)(AX*1), R14
+	LEAQ   16(R10)(AX*1), R10
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R10)
+	JMP    copy_4_end
+
+copy_4_small:
+	CMPQ R13, $0x03
+	JE   copy_4_move_3
+	CMPQ R13, $0x08
+	JB   copy_4_move_4through7
+	JMP  copy_4_move_8through16
+
+copy_4_move_3:
+	MOVW (R14), AX
+	MOVB 2(R14), CL
+	MOVW AX, (R10)
+	MOVB CL, 2(R10)
+	ADDQ R13, R14
+	ADDQ R13, R10
+	JMP  copy_4_end
+
+copy_4_move_4through7:
+	MOVL (R14), AX
+	MOVL -4(R14)(R13*1), CX
+	MOVL AX, (R10)
+	MOVL CX, -4(R10)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R10
+	JMP  copy_4_end
+
+copy_4_move_8through16:
+	MOVQ (R14), AX
+	MOVQ -8(R14)(R13*1), CX
+	MOVQ AX, (R10)
+	MOVQ CX, -8(R10)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R10
+
+copy_4_end:
+	ADDQ R13, R12
+	JMP  handle_loop
+	JMP loop_finished
+
+copy_all_from_history:
+	MOVQ AX, R15
+	SUBQ $0x10, R15
+	JB   copy_5_small
+
+copy_5_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R10)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R10
+	SUBQ   $0x10, R15
+	JAE    copy_5_loop
+	LEAQ   16(R14)(R15*1), R14
+	LEAQ   16(R10)(R15*1), R10
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R10)
+	JMP    copy_5_end
+
+copy_5_small:
+	CMPQ AX, $0x03
+	JE   copy_5_move_3
+	JB   copy_5_move_1or2
+	CMPQ AX, $0x08
+	JB   copy_5_move_4through7
+	JMP  copy_5_move_8through16
+
+copy_5_move_1or2:
+	MOVB (R14), R15
+	MOVB -1(R14)(AX*1), BP
+	MOVB R15, (R10)
+	MOVB BP, -1(R10)(AX*1)
+	ADDQ AX, R14
+	ADDQ AX, R10
+	JMP  copy_5_end
+
+copy_5_move_3:
+	MOVW (R14), R15
+	MOVB 2(R14), BP
+	MOVW R15, (R10)
+	MOVB BP, 2(R10)
+	ADDQ AX, R14
+	ADDQ AX, R10
+	JMP  copy_5_end
+
+copy_5_move_4through7:
+	MOVL (R14), R15
+	MOVL -4(R14)(AX*1), BP
+	MOVL R15, (R10)
+	MOVL BP, -4(R10)(AX*1)
+	ADDQ AX, R14
+	ADDQ AX, R10
+	JMP  copy_5_end
+
+copy_5_move_8through16:
+	MOVQ (R14), R15
+	MOVQ -8(R14)(AX*1), BP
+	MOVQ R15, (R10)
+	MOVQ BP, -8(R10)(AX*1)
+	ADDQ AX, R14
+	ADDQ AX, R10
+
+copy_5_end:
+	ADDQ AX, R12
+	SUBQ AX, R13
+
+	// Copy match from the current buffer
+copy_match:
+	MOVQ R10, AX
+	SUBQ CX, AX
+
+	// ml <= mo
+	CMPQ R13, CX
+	JA   copy_overlapping_match
+
+	// Copy non-overlapping match
+	ADDQ R13, R12
+	MOVQ R10, CX
+	ADDQ R13, R10
+
+copy_2:
+	MOVUPS (AX), X0
+	MOVUPS X0, (CX)
+	ADDQ   $0x10, AX
+	ADDQ   $0x10, CX
+	SUBQ   $0x10, R13
+	JHI    copy_2
+	JMP    handle_loop
+
+	// Copy overlapping match
+copy_overlapping_match:
+	ADDQ R13, R12
+
+copy_slow_3:
+	MOVB (AX), CL
+	MOVB CL, (R10)
+	INCQ AX
+	INCQ R10
+	DECQ R13
+	JNZ  copy_slow_3
+
+handle_loop:
+	MOVQ ctx+16(FP), AX
+	DECQ 96(AX)
+	JNS  sequenceDecs_decodeSync_amd64_main_loop
+
+loop_finished:
+	MOVQ br+8(FP), AX
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
+
+	// Update the context
+	MOVQ ctx+16(FP), AX
+	MOVQ R12, 136(AX)
+	MOVQ 144(AX), CX
+	SUBQ CX, R11
+	MOVQ R11, 168(AX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch:
+	MOVQ 16(SP), AX
+	MOVQ ctx+16(FP), CX
+	MOVQ AX, 216(CX)
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decodeSync_amd64_error_match_len_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+error_match_off_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 8(SP), CX
+	MOVQ CX, 224(AX)
+	MOVQ R12, 136(AX)
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+	// Return with not enough output space error
+error_not_enough_space:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ R12, 136(AX)
+	MOVQ $0x00000005, ret+24(FP)
+	RET
+
+// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: BMI, BMI2, CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
+	ADDQ    BX, CX
+	MOVQ    CX, (SP)
+	MOVQ    ctx+16(FP), CX
+	MOVQ    72(CX), SI
+	MOVQ    80(CX), DI
+	MOVQ    88(CX), R8
+	XORQ    R9, R9
+	MOVQ    R9, 8(SP)
+	MOVQ    R9, 16(SP)
+	MOVQ    R9, 24(SP)
+	MOVQ    112(CX), R9
+	MOVQ    128(CX), R10
+	MOVQ    R10, 32(SP)
+	MOVQ    144(CX), R10
+	MOVQ    136(CX), R11
+	MOVQ    200(CX), R12
+	MOVQ    R12, 56(SP)
+	MOVQ    176(CX), R12
+	MOVQ    R12, 48(SP)
+	MOVQ    184(CX), CX
+	MOVQ    CX, 40(SP)
+	MOVQ    40(SP), CX
+	ADDQ    CX, 48(SP)
+
+	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	ADDQ R9, 32(SP)
+
+	// outBase += outPosition
+	ADDQ R11, R9
+
+sequenceDecs_decodeSync_bmi2_main_loop:
+	MOVQ (SP), R12
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R12
+	MOVQ (R12), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decodeSync_bmi2_fill_end
+
+sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decodeSync_bmi2_fill_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decodeSync_bmi2_fill_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R12
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R12), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
+
+sequenceDecs_decodeSync_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_bmi2_fill_end:
+	// Update offset
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, R8, R13
+	MOVQ   AX, R14
+	LEAQ   (DX)(R13*1), CX
+	ROLQ   CL, R14
+	BZHIQ  R13, R14, R14
+	MOVQ   CX, DX
+	MOVQ   R8, CX
+	SHRQ   $0x20, CX
+	ADDQ   R14, CX
+	MOVQ   CX, 8(SP)
+
+	// Update match length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, DI, R13
+	MOVQ   AX, R14
+	LEAQ   (DX)(R13*1), CX
+	ROLQ   CL, R14
+	BZHIQ  R13, R14, R14
+	MOVQ   CX, DX
+	MOVQ   DI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R14, CX
+	MOVQ   CX, 16(SP)
+
+	// Fill bitreader to have enough for the remaining
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R12
+	MOVQ (R12), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decodeSync_bmi2_fill_2_end
+
+sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decodeSync_bmi2_fill_2_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decodeSync_bmi2_fill_2_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R12
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R12), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_bmi2_fill_2_end:
+	// Update literal length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, SI, R13
+	MOVQ   AX, R14
+	LEAQ   (DX)(R13*1), CX
+	ROLQ   CL, R14
+	BZHIQ  R13, R14, R14
+	MOVQ   CX, DX
+	MOVQ   SI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R14, CX
+	MOVQ   CX, 24(SP)
+
+	// Fill bitreader for state updates
+	MOVQ    R12, (SP)
+	MOVQ    $0x00000808, CX
+	BEXTRQ  CX, R8, R12
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decodeSync_bmi2_skip_update
+	LEAQ    (SI)(DI*1), R13
+	ADDQ    R8, R13
+	MOVBQZX R13, R13
+	LEAQ    (DX)(R13*1), CX
+	MOVQ    AX, R14
+	MOVQ    CX, DX
+	ROLQ    CL, R14
+	BZHIQ   R13, R14, R14
+
+	// Update Offset State
+	BZHIQ R8, R14, CX
+	SHRXQ R8, R14, R14
+	SHRL  $0x10, R8
+	ADDQ  CX, R8
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Match Length State
+	BZHIQ DI, R14, CX
+	SHRXQ DI, R14, R14
+	SHRL  $0x10, DI
+	ADDQ  CX, DI
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Literal Length State
+	BZHIQ SI, R14, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(SI*8), SI
+
+sequenceDecs_decodeSync_bmi2_skip_update:
+	// Adjust offset
+	MOVQ   s+0(FP), CX
+	MOVQ   8(SP), R13
+	CMPQ   R12, $0x01
+	JBE    sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0
+	MOVUPS 144(CX), X0
+	MOVQ   R13, 144(CX)
+	MOVUPS X0, 152(CX)
+	JMP    sequenceDecs_decodeSync_bmi2_after_adjust
+
+sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0:
+	CMPQ 24(SP), $0x00000000
+	JNE  sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero
+	INCQ R13
+	JMP  sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero:
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
+	MOVQ  144(CX), R13
+	JMP   sequenceDecs_decodeSync_bmi2_after_adjust
+
+sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
+	MOVQ    R13, R12
+	XORQ    R14, R14
+	MOVQ    $-1, R15
+	CMPQ    R13, $0x03
+	CMOVQEQ R14, R12
+	CMOVQEQ R15, R14
+	ADDQ    144(CX)(R12*8), R14
+	JNZ     sequenceDecs_decodeSync_bmi2_adjust_temp_valid
+	MOVQ    $0x00000001, R14
+
+sequenceDecs_decodeSync_bmi2_adjust_temp_valid:
+	CMPQ R13, $0x01
+	JZ   sequenceDecs_decodeSync_bmi2_adjust_skip
+	MOVQ 152(CX), R12
+	MOVQ R12, 160(CX)
+
+sequenceDecs_decodeSync_bmi2_adjust_skip:
+	MOVQ 144(CX), R12
+	MOVQ R12, 152(CX)
+	MOVQ R14, 144(CX)
+	MOVQ R14, R13
+
+sequenceDecs_decodeSync_bmi2_after_adjust:
+	MOVQ R13, 8(SP)
+
+	// Check values
+	MOVQ  16(SP), CX
+	MOVQ  24(SP), R12
+	LEAQ  (CX)(R12*1), R14
+	MOVQ  s+0(FP), R15
+	ADDQ  R14, 256(R15)
+	MOVQ  ctx+16(FP), R14
+	SUBQ  R12, 104(R14)
+	JS    error_not_enough_literals
+	CMPQ  CX, $0x00020002
+	JA    sequenceDecs_decodeSync_bmi2_error_match_len_too_big
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_bmi2_match_len_ofs_ok
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_bmi2_match_len_ofs_ok:
+	MOVQ 24(SP), CX
+	MOVQ 8(SP), R12
+	MOVQ 16(SP), R13
+
+	// Check if we have enough space in s.out
+	LEAQ (CX)(R13*1), R14
+	ADDQ R9, R14
+	CMPQ R14, 32(SP)
+	JA   error_not_enough_space
+
+	// Copy literals
+	TESTQ CX, CX
+	JZ    check_offset
+	XORQ  R14, R14
+
+copy_1:
+	MOVUPS (R10)(R14*1), X0
+	MOVUPS X0, (R9)(R14*1)
+	ADDQ   $0x10, R14
+	CMPQ   R14, CX
+	JB     copy_1
+	ADDQ   CX, R10
+	ADDQ   CX, R9
+	ADDQ   CX, R11
+
+	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+	MOVQ R11, CX
+	ADDQ 40(SP), CX
+	CMPQ R12, CX
+	JG   error_match_off_too_big
+	CMPQ R12, 56(SP)
+	JG   error_match_off_too_big
+
+	// Copy match from history
+	MOVQ R12, CX
+	SUBQ R11, CX
+	JLS  copy_match
+	MOVQ 48(SP), R14
+	SUBQ CX, R14
+	CMPQ R13, CX
+	JG   copy_all_from_history
+	MOVQ R13, CX
+	SUBQ $0x10, CX
+	JB   copy_4_small
+
+copy_4_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R9)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R9
+	SUBQ   $0x10, CX
+	JAE    copy_4_loop
+	LEAQ   16(R14)(CX*1), R14
+	LEAQ   16(R9)(CX*1), R9
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R9)
+	JMP    copy_4_end
+
+copy_4_small:
+	CMPQ R13, $0x03
+	JE   copy_4_move_3
+	CMPQ R13, $0x08
+	JB   copy_4_move_4through7
+	JMP  copy_4_move_8through16
+
+copy_4_move_3:
+	MOVW (R14), CX
+	MOVB 2(R14), R12
+	MOVW CX, (R9)
+	MOVB R12, 2(R9)
+	ADDQ R13, R14
+	ADDQ R13, R9
+	JMP  copy_4_end
+
+copy_4_move_4through7:
+	MOVL (R14), CX
+	MOVL -4(R14)(R13*1), R12
+	MOVL CX, (R9)
+	MOVL R12, -4(R9)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R9
+	JMP  copy_4_end
+
+copy_4_move_8through16:
+	MOVQ (R14), CX
+	MOVQ -8(R14)(R13*1), R12
+	MOVQ CX, (R9)
+	MOVQ R12, -8(R9)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R9
+
+copy_4_end:
+	ADDQ R13, R11
+	JMP  handle_loop
+	JMP loop_finished
+
+copy_all_from_history:
+	MOVQ CX, R15
+	SUBQ $0x10, R15
+	JB   copy_5_small
+
+copy_5_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R9)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R9
+	SUBQ   $0x10, R15
+	JAE    copy_5_loop
+	LEAQ   16(R14)(R15*1), R14
+	LEAQ   16(R9)(R15*1), R9
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R9)
+	JMP    copy_5_end
+
+copy_5_small:
+	CMPQ CX, $0x03
+	JE   copy_5_move_3
+	JB   copy_5_move_1or2
+	CMPQ CX, $0x08
+	JB   copy_5_move_4through7
+	JMP  copy_5_move_8through16
+
+copy_5_move_1or2:
+	MOVB (R14), R15
+	MOVB -1(R14)(CX*1), BP
+	MOVB R15, (R9)
+	MOVB BP, -1(R9)(CX*1)
+	ADDQ CX, R14
+	ADDQ CX, R9
+	JMP  copy_5_end
+
+copy_5_move_3:
+	MOVW (R14), R15
+	MOVB 2(R14), BP
+	MOVW R15, (R9)
+	MOVB BP, 2(R9)
+	ADDQ CX, R14
+	ADDQ CX, R9
+	JMP  copy_5_end
+
+copy_5_move_4through7:
+	MOVL (R14), R15
+	MOVL -4(R14)(CX*1), BP
+	MOVL R15, (R9)
+	MOVL BP, -4(R9)(CX*1)
+	ADDQ CX, R14
+	ADDQ CX, R9
+	JMP  copy_5_end
+
+copy_5_move_8through16:
+	MOVQ (R14), R15
+	MOVQ -8(R14)(CX*1), BP
+	MOVQ R15, (R9)
+	MOVQ BP, -8(R9)(CX*1)
+	ADDQ CX, R14
+	ADDQ CX, R9
+
+copy_5_end:
+	ADDQ CX, R11
+	SUBQ CX, R13
+
+	// Copy match from the current buffer
+copy_match:
+	MOVQ R9, CX
+	SUBQ R12, CX
+
+	// ml <= mo
+	CMPQ R13, R12
+	JA   copy_overlapping_match
+
+	// Copy non-overlapping match
+	ADDQ R13, R11
+	MOVQ R9, R12
+	ADDQ R13, R9
+
+copy_2:
+	MOVUPS (CX), X0
+	MOVUPS X0, (R12)
+	ADDQ   $0x10, CX
+	ADDQ   $0x10, R12
+	SUBQ   $0x10, R13
+	JHI    copy_2
+	JMP    handle_loop
+
+	// Copy overlapping match
+copy_overlapping_match:
+	ADDQ R13, R11
+
+copy_slow_3:
+	MOVB (CX), R12
+	MOVB R12, (R9)
+	INCQ CX
+	INCQ R9
+	DECQ R13
+	JNZ  copy_slow_3
+
+handle_loop:
+	MOVQ ctx+16(FP), CX
+	DECQ 96(CX)
+	JNS  sequenceDecs_decodeSync_bmi2_main_loop
+
+loop_finished:
+	MOVQ br+8(FP), CX
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
+
+	// Update the context
+	MOVQ ctx+16(FP), AX
+	MOVQ R11, 136(AX)
+	MOVQ 144(AX), CX
+	SUBQ CX, R10
+	MOVQ R10, 168(AX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch:
+	MOVQ 16(SP), AX
+	MOVQ ctx+16(FP), CX
+	MOVQ AX, 216(CX)
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decodeSync_bmi2_error_match_len_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+error_match_off_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 8(SP), CX
+	MOVQ CX, 224(AX)
+	MOVQ R11, 136(AX)
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+	// Return with not enough output space error
+error_not_enough_space:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ R11, 136(AX)
+	MOVQ $0x00000005, ret+24(FP)
+	RET
+
+// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
+	MOVQ    br+8(FP), CX
+	MOVQ    24(CX), DX
+	MOVBQZX 32(CX), BX
+	MOVQ    (CX), AX
+	MOVQ    8(CX), SI
+	ADDQ    SI, AX
+	MOVQ    AX, (SP)
+	MOVQ    ctx+16(FP), AX
+	MOVQ    72(AX), DI
+	MOVQ    80(AX), R8
+	MOVQ    88(AX), R9
+	XORQ    CX, CX
+	MOVQ    CX, 8(SP)
+	MOVQ    CX, 16(SP)
+	MOVQ    CX, 24(SP)
+	MOVQ    112(AX), R10
+	MOVQ    128(AX), CX
+	MOVQ    CX, 32(SP)
+	MOVQ    144(AX), R11
+	MOVQ    136(AX), R12
+	MOVQ    200(AX), CX
+	MOVQ    CX, 56(SP)
+	MOVQ    176(AX), CX
+	MOVQ    CX, 48(SP)
+	MOVQ    184(AX), AX
+	MOVQ    AX, 40(SP)
+	MOVQ    40(SP), AX
+	ADDQ    AX, 48(SP)
+
+	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	ADDQ R10, 32(SP)
+
+	// outBase += outPosition
+	ADDQ R12, R10
+
+sequenceDecs_decodeSync_safe_amd64_main_loop:
+	MOVQ (SP), R13
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R13
+	MOVQ (R13), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decodeSync_safe_amd64_fill_end
+
+sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R13), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
+
+sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_safe_amd64_fill_end:
+	// Update offset
+	MOVQ  R9, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R14
+	SHLQ  CL, R14
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decodeSync_safe_amd64_of_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decodeSync_safe_amd64_of_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decodeSync_safe_amd64_of_update_zero
+	NEGQ  CX
+	SHRQ  CL, R14
+	ADDQ  R14, AX
+
+sequenceDecs_decodeSync_safe_amd64_of_update_zero:
+	MOVQ AX, 8(SP)
+
+	// Update match length
+	MOVQ  R8, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R14
+	SHLQ  CL, R14
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decodeSync_safe_amd64_ml_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decodeSync_safe_amd64_ml_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decodeSync_safe_amd64_ml_update_zero
+	NEGQ  CX
+	SHRQ  CL, R14
+	ADDQ  R14, AX
+
+sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
+	MOVQ AX, 16(SP)
+
+	// Fill bitreader to have enough for the remaining
+	CMPQ SI, $0x08
+	JL   sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
+	MOVQ BX, AX
+	SHRQ $0x03, AX
+	SUBQ AX, R13
+	MOVQ (R13), DX
+	SUBQ AX, SI
+	ANDQ $0x07, BX
+	JMP  sequenceDecs_decodeSync_safe_amd64_fill_2_end
+
+sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
+	CMPQ    SI, $0x00
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
+	CMPQ    BX, $0x07
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_end
+	SHLQ    $0x08, DX
+	SUBQ    $0x01, R13
+	SUBQ    $0x01, SI
+	SUBQ    $0x08, BX
+	MOVBQZX (R13), AX
+	ORQ     AX, DX
+	JMP     sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_safe_amd64_fill_2_end:
+	// Update literal length
+	MOVQ  DI, AX
+	MOVQ  BX, CX
+	MOVQ  DX, R14
+	SHLQ  CL, R14
+	MOVB  AH, CL
+	SHRQ  $0x20, AX
+	TESTQ CX, CX
+	JZ    sequenceDecs_decodeSync_safe_amd64_ll_update_zero
+	ADDQ  CX, BX
+	CMPQ  BX, $0x40
+	JA    sequenceDecs_decodeSync_safe_amd64_ll_update_zero
+	CMPQ  CX, $0x40
+	JAE   sequenceDecs_decodeSync_safe_amd64_ll_update_zero
+	NEGQ  CX
+	SHRQ  CL, R14
+	ADDQ  R14, AX
+
+sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
+	MOVQ AX, 24(SP)
+
+	// Fill bitreader for state updates
+	MOVQ    R13, (SP)
+	MOVQ    R9, AX
+	SHRQ    $0x08, AX
+	MOVBQZX AL, AX
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decodeSync_safe_amd64_skip_update
+
+	// Update Literal Length State
+	MOVBQZX DI, R13
+	SHRL    $0x10, DI
+	LEAQ    (BX)(R13*1), CX
+	MOVQ    DX, R14
+	MOVQ    CX, BX
+	ROLQ    CL, R14
+	MOVL    $0x00000001, R15
+	MOVB    R13, CL
+	SHLL    CL, R15
+	DECL    R15
+	ANDQ    R15, R14
+	ADDQ    R14, DI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Match Length State
+	MOVBQZX R8, R13
+	SHRL    $0x10, R8
+	LEAQ    (BX)(R13*1), CX
+	MOVQ    DX, R14
+	MOVQ    CX, BX
+	ROLQ    CL, R14
+	MOVL    $0x00000001, R15
+	MOVB    R13, CL
+	SHLL    CL, R15
+	DECL    R15
+	ANDQ    R15, R14
+	ADDQ    R14, R8
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Offset State
+	MOVBQZX R9, R13
+	SHRL    $0x10, R9
+	LEAQ    (BX)(R13*1), CX
+	MOVQ    DX, R14
+	MOVQ    CX, BX
+	ROLQ    CL, R14
+	MOVL    $0x00000001, R15
+	MOVB    R13, CL
+	SHLL    CL, R15
+	DECL    R15
+	ANDQ    R15, R14
+	ADDQ    R14, R9
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decodeSync_safe_amd64_skip_update:
+	// Adjust offset
+	MOVQ   s+0(FP), CX
+	MOVQ   8(SP), R13
+	CMPQ   AX, $0x01
+	JBE    sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0
+	MOVUPS 144(CX), X0
+	MOVQ   R13, 144(CX)
+	MOVUPS X0, 152(CX)
+	JMP    sequenceDecs_decodeSync_safe_amd64_after_adjust
+
+sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0:
+	CMPQ 24(SP), $0x00000000
+	JNE  sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero
+	INCQ R13
+	JMP  sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero:
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
+	MOVQ  144(CX), R13
+	JMP   sequenceDecs_decodeSync_safe_amd64_after_adjust
+
+sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
+	MOVQ    R13, AX
+	XORQ    R14, R14
+	MOVQ    $-1, R15
+	CMPQ    R13, $0x03
+	CMOVQEQ R14, AX
+	CMOVQEQ R15, R14
+	ADDQ    144(CX)(AX*8), R14
+	JNZ     sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid
+	MOVQ    $0x00000001, R14
+
+sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid:
+	CMPQ R13, $0x01
+	JZ   sequenceDecs_decodeSync_safe_amd64_adjust_skip
+	MOVQ 152(CX), AX
+	MOVQ AX, 160(CX)
+
+sequenceDecs_decodeSync_safe_amd64_adjust_skip:
+	MOVQ 144(CX), AX
+	MOVQ AX, 152(CX)
+	MOVQ R14, 144(CX)
+	MOVQ R14, R13
+
+sequenceDecs_decodeSync_safe_amd64_after_adjust:
+	MOVQ R13, 8(SP)
+
+	// Check values
+	MOVQ  16(SP), AX
+	MOVQ  24(SP), CX
+	LEAQ  (AX)(CX*1), R14
+	MOVQ  s+0(FP), R15
+	ADDQ  R14, 256(R15)
+	MOVQ  ctx+16(FP), R14
+	SUBQ  CX, 104(R14)
+	JS    error_not_enough_literals
+	CMPQ  AX, $0x00020002
+	JA    sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok
+	TESTQ AX, AX
+	JNZ   sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok:
+	MOVQ 24(SP), AX
+	MOVQ 8(SP), CX
+	MOVQ 16(SP), R13
+
+	// Check if we have enough space in s.out
+	LEAQ (AX)(R13*1), R14
+	ADDQ R10, R14
+	CMPQ R14, 32(SP)
+	JA   error_not_enough_space
+
+	// Copy literals
+	TESTQ AX, AX
+	JZ    check_offset
+	MOVQ  AX, R14
+	SUBQ  $0x10, R14
+	JB    copy_1_small
+
+copy_1_loop:
+	MOVUPS (R11), X0
+	MOVUPS X0, (R10)
+	ADDQ   $0x10, R11
+	ADDQ   $0x10, R10
+	SUBQ   $0x10, R14
+	JAE    copy_1_loop
+	LEAQ   16(R11)(R14*1), R11
+	LEAQ   16(R10)(R14*1), R10
+	MOVUPS -16(R11), X0
+	MOVUPS X0, -16(R10)
+	JMP    copy_1_end
+
+copy_1_small:
+	CMPQ AX, $0x03
+	JE   copy_1_move_3
+	JB   copy_1_move_1or2
+	CMPQ AX, $0x08
+	JB   copy_1_move_4through7
+	JMP  copy_1_move_8through16
+
+copy_1_move_1or2:
+	MOVB (R11), R14
+	MOVB -1(R11)(AX*1), R15
+	MOVB R14, (R10)
+	MOVB R15, -1(R10)(AX*1)
+	ADDQ AX, R11
+	ADDQ AX, R10
+	JMP  copy_1_end
+
+copy_1_move_3:
+	MOVW (R11), R14
+	MOVB 2(R11), R15
+	MOVW R14, (R10)
+	MOVB R15, 2(R10)
+	ADDQ AX, R11
+	ADDQ AX, R10
+	JMP  copy_1_end
+
+copy_1_move_4through7:
+	MOVL (R11), R14
+	MOVL -4(R11)(AX*1), R15
+	MOVL R14, (R10)
+	MOVL R15, -4(R10)(AX*1)
+	ADDQ AX, R11
+	ADDQ AX, R10
+	JMP  copy_1_end
+
+copy_1_move_8through16:
+	MOVQ (R11), R14
+	MOVQ -8(R11)(AX*1), R15
+	MOVQ R14, (R10)
+	MOVQ R15, -8(R10)(AX*1)
+	ADDQ AX, R11
+	ADDQ AX, R10
+
+copy_1_end:
+	ADDQ AX, R12
+
+	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+	MOVQ R12, AX
+	ADDQ 40(SP), AX
+	CMPQ CX, AX
+	JG   error_match_off_too_big
+	CMPQ CX, 56(SP)
+	JG   error_match_off_too_big
+
+	// Copy match from history
+	MOVQ CX, AX
+	SUBQ R12, AX
+	JLS  copy_match
+	MOVQ 48(SP), R14
+	SUBQ AX, R14
+	CMPQ R13, AX
+	JG   copy_all_from_history
+	MOVQ R13, AX
+	SUBQ $0x10, AX
+	JB   copy_4_small
+
+copy_4_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R10)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R10
+	SUBQ   $0x10, AX
+	JAE    copy_4_loop
+	LEAQ   16(R14)(AX*1), R14
+	LEAQ   16(R10)(AX*1), R10
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R10)
+	JMP    copy_4_end
+
+copy_4_small:
+	CMPQ R13, $0x03
+	JE   copy_4_move_3
+	CMPQ R13, $0x08
+	JB   copy_4_move_4through7
+	JMP  copy_4_move_8through16
+
+copy_4_move_3:
+	MOVW (R14), AX
+	MOVB 2(R14), CL
+	MOVW AX, (R10)
+	MOVB CL, 2(R10)
+	ADDQ R13, R14
+	ADDQ R13, R10
+	JMP  copy_4_end
+
+copy_4_move_4through7:
+	MOVL (R14), AX
+	MOVL -4(R14)(R13*1), CX
+	MOVL AX, (R10)
+	MOVL CX, -4(R10)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R10
+	JMP  copy_4_end
+
+copy_4_move_8through16:
+	MOVQ (R14), AX
+	MOVQ -8(R14)(R13*1), CX
+	MOVQ AX, (R10)
+	MOVQ CX, -8(R10)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R10
+
+copy_4_end:
+	ADDQ R13, R12
+	JMP  handle_loop
+	JMP loop_finished
+
+copy_all_from_history:
+	MOVQ AX, R15
+	SUBQ $0x10, R15
+	JB   copy_5_small
+
+copy_5_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R10)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R10
+	SUBQ   $0x10, R15
+	JAE    copy_5_loop
+	LEAQ   16(R14)(R15*1), R14
+	LEAQ   16(R10)(R15*1), R10
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R10)
+	JMP    copy_5_end
+
+copy_5_small:
+	CMPQ AX, $0x03
+	JE   copy_5_move_3
+	JB   copy_5_move_1or2
+	CMPQ AX, $0x08
+	JB   copy_5_move_4through7
+	JMP  copy_5_move_8through16
+
+copy_5_move_1or2:
+	MOVB (R14), R15
+	MOVB -1(R14)(AX*1), BP
+	MOVB R15, (R10)
+	MOVB BP, -1(R10)(AX*1)
+	ADDQ AX, R14
+	ADDQ AX, R10
+	JMP  copy_5_end
+
+copy_5_move_3:
+	MOVW (R14), R15
+	MOVB 2(R14), BP
+	MOVW R15, (R10)
+	MOVB BP, 2(R10)
+	ADDQ AX, R14
+	ADDQ AX, R10
+	JMP  copy_5_end
+
+copy_5_move_4through7:
+	MOVL (R14), R15
+	MOVL -4(R14)(AX*1), BP
+	MOVL R15, (R10)
+	MOVL BP, -4(R10)(AX*1)
+	ADDQ AX, R14
+	ADDQ AX, R10
+	JMP  copy_5_end
+
+copy_5_move_8through16:
+	MOVQ (R14), R15
+	MOVQ -8(R14)(AX*1), BP
+	MOVQ R15, (R10)
+	MOVQ BP, -8(R10)(AX*1)
+	ADDQ AX, R14
+	ADDQ AX, R10
+
+copy_5_end:
+	ADDQ AX, R12
+	SUBQ AX, R13
+
+	// Copy match from the current buffer
+copy_match:
+	MOVQ R10, AX
+	SUBQ CX, AX
+
+	// ml <= mo
+	CMPQ R13, CX
+	JA   copy_overlapping_match
+
+	// Copy non-overlapping match
+	ADDQ R13, R12
+	MOVQ R13, CX
+	SUBQ $0x10, CX
+	JB   copy_2_small
+
+copy_2_loop:
+	MOVUPS (AX), X0
+	MOVUPS X0, (R10)
+	ADDQ   $0x10, AX
+	ADDQ   $0x10, R10
+	SUBQ   $0x10, CX
+	JAE    copy_2_loop
+	LEAQ   16(AX)(CX*1), AX
+	LEAQ   16(R10)(CX*1), R10
+	MOVUPS -16(AX), X0
+	MOVUPS X0, -16(R10)
+	JMP    copy_2_end
+
+copy_2_small:
+	CMPQ R13, $0x03
+	JE   copy_2_move_3
+	JB   copy_2_move_1or2
+	CMPQ R13, $0x08
+	JB   copy_2_move_4through7
+	JMP  copy_2_move_8through16
+
+copy_2_move_1or2:
+	MOVB (AX), CL
+	MOVB -1(AX)(R13*1), R14
+	MOVB CL, (R10)
+	MOVB R14, -1(R10)(R13*1)
+	ADDQ R13, AX
+	ADDQ R13, R10
+	JMP  copy_2_end
+
+copy_2_move_3:
+	MOVW (AX), CX
+	MOVB 2(AX), R14
+	MOVW CX, (R10)
+	MOVB R14, 2(R10)
+	ADDQ R13, AX
+	ADDQ R13, R10
+	JMP  copy_2_end
+
+copy_2_move_4through7:
+	MOVL (AX), CX
+	MOVL -4(AX)(R13*1), R14
+	MOVL CX, (R10)
+	MOVL R14, -4(R10)(R13*1)
+	ADDQ R13, AX
+	ADDQ R13, R10
+	JMP  copy_2_end
+
+copy_2_move_8through16:
+	MOVQ (AX), CX
+	MOVQ -8(AX)(R13*1), R14
+	MOVQ CX, (R10)
+	MOVQ R14, -8(R10)(R13*1)
+	ADDQ R13, AX
+	ADDQ R13, R10
+
+copy_2_end:
+	JMP handle_loop
+
+	// Copy overlapping match
+copy_overlapping_match:
+	ADDQ R13, R12
+
+copy_slow_3:
+	MOVB (AX), CL
+	MOVB CL, (R10)
+	INCQ AX
+	INCQ R10
+	DECQ R13
+	JNZ  copy_slow_3
+
+handle_loop:
+	MOVQ ctx+16(FP), AX
+	DECQ 96(AX)
+	JNS  sequenceDecs_decodeSync_safe_amd64_main_loop
+
+loop_finished:
+	MOVQ br+8(FP), AX
+	MOVQ DX, 24(AX)
+	MOVB BL, 32(AX)
+	MOVQ SI, 8(AX)
+
+	// Update the context
+	MOVQ ctx+16(FP), AX
+	MOVQ R12, 136(AX)
+	MOVQ 144(AX), CX
+	SUBQ CX, R11
+	MOVQ R11, 168(AX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch:
+	MOVQ 16(SP), AX
+	MOVQ ctx+16(FP), CX
+	MOVQ AX, 216(CX)
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+error_match_off_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 8(SP), CX
+	MOVQ CX, 224(AX)
+	MOVQ R12, 136(AX)
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+	// Return with not enough output space error
+error_not_enough_space:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ R12, 136(AX)
+	MOVQ $0x00000005, ret+24(FP)
+	RET
+
+// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: BMI, BMI2, CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
+	MOVQ    br+8(FP), BX
+	MOVQ    24(BX), AX
+	MOVBQZX 32(BX), DX
+	MOVQ    (BX), CX
+	MOVQ    8(BX), BX
+	ADDQ    BX, CX
+	MOVQ    CX, (SP)
+	MOVQ    ctx+16(FP), CX
+	MOVQ    72(CX), SI
+	MOVQ    80(CX), DI
+	MOVQ    88(CX), R8
+	XORQ    R9, R9
+	MOVQ    R9, 8(SP)
+	MOVQ    R9, 16(SP)
+	MOVQ    R9, 24(SP)
+	MOVQ    112(CX), R9
+	MOVQ    128(CX), R10
+	MOVQ    R10, 32(SP)
+	MOVQ    144(CX), R10
+	MOVQ    136(CX), R11
+	MOVQ    200(CX), R12
+	MOVQ    R12, 56(SP)
+	MOVQ    176(CX), R12
+	MOVQ    R12, 48(SP)
+	MOVQ    184(CX), CX
+	MOVQ    CX, 40(SP)
+	MOVQ    40(SP), CX
+	ADDQ    CX, 48(SP)
+
+	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	ADDQ R9, 32(SP)
+
+	// outBase += outPosition
+	ADDQ R11, R9
+
+sequenceDecs_decodeSync_safe_bmi2_main_loop:
+	MOVQ (SP), R12
+
+	// Fill bitreader to have enough for the offset and match length.
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R12
+	MOVQ (R12), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decodeSync_safe_bmi2_fill_end
+
+sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R12
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R12), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
+
+sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_safe_bmi2_fill_end:
+	// Update offset
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, R8, R13
+	MOVQ   AX, R14
+	LEAQ   (DX)(R13*1), CX
+	ROLQ   CL, R14
+	BZHIQ  R13, R14, R14
+	MOVQ   CX, DX
+	MOVQ   R8, CX
+	SHRQ   $0x20, CX
+	ADDQ   R14, CX
+	MOVQ   CX, 8(SP)
+
+	// Update match length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, DI, R13
+	MOVQ   AX, R14
+	LEAQ   (DX)(R13*1), CX
+	ROLQ   CL, R14
+	BZHIQ  R13, R14, R14
+	MOVQ   CX, DX
+	MOVQ   DI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R14, CX
+	MOVQ   CX, 16(SP)
+
+	// Fill bitreader to have enough for the remaining
+	CMPQ BX, $0x08
+	JL   sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
+	MOVQ DX, CX
+	SHRQ $0x03, CX
+	SUBQ CX, R12
+	MOVQ (R12), AX
+	SUBQ CX, BX
+	ANDQ $0x07, DX
+	JMP  sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+
+sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
+	CMPQ    BX, $0x00
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
+	CMPQ    DX, $0x07
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+	SHLQ    $0x08, AX
+	SUBQ    $0x01, R12
+	SUBQ    $0x01, BX
+	SUBQ    $0x08, DX
+	MOVBQZX (R12), CX
+	ORQ     CX, AX
+	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
+sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
+	// Update literal length
+	MOVQ   $0x00000808, CX
+	BEXTRQ CX, SI, R13
+	MOVQ   AX, R14
+	LEAQ   (DX)(R13*1), CX
+	ROLQ   CL, R14
+	BZHIQ  R13, R14, R14
+	MOVQ   CX, DX
+	MOVQ   SI, CX
+	SHRQ   $0x20, CX
+	ADDQ   R14, CX
+	MOVQ   CX, 24(SP)
+
+	// Fill bitreader for state updates
+	MOVQ    R12, (SP)
+	MOVQ    $0x00000808, CX
+	BEXTRQ  CX, R8, R12
+	MOVQ    ctx+16(FP), CX
+	CMPQ    96(CX), $0x00
+	JZ      sequenceDecs_decodeSync_safe_bmi2_skip_update
+	LEAQ    (SI)(DI*1), R13
+	ADDQ    R8, R13
+	MOVBQZX R13, R13
+	LEAQ    (DX)(R13*1), CX
+	MOVQ    AX, R14
+	MOVQ    CX, DX
+	ROLQ    CL, R14
+	BZHIQ   R13, R14, R14
+
+	// Update Offset State
+	BZHIQ R8, R14, CX
+	SHRXQ R8, R14, R14
+	SHRL  $0x10, R8
+	ADDQ  CX, R8
+
+	// Load ctx.ofTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 48(CX), CX
+	MOVQ (CX)(R8*8), R8
+
+	// Update Match Length State
+	BZHIQ DI, R14, CX
+	SHRXQ DI, R14, R14
+	SHRL  $0x10, DI
+	ADDQ  CX, DI
+
+	// Load ctx.mlTable
+	MOVQ ctx+16(FP), CX
+	MOVQ 24(CX), CX
+	MOVQ (CX)(DI*8), DI
+
+	// Update Literal Length State
+	BZHIQ SI, R14, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI
+
+	// Load ctx.llTable
+	MOVQ ctx+16(FP), CX
+	MOVQ (CX), CX
+	MOVQ (CX)(SI*8), SI
+
+sequenceDecs_decodeSync_safe_bmi2_skip_update:
+	// Adjust offset
+	MOVQ   s+0(FP), CX
+	MOVQ   8(SP), R13
+	CMPQ   R12, $0x01
+	JBE    sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0
+	MOVUPS 144(CX), X0
+	MOVQ   R13, 144(CX)
+	MOVUPS X0, 152(CX)
+	JMP    sequenceDecs_decodeSync_safe_bmi2_after_adjust
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0:
+	CMPQ 24(SP), $0x00000000
+	JNE  sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero
+	INCQ R13
+	JMP  sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero:
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
+	MOVQ  144(CX), R13
+	JMP   sequenceDecs_decodeSync_safe_bmi2_after_adjust
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
+	MOVQ    R13, R12
+	XORQ    R14, R14
+	MOVQ    $-1, R15
+	CMPQ    R13, $0x03
+	CMOVQEQ R14, R12
+	CMOVQEQ R15, R14
+	ADDQ    144(CX)(R12*8), R14
+	JNZ     sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid
+	MOVQ    $0x00000001, R14
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid:
+	CMPQ R13, $0x01
+	JZ   sequenceDecs_decodeSync_safe_bmi2_adjust_skip
+	MOVQ 152(CX), R12
+	MOVQ R12, 160(CX)
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_skip:
+	MOVQ 144(CX), R12
+	MOVQ R12, 152(CX)
+	MOVQ R14, 144(CX)
+	MOVQ R14, R13
+
+sequenceDecs_decodeSync_safe_bmi2_after_adjust:
+	MOVQ R13, 8(SP)
+
+	// Check values
+	MOVQ  16(SP), CX
+	MOVQ  24(SP), R12
+	LEAQ  (CX)(R12*1), R14
+	MOVQ  s+0(FP), R15
+	ADDQ  R14, 256(R15)
+	MOVQ  ctx+16(FP), R14
+	SUBQ  R12, 104(R14)
+	JS    error_not_enough_literals
+	CMPQ  CX, $0x00020002
+	JA    sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big
+	TESTQ R13, R13
+	JNZ   sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok
+	TESTQ CX, CX
+	JNZ   sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok:
+	MOVQ 24(SP), CX
+	MOVQ 8(SP), R12
+	MOVQ 16(SP), R13
+
+	// Check if we have enough space in s.out
+	LEAQ (CX)(R13*1), R14
+	ADDQ R9, R14
+	CMPQ R14, 32(SP)
+	JA   error_not_enough_space
+
+	// Copy literals
+	TESTQ CX, CX
+	JZ    check_offset
+	MOVQ  CX, R14
+	SUBQ  $0x10, R14
+	JB    copy_1_small
+
+copy_1_loop:
+	MOVUPS (R10), X0
+	MOVUPS X0, (R9)
+	ADDQ   $0x10, R10
+	ADDQ   $0x10, R9
+	SUBQ   $0x10, R14
+	JAE    copy_1_loop
+	LEAQ   16(R10)(R14*1), R10
+	LEAQ   16(R9)(R14*1), R9
+	MOVUPS -16(R10), X0
+	MOVUPS X0, -16(R9)
+	JMP    copy_1_end
+
+copy_1_small:
+	CMPQ CX, $0x03
+	JE   copy_1_move_3
+	JB   copy_1_move_1or2
+	CMPQ CX, $0x08
+	JB   copy_1_move_4through7
+	JMP  copy_1_move_8through16
+
+copy_1_move_1or2:
+	MOVB (R10), R14
+	MOVB -1(R10)(CX*1), R15
+	MOVB R14, (R9)
+	MOVB R15, -1(R9)(CX*1)
+	ADDQ CX, R10
+	ADDQ CX, R9
+	JMP  copy_1_end
+
+copy_1_move_3:
+	MOVW (R10), R14
+	MOVB 2(R10), R15
+	MOVW R14, (R9)
+	MOVB R15, 2(R9)
+	ADDQ CX, R10
+	ADDQ CX, R9
+	JMP  copy_1_end
+
+copy_1_move_4through7:
+	MOVL (R10), R14
+	MOVL -4(R10)(CX*1), R15
+	MOVL R14, (R9)
+	MOVL R15, -4(R9)(CX*1)
+	ADDQ CX, R10
+	ADDQ CX, R9
+	JMP  copy_1_end
+
+copy_1_move_8through16:
+	MOVQ (R10), R14
+	MOVQ -8(R10)(CX*1), R15
+	MOVQ R14, (R9)
+	MOVQ R15, -8(R9)(CX*1)
+	ADDQ CX, R10
+	ADDQ CX, R9
+
+copy_1_end:
+	ADDQ CX, R11
+
+	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+	MOVQ R11, CX
+	ADDQ 40(SP), CX
+	CMPQ R12, CX
+	JG   error_match_off_too_big
+	CMPQ R12, 56(SP)
+	JG   error_match_off_too_big
+
+	// Copy match from history
+	MOVQ R12, CX
+	SUBQ R11, CX
+	JLS  copy_match
+	MOVQ 48(SP), R14
+	SUBQ CX, R14
+	CMPQ R13, CX
+	JG   copy_all_from_history
+	MOVQ R13, CX
+	SUBQ $0x10, CX
+	JB   copy_4_small
+
+copy_4_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R9)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R9
+	SUBQ   $0x10, CX
+	JAE    copy_4_loop
+	LEAQ   16(R14)(CX*1), R14
+	LEAQ   16(R9)(CX*1), R9
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R9)
+	JMP    copy_4_end
+
+copy_4_small:
+	CMPQ R13, $0x03
+	JE   copy_4_move_3
+	CMPQ R13, $0x08
+	JB   copy_4_move_4through7
+	JMP  copy_4_move_8through16
+
+copy_4_move_3:
+	MOVW (R14), CX
+	MOVB 2(R14), R12
+	MOVW CX, (R9)
+	MOVB R12, 2(R9)
+	ADDQ R13, R14
+	ADDQ R13, R9
+	JMP  copy_4_end
+
+copy_4_move_4through7:
+	MOVL (R14), CX
+	MOVL -4(R14)(R13*1), R12
+	MOVL CX, (R9)
+	MOVL R12, -4(R9)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R9
+	JMP  copy_4_end
+
+copy_4_move_8through16:
+	MOVQ (R14), CX
+	MOVQ -8(R14)(R13*1), R12
+	MOVQ CX, (R9)
+	MOVQ R12, -8(R9)(R13*1)
+	ADDQ R13, R14
+	ADDQ R13, R9
+
+copy_4_end:
+	ADDQ R13, R11
+	JMP  handle_loop
+	JMP loop_finished
+
+copy_all_from_history:
+	MOVQ CX, R15
+	SUBQ $0x10, R15
+	JB   copy_5_small
+
+copy_5_loop:
+	MOVUPS (R14), X0
+	MOVUPS X0, (R9)
+	ADDQ   $0x10, R14
+	ADDQ   $0x10, R9
+	SUBQ   $0x10, R15
+	JAE    copy_5_loop
+	LEAQ   16(R14)(R15*1), R14
+	LEAQ   16(R9)(R15*1), R9
+	MOVUPS -16(R14), X0
+	MOVUPS X0, -16(R9)
+	JMP    copy_5_end
+
+copy_5_small:
+	CMPQ CX, $0x03
+	JE   copy_5_move_3
+	JB   copy_5_move_1or2
+	CMPQ CX, $0x08
+	JB   copy_5_move_4through7
+	JMP  copy_5_move_8through16
+
+copy_5_move_1or2:
+	MOVB (R14), R15
+	MOVB -1(R14)(CX*1), BP
+	MOVB R15, (R9)
+	MOVB BP, -1(R9)(CX*1)
+	ADDQ CX, R14
+	ADDQ CX, R9
+	JMP  copy_5_end
+
+copy_5_move_3:
+	MOVW (R14), R15
+	MOVB 2(R14), BP
+	MOVW R15, (R9)
+	MOVB BP, 2(R9)
+	ADDQ CX, R14
+	ADDQ CX, R9
+	JMP  copy_5_end
+
+copy_5_move_4through7:
+	MOVL (R14), R15
+	MOVL -4(R14)(CX*1), BP
+	MOVL R15, (R9)
+	MOVL BP, -4(R9)(CX*1)
+	ADDQ CX, R14
+	ADDQ CX, R9
+	JMP  copy_5_end
+
+copy_5_move_8through16:
+	MOVQ (R14), R15
+	MOVQ -8(R14)(CX*1), BP
+	MOVQ R15, (R9)
+	MOVQ BP, -8(R9)(CX*1)
+	ADDQ CX, R14
+	ADDQ CX, R9
+
+copy_5_end:
+	ADDQ CX, R11
+	SUBQ CX, R13
+
+	// Copy match from the current buffer
+copy_match:
+	MOVQ R9, CX
+	SUBQ R12, CX
+
+	// ml <= mo
+	CMPQ R13, R12
+	JA   copy_overlapping_match
+
+	// Copy non-overlapping match
+	ADDQ R13, R11
+	MOVQ R13, R12
+	SUBQ $0x10, R12
+	JB   copy_2_small
+
+copy_2_loop:
+	MOVUPS (CX), X0
+	MOVUPS X0, (R9)
+	ADDQ   $0x10, CX
+	ADDQ   $0x10, R9
+	SUBQ   $0x10, R12
+	JAE    copy_2_loop
+	LEAQ   16(CX)(R12*1), CX
+	LEAQ   16(R9)(R12*1), R9
+	MOVUPS -16(CX), X0
+	MOVUPS X0, -16(R9)
+	JMP    copy_2_end
+
+copy_2_small:
+	CMPQ R13, $0x03
+	JE   copy_2_move_3
+	JB   copy_2_move_1or2
+	CMPQ R13, $0x08
+	JB   copy_2_move_4through7
+	JMP  copy_2_move_8through16
+
+copy_2_move_1or2:
+	MOVB (CX), R12
+	MOVB -1(CX)(R13*1), R14
+	MOVB R12, (R9)
+	MOVB R14, -1(R9)(R13*1)
+	ADDQ R13, CX
+	ADDQ R13, R9
+	JMP  copy_2_end
+
+copy_2_move_3:
+	MOVW (CX), R12
+	MOVB 2(CX), R14
+	MOVW R12, (R9)
+	MOVB R14, 2(R9)
+	ADDQ R13, CX
+	ADDQ R13, R9
+	JMP  copy_2_end
+
+copy_2_move_4through7:
+	MOVL (CX), R12
+	MOVL -4(CX)(R13*1), R14
+	MOVL R12, (R9)
+	MOVL R14, -4(R9)(R13*1)
+	ADDQ R13, CX
+	ADDQ R13, R9
+	JMP  copy_2_end
+
+copy_2_move_8through16:
+	MOVQ (CX), R12
+	MOVQ -8(CX)(R13*1), R14
+	MOVQ R12, (R9)
+	MOVQ R14, -8(R9)(R13*1)
+	ADDQ R13, CX
+	ADDQ R13, R9
+
+copy_2_end:
+	JMP handle_loop
+
+	// Copy overlapping match
+copy_overlapping_match:
+	ADDQ R13, R11
+
+copy_slow_3:
+	MOVB (CX), R12
+	MOVB R12, (R9)
+	INCQ CX
+	INCQ R9
+	DECQ R13
+	JNZ  copy_slow_3
+
+handle_loop:
+	MOVQ ctx+16(FP), CX
+	DECQ 96(CX)
+	JNS  sequenceDecs_decodeSync_safe_bmi2_main_loop
+
+loop_finished:
+	MOVQ br+8(FP), CX
+	MOVQ AX, 24(CX)
+	MOVB DL, 32(CX)
+	MOVQ BX, 8(CX)
+
+	// Update the context
+	MOVQ ctx+16(FP), AX
+	MOVQ R11, 136(AX)
+	MOVQ 144(AX), CX
+	SUBQ CX, R10
+	MOVQ R10, 168(AX)
+
+	// Return success
+	MOVQ $0x00000000, ret+24(FP)
+	RET
+
+	// Return with match length error
+sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch:
+	MOVQ 16(SP), AX
+	MOVQ ctx+16(FP), CX
+	MOVQ AX, 216(CX)
+	MOVQ $0x00000001, ret+24(FP)
+	RET
+
+	// Return with match too long error
+sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ $0x00000002, ret+24(FP)
+	RET
+
+	// Return with match offset too long error
+error_match_off_too_big:
+	MOVQ ctx+16(FP), AX
+	MOVQ 8(SP), CX
+	MOVQ CX, 224(AX)
+	MOVQ R11, 136(AX)
+	MOVQ $0x00000003, ret+24(FP)
+	RET
+
+	// Return with not enough literals error
+error_not_enough_literals:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ $0x00000004, ret+24(FP)
+	RET
+
+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
+	// Return with not enough output space error
+error_not_enough_space:
+	MOVQ ctx+16(FP), AX
+	MOVQ 24(SP), CX
+	MOVQ CX, 208(AX)
+	MOVQ 16(SP), CX
+	MOVQ CX, 216(AX)
+	MOVQ R11, 136(AX)
+	MOVQ $0x00000005, ret+24(FP)
+	RET
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
new file mode 100644
index 00000000000..2fb35b788c1
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
@@ -0,0 +1,237 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+package zstd
+
+import (
+	"fmt"
+	"io"
+)
+
+// decode sequences from the stream with the provided history but without dictionary.
+func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
+	return false, nil
+}
+
+// decode sequences from the stream without the provided history.
+func (s *sequenceDecs) decode(seqs []seqVals) error {
+	br := s.br
+
+	// Grab full sizes tables, to avoid bounds checks.
+	llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
+	llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
+	s.seqSize = 0
+	litRemain := len(s.literals)
+
+	maxBlockSize := maxCompressedBlockSize
+	if s.windowSize < maxBlockSize {
+		maxBlockSize = s.windowSize
+	}
+	for i := range seqs {
+		var ll, mo, ml int
+		if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
+			// inlined function:
+			// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
+
+			// Final will not read from stream.
+			var llB, mlB, moB uint8
+			ll, llB = llState.final()
+			ml, mlB = mlState.final()
+			mo, moB = ofState.final()
+
+			// extra bits are stored in reverse order.
+			br.fillFast()
+			mo += br.getBits(moB)
+			if s.maxBits > 32 {
+				br.fillFast()
+			}
+			ml += br.getBits(mlB)
+			ll += br.getBits(llB)
+
+			if moB > 1 {
+				s.prevOffset[2] = s.prevOffset[1]
+				s.prevOffset[1] = s.prevOffset[0]
+				s.prevOffset[0] = mo
+			} else {
+				// mo = s.adjustOffset(mo, ll, moB)
+				// Inlined for rather big speedup
+				if ll == 0 {
+					// There is an exception though, when current sequence's literals_length = 0.
+					// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+					// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+					mo++
+				}
+
+				if mo == 0 {
+					mo = s.prevOffset[0]
+				} else {
+					var temp int
+					if mo == 3 {
+						temp = s.prevOffset[0] - 1
+					} else {
+						temp = s.prevOffset[mo]
+					}
+
+					if temp == 0 {
+						// 0 is not valid; input is corrupted; force offset to 1
+						println("WARNING: temp was 0")
+						temp = 1
+					}
+
+					if mo != 1 {
+						s.prevOffset[2] = s.prevOffset[1]
+					}
+					s.prevOffset[1] = s.prevOffset[0]
+					s.prevOffset[0] = temp
+					mo = temp
+				}
+			}
+			br.fillFast()
+		} else {
+			if br.overread() {
+				if debugDecoder {
+					printf("reading sequence %d, exceeded available data\n", i)
+				}
+				return io.ErrUnexpectedEOF
+			}
+			ll, mo, ml = s.next(br, llState, mlState, ofState)
+			br.fill()
+		}
+
+		if debugSequences {
+			println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
+		}
+		// Evaluate.
+		// We might be doing this async, so do it early.
+		if mo == 0 && ml > 0 {
+			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+		}
+		if ml > maxMatchLen {
+			return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
+		}
+		s.seqSize += ll + ml
+		if s.seqSize > maxBlockSize {
+			return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+		}
+		litRemain -= ll
+		if litRemain < 0 {
+			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
+		}
+		seqs[i] = seqVals{
+			ll: ll,
+			ml: ml,
+			mo: mo,
+		}
+		if i == len(seqs)-1 {
+			// This is the last sequence, so we shouldn't update state.
+			break
+		}
+
+		// Manually inlined, ~ 5-20% faster
+		// Update all 3 states at once. Approx 20% faster.
+		nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
+		if nBits == 0 {
+			llState = llTable[llState.newState()&maxTableMask]
+			mlState = mlTable[mlState.newState()&maxTableMask]
+			ofState = ofTable[ofState.newState()&maxTableMask]
+		} else {
+			bits := br.get32BitsFast(nBits)
+			lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
+			llState = llTable[(llState.newState()+lowBits)&maxTableMask]
+
+			lowBits = uint16(bits >> (ofState.nbBits() & 31))
+			lowBits &= bitMask[mlState.nbBits()&15]
+			mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
+
+			lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
+			ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
+		}
+	}
+	s.seqSize += litRemain
+	if s.seqSize > maxBlockSize {
+		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+	}
+	err := br.close()
+	if err != nil {
+		printf("Closing sequences: %v, %+v\n", err, *br)
+	}
+	return err
+}
+
+// executeSimple handles cases when a dictionary is not used.
+func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
+	// Ensure we have enough output size...
+	if len(s.out)+s.seqSize > cap(s.out) {
+		addBytes := s.seqSize + len(s.out)
+		s.out = append(s.out, make([]byte, addBytes)...)
+		s.out = s.out[:len(s.out)-addBytes]
+	}
+
+	if debugDecoder {
+		printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
+	}
+
+	var t = len(s.out)
+	out := s.out[:t+s.seqSize]
+
+	for _, seq := range seqs {
+		// Add literals
+		copy(out[t:], s.literals[:seq.ll])
+		t += seq.ll
+		s.literals = s.literals[seq.ll:]
+
+		// Malformed input
+		if seq.mo > t+len(hist) || seq.mo > s.windowSize {
+			return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
+		}
+
+		// Copy from history.
+		if v := seq.mo - t; v > 0 {
+			// v is the start position in history from end.
+			start := len(hist) - v
+			if seq.ml > v {
+				// Some goes into the current block.
+				// Copy remainder of history
+				copy(out[t:], hist[start:])
+				t += v
+				seq.ml -= v
+			} else {
+				copy(out[t:], hist[start:start+seq.ml])
+				t += seq.ml
+				continue
+			}
+		}
+
+		// We must be in the current buffer now
+		if seq.ml > 0 {
+			start := t - seq.mo
+			if seq.ml <= t-start {
+				// No overlap
+				copy(out[t:], out[start:start+seq.ml])
+				t += seq.ml
+			} else {
+				// Overlapping copy
+				// Extend destination slice and copy one byte at the time.
+				src := out[start : start+seq.ml]
+				dst := out[t:]
+				dst = dst[:len(src)]
+				t += len(src)
+				// Destination is the space we just added.
+				for i := range src {
+					dst[i] = src[i]
+				}
+			}
+		}
+	}
+	// Add final literals
+	copy(out[t:], s.literals)
+	if debugDecoder {
+		t += len(s.literals)
+		if t != len(out) {
+			panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
+		}
+	}
+	s.out = out
+
+	return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
index 9e1baad73be..ec13594e89b 100644
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 	var written int64
 	var readHeader bool
 	{
-		var header []byte
-		var n int
-		header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
+		header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
 
+		var n int
 		n, r.err = w.Write(header)
 		if r.err != nil {
 			return written, r.err
diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go
index 967f29b3120..29c15c8c4ef 100644
--- a/vendor/github.com/klauspost/compress/zstd/zip.go
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@@ -18,36 +18,58 @@ const ZipMethodWinZip = 93
 // See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
 const ZipMethodPKWare = 20
 
-var zipReaderPool sync.Pool
+// zipReaderPool is the default reader pool.
+var zipReaderPool = sync.Pool{New: func() interface{} {
+	z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1))
+	if err != nil {
+		panic(err)
+	}
+	return z
+}}
 
-// newZipReader cannot be used since we would leak goroutines...
-func newZipReader(r io.Reader) io.ReadCloser {
-	dec, ok := zipReaderPool.Get().(*Decoder)
-	if ok {
-		dec.Reset(r)
-	} else {
-		d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
-		if err != nil {
-			panic(err)
+// newZipReader creates a pooled zip decompressor.
+func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser {
+	pool := &zipReaderPool
+	if len(opts) > 0 {
+		opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...)
+		// Force concurrency 1
+		opts = append(opts, WithDecoderConcurrency(1))
+		// Create our own pool
+		pool = &sync.Pool{}
+	}
+	return func(r io.Reader) io.ReadCloser {
+		dec, ok := pool.Get().(*Decoder)
+		if ok {
+			dec.Reset(r)
+		} else {
+			d, err := NewReader(r, opts...)
+			if err != nil {
+				panic(err)
+			}
+			dec = d
 		}
-		dec = d
+		return &pooledZipReader{dec: dec, pool: pool}
 	}
-	return &pooledZipReader{dec: dec}
 }
 
 type pooledZipReader struct {
-	mu  sync.Mutex // guards Close and Read
-	dec *Decoder
+	mu   sync.Mutex // guards Close and Read
+	pool *sync.Pool
+	dec  *Decoder
 }
 
 func (r *pooledZipReader) Read(p []byte) (n int, err error) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	if r.dec == nil {
-		return 0, errors.New("Read after Close")
+		return 0, errors.New("read after close or EOF")
 	}
 	dec, err := r.dec.Read(p)
-
+	if err == io.EOF {
+		r.dec.Reset(nil)
+		r.pool.Put(r.dec)
+		r.dec = nil
+	}
 	return dec, err
 }
 
@@ -57,7 +79,7 @@ func (r *pooledZipReader) Close() error {
 	var err error
 	if r.dec != nil {
 		err = r.dec.Reset(nil)
-		zipReaderPool.Put(r.dec)
+		r.pool.Put(r.dec)
 		r.dec = nil
 	}
 	return err
@@ -111,12 +133,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
 
 // ZipDecompressor returns a decompressor that can be registered with zip libraries.
 // See ZipCompressor for example.
-func ZipDecompressor() func(r io.Reader) io.ReadCloser {
-	return func(r io.Reader) io.ReadCloser {
-		d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
-		if err != nil {
-			panic(err)
-		}
-		return d.IOReadCloser()
-	}
+// Options can be specified. WithDecoderConcurrency(1) is forced,
+// and by default a 128MB maximum decompression window is specified.
+// The window size can be overridden if required.
+func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser {
+	return newZipReader(opts...)
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index ef1d49a009c..4be7cc73671 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -9,7 +9,6 @@ import (
 	"errors"
 	"log"
 	"math"
-	"math/bits"
 )
 
 // enable debug printing
@@ -36,8 +35,8 @@ const forcePreDef = false
 // zstdMinMatch is the minimum zstd match length.
 const zstdMinMatch = 3
 
-// Reset the buffer offset when reaching this.
-const bufferReset = math.MaxInt32 - MaxWindowSize
+// fcsUnknown is used for unknown frame content size.
+const fcsUnknown = math.MaxUint64
 
 var (
 	// ErrReservedBlockType is returned when a reserved block type is found.
@@ -52,6 +51,10 @@ var (
 	// Typically returned on invalid input.
 	ErrBlockTooSmall = errors.New("block too small")
 
+	// ErrUnexpectedBlockSize is returned when a block has unexpected size.
+	// Typically returned on invalid input.
+	ErrUnexpectedBlockSize = errors.New("unexpected block size")
+
 	// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
 	// Typically this indicates wrong or corrupted input.
 	ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
@@ -68,13 +71,16 @@ var (
 	ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
 
 	// ErrUnknownDictionary is returned if the dictionary ID is unknown.
-	// For the time being dictionaries are not supported.
 	ErrUnknownDictionary = errors.New("unknown dictionary")
 
 	// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
 	// This is only returned if SingleSegment is specified on the frame.
 	ErrFrameSizeExceeded = errors.New("frame size exceeded")
 
+	// ErrFrameSizeMismatch is returned if the stated frame size does not match the expected size.
+	// This is only returned if SingleSegment is specified on the frame.
+	ErrFrameSizeMismatch = errors.New("frame size does not match size on stream")
+
 	// ErrCRCMismatch is returned if CRC mismatches.
 	ErrCRCMismatch = errors.New("CRC check failed")
 
@@ -99,49 +105,12 @@ func printf(format string, a ...interface{}) {
 	}
 }
 
-// matchLenFast does matching, but will not match the last up to 7 bytes.
-func matchLenFast(a, b []byte) int {
-	endI := len(a) & (math.MaxInt32 - 7)
-	for i := 0; i < endI; i += 8 {
-		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-			return i + bits.TrailingZeros64(diff)>>3
-		}
-	}
-	return endI
-}
-
-// matchLen returns the maximum length.
-// a must be the shortest of the two.
-// The function also returns whether all bytes matched.
-func matchLen(a, b []byte) int {
-	b = b[:len(a)]
-	for i := 0; i < len(a)-7; i += 8 {
-		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
-			return i + (bits.TrailingZeros64(diff) >> 3)
-		}
-	}
-
-	checked := (len(a) >> 3) << 3
-	a = a[checked:]
-	b = b[checked:]
-	for i := range a {
-		if a[i] != b[i] {
-			return i + checked
-		}
-	}
-	return len(a) + checked
-}
-
 func load3232(b []byte, i int32) uint32 {
-	return binary.LittleEndian.Uint32(b[i:])
+	return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:])
 }
 
 func load6432(b []byte, i int32) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
-}
-
-func load64(b []byte, i int) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
+	return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:])
 }
 
 type byter interface {
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go
index ffff4b6d186..36b0aeb8f1f 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/config.go
@@ -48,6 +48,17 @@ type ImageConfig struct {
 
 	// StopSignal contains the system call signal that will be sent to the container to exit.
 	StopSignal string `json:"StopSignal,omitempty"`
+
+	// ArgsEscaped
+	//
+	// Deprecated: This field is present only for legacy compatibility with
+	// Docker and should not be used by new image builders.  It is used by Docker
+	// for Windows images to indicate that the `Entrypoint` or `Cmd` or both,
+	// contains only a single element array, that is a pre-escaped, and combined
+	// into a single string `CommandLine`. If `true` the value in `Entrypoint` or
+	// `Cmd` should be used as-is to avoid double escaping.
+	// https://github.com/opencontainers/image-spec/pull/892
+	ArgsEscaped bool `json:"ArgsEscaped,omitempty"`
 }
 
 // RootFS describes a layer content addresses
@@ -86,22 +97,8 @@ type Image struct {
 	// Author defines the name and/or email address of the person or entity which created and is responsible for maintaining the image.
 	Author string `json:"author,omitempty"`
 
-	// Architecture is the CPU architecture which the binaries in this image are built to run on.
-	Architecture string `json:"architecture"`
-
-	// Variant is the variant of the specified CPU architecture which image binaries are intended to run on.
-	Variant string `json:"variant,omitempty"`
-
-	// OS is the name of the operating system which the image is built to run on.
-	OS string `json:"os"`
-
-	// OSVersion is an optional field specifying the operating system
-	// version, for example on Windows `10.0.14393.1066`.
-	OSVersion string `json:"os.version,omitempty"`
-
-	// OSFeatures is an optional field specifying an array of strings,
-	// each listing a required OS feature (for example on Windows `win32k`).
-	OSFeatures []string `json:"os.features,omitempty"`
+	// Platform describes the platform which the image in the manifest runs on.
+	Platform
 
 	// Config defines the execution parameters which should be used as a base when running a container using the image.
 	Config ImageConfig `json:"config,omitempty"`
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go
index 6e442a0853f..1881b11814b 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/descriptor.go
@@ -1,4 +1,4 @@
-// Copyright 2016 The Linux Foundation
+// Copyright 2016-2022 The Linux Foundation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ import digest "github.com/opencontainers/go-digest"
 // when marshalled to JSON.
 type Descriptor struct {
 	// MediaType is the media type of the object this schema refers to.
-	MediaType string `json:"mediaType,omitempty"`
+	MediaType string `json:"mediaType"`
 
 	// Digest is the digest of the targeted content.
 	Digest digest.Digest `json:"digest"`
@@ -35,16 +35,24 @@ type Descriptor struct {
 	// Annotations contains arbitrary metadata relating to the targeted content.
 	Annotations map[string]string `json:"annotations,omitempty"`
 
+	// Data is an embedding of the targeted content. This is encoded as a base64
+	// string when marshalled to JSON (automatically, by encoding/json). If
+	// present, Data can be used directly to avoid fetching the targeted content.
+	Data []byte `json:"data,omitempty"`
+
 	// Platform describes the platform which the image in the manifest runs on.
 	//
 	// This should only be used when referring to a manifest.
 	Platform *Platform `json:"platform,omitempty"`
+
+	// ArtifactType is the IANA media type of this artifact.
+	ArtifactType string `json:"artifactType,omitempty"`
 }
 
 // Platform describes the platform which the image in the manifest runs on.
 type Platform struct {
 	// Architecture field specifies the CPU architecture, for example
-	// `amd64` or `ppc64`.
+	// `amd64` or `ppc64le`.
 	Architecture string `json:"architecture"`
 
 	// OS specifies the operating system, for example `linux` or `windows`.
@@ -62,3 +70,11 @@ type Platform struct {
 	// example `v7` to specify ARMv7 when architecture is `arm`.
 	Variant string `json:"variant,omitempty"`
 }
+
+// DescriptorEmptyJSON is the descriptor of a blob with content of `{}`.
+var DescriptorEmptyJSON = Descriptor{
+	MediaType: MediaTypeEmptyJSON,
+	Digest:    `sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a`,
+	Size:      2,
+	Data:      []byte(`{}`),
+}
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go
index 82da6c6a898..e2bed9d4e46 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/index.go
@@ -21,12 +21,18 @@ import "github.com/opencontainers/image-spec/specs-go"
 type Index struct {
 	specs.Versioned
 
-	// MediaType specificies the type of this document data structure e.g. `application/vnd.oci.image.index.v1+json`
+	// MediaType specifies the type of this document data structure e.g. `application/vnd.oci.image.index.v1+json`
 	MediaType string `json:"mediaType,omitempty"`
 
+	// ArtifactType specifies the IANA media type of artifact when the manifest is used for an artifact.
+	ArtifactType string `json:"artifactType,omitempty"`
+
 	// Manifests references platform specific manifests.
 	Manifests []Descriptor `json:"manifests"`
 
+	// Subject is an optional link from the image manifest to another manifest forming an association between the image manifest and the other manifest.
+	Subject *Descriptor `json:"subject,omitempty"`
+
 	// Annotations contains arbitrary metadata for the image index.
 	Annotations map[string]string `json:"annotations,omitempty"`
 }
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go
index fc79e9e0d14..c5503cb3053 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/layout.go
@@ -15,10 +15,14 @@
 package v1
 
 const (
-	// ImageLayoutFile is the file name of oci image layout file
+	// ImageLayoutFile is the file name containing ImageLayout in an OCI Image Layout
 	ImageLayoutFile = "oci-layout"
 	// ImageLayoutVersion is the version of ImageLayout
 	ImageLayoutVersion = "1.0.0"
+	// ImageIndexFile is the file name of the entry point for references and descriptors in an OCI Image Layout
+	ImageIndexFile = "index.json"
+	// ImageBlobsDir is the directory name containing content addressable blobs in an OCI Image Layout
+	ImageBlobsDir = "blobs"
 )
 
 // ImageLayout is the structure in the "oci-layout" file, found in the root
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go
index d72d15ce4bb..26fec52a6bc 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/manifest.go
@@ -1,4 +1,4 @@
-// Copyright 2016 The Linux Foundation
+// Copyright 2016-2022 The Linux Foundation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,9 +20,12 @@ import "github.com/opencontainers/image-spec/specs-go"
 type Manifest struct {
 	specs.Versioned
 
-	// MediaType specificies the type of this document data structure e.g. `application/vnd.oci.image.manifest.v1+json`
+	// MediaType specifies the type of this document data structure e.g. `application/vnd.oci.image.manifest.v1+json`
 	MediaType string `json:"mediaType,omitempty"`
 
+	// ArtifactType specifies the IANA media type of artifact when the manifest is used for an artifact.
+	ArtifactType string `json:"artifactType,omitempty"`
+
 	// Config references a configuration object for a container, by digest.
 	// The referenced configuration object is a JSON blob that the runtime uses to set up the container.
 	Config Descriptor `json:"config"`
@@ -30,6 +33,9 @@ type Manifest struct {
 	// Layers is an indexed list of layers referenced by the manifest.
 	Layers []Descriptor `json:"layers"`
 
+	// Subject is an optional link from the image manifest to another manifest forming an association between the image manifest and the other manifest.
+	Subject *Descriptor `json:"subject,omitempty"`
+
 	// Annotations contains arbitrary metadata for the image manifest.
 	Annotations map[string]string `json:"annotations,omitempty"`
 }
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go b/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go
index 4f35ac134fe..ce8313e7962 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/v1/mediatype.go
@@ -21,12 +21,20 @@ const (
 	// MediaTypeLayoutHeader specifies the media type for the oci-layout.
 	MediaTypeLayoutHeader = "application/vnd.oci.layout.header.v1+json"
 
+	// MediaTypeImageIndex specifies the media type for an image index.
+	MediaTypeImageIndex = "application/vnd.oci.image.index.v1+json"
+
 	// MediaTypeImageManifest specifies the media type for an image manifest.
 	MediaTypeImageManifest = "application/vnd.oci.image.manifest.v1+json"
 
-	// MediaTypeImageIndex specifies the media type for an image index.
-	MediaTypeImageIndex = "application/vnd.oci.image.index.v1+json"
+	// MediaTypeImageConfig specifies the media type for the image configuration.
+	MediaTypeImageConfig = "application/vnd.oci.image.config.v1+json"
+
+	// MediaTypeEmptyJSON specifies the media type for an unused blob containing the value "{}".
+	MediaTypeEmptyJSON = "application/vnd.oci.empty.v1+json"
+)
 
+const (
 	// MediaTypeImageLayer is the media type used for layers referenced by the manifest.
 	MediaTypeImageLayer = "application/vnd.oci.image.layer.v1.tar"
 
@@ -37,21 +45,41 @@ const (
 	// MediaTypeImageLayerZstd is the media type used for zstd compressed
 	// layers referenced by the manifest.
 	MediaTypeImageLayerZstd = "application/vnd.oci.image.layer.v1.tar+zstd"
+)
 
+// Non-distributable layer media-types.
+//
+// Deprecated: Non-distributable layers are deprecated, and not recommended
+// for future use. Implementations SHOULD NOT produce new non-distributable
+// layers.
+// https://github.com/opencontainers/image-spec/pull/965
+const (
 	// MediaTypeImageLayerNonDistributable is the media type for layers referenced by
 	// the manifest but with distribution restrictions.
+	//
+	// Deprecated: Non-distributable layers are deprecated, and not recommended
+	// for future use. Implementations SHOULD NOT produce new non-distributable
+	// layers.
+	// https://github.com/opencontainers/image-spec/pull/965
 	MediaTypeImageLayerNonDistributable = "application/vnd.oci.image.layer.nondistributable.v1.tar"
 
 	// MediaTypeImageLayerNonDistributableGzip is the media type for
 	// gzipped layers referenced by the manifest but with distribution
 	// restrictions.
+	//
+	// Deprecated: Non-distributable layers are deprecated, and not recommended
+	// for future use. Implementations SHOULD NOT produce new non-distributable
+	// layers.
+	// https://github.com/opencontainers/image-spec/pull/965
 	MediaTypeImageLayerNonDistributableGzip = "application/vnd.oci.image.layer.nondistributable.v1.tar+gzip"
 
 	// MediaTypeImageLayerNonDistributableZstd is the media type for zstd
 	// compressed layers referenced by the manifest but with distribution
 	// restrictions.
+	//
+	// Deprecated: Non-distributable layers are deprecated, and not recommended
+	// for future use. Implementations SHOULD NOT produce new non-distributable
+	// layers.
+	// https://github.com/opencontainers/image-spec/pull/965
 	MediaTypeImageLayerNonDistributableZstd = "application/vnd.oci.image.layer.nondistributable.v1.tar+zstd"
-
-	// MediaTypeImageConfig specifies the media type for the image configuration.
-	MediaTypeImageConfig = "application/vnd.oci.image.config.v1+json"
 )
diff --git a/vendor/github.com/opencontainers/image-spec/specs-go/version.go b/vendor/github.com/opencontainers/image-spec/specs-go/version.go
index 31f99cf645c..6d01f6e7175 100644
--- a/vendor/github.com/opencontainers/image-spec/specs-go/version.go
+++ b/vendor/github.com/opencontainers/image-spec/specs-go/version.go
@@ -20,12 +20,12 @@ const (
 	// VersionMajor is for an API incompatible changes
 	VersionMajor = 1
 	// VersionMinor is for functionality in a backwards-compatible manner
-	VersionMinor = 0
+	VersionMinor = 1
 	// VersionPatch is for backwards-compatible bug fixes
-	VersionPatch = 2
+	VersionPatch = 0
 
 	// VersionDev indicates development branch. Releases will be empty string.
-	VersionDev = "-dev"
+	VersionDev = "-rc.6"
 )
 
 // Version is the specification version that the package types support.
diff --git a/vendor/google.golang.org/genproto/LICENSE b/vendor/google.golang.org/genproto/googleapis/rpc/LICENSE
similarity index 100%
rename from vendor/google.golang.org/genproto/LICENSE
rename to vendor/google.golang.org/genproto/googleapis/rpc/LICENSE
diff --git a/vendor/google.golang.org/grpc/README.md b/vendor/google.golang.org/grpc/README.md
index 0e6ae69a584..ab0fbb79b86 100644
--- a/vendor/google.golang.org/grpc/README.md
+++ b/vendor/google.golang.org/grpc/README.md
@@ -1,8 +1,8 @@
 # gRPC-Go
 
-[![Build Status](https://travis-ci.org/grpc/grpc-go.svg)](https://travis-ci.org/grpc/grpc-go)
 [![GoDoc](https://pkg.go.dev/badge/google.golang.org/grpc)][API]
 [![GoReportCard](https://goreportcard.com/badge/grpc/grpc-go)](https://goreportcard.com/report/github.com/grpc/grpc-go)
+[![codecov](https://codecov.io/gh/grpc/grpc-go/graph/badge.svg)](https://codecov.io/gh/grpc/grpc-go)
 
 The [Go][] implementation of [gRPC][]: A high performance, open source, general
 RPC framework that puts mobile and HTTP/2 first. For more information see the
@@ -14,21 +14,14 @@ RPC framework that puts mobile and HTTP/2 first. For more information see the
 
 ## Installation
 
-With [Go module][] support (Go 1.11+), simply add the following import
+Simply add the following import to your code, and then `go [build|run|test]`
+will automatically fetch the necessary dependencies:
+
 
 ```go
 import "google.golang.org/grpc"
 ```
 
-to your code, and then `go [build|run|test]` will automatically fetch the
-necessary dependencies.
-
-Otherwise, to install the `grpc-go` package, run the following command:
-
-```console
-$ go get -u google.golang.org/grpc
-```
-
 > **Note:** If you are trying to access `grpc-go` from **China**, see the
 > [FAQ](#FAQ) below.
 
@@ -56,15 +49,6 @@ To build Go code, there are several options:
 
 - Set up a VPN and access google.golang.org through that.
 
-- Without Go module support: `git clone` the repo manually:
-
-  ```sh
-  git clone https://github.com/grpc/grpc-go.git $GOPATH/src/google.golang.org/grpc
-  ```
-
-  You will need to do the same for all of grpc's dependencies in `golang.org`,
-  e.g. `golang.org/x/net`.
-
 - With Go module support: it is possible to use the `replace` feature of `go
   mod` to create aliases for golang.org packages.  In your project's directory:
 
@@ -76,33 +60,13 @@ To build Go code, there are several options:
   ```
 
   Again, this will need to be done for all transitive dependencies hosted on
-  golang.org as well. For details, refer to [golang/go issue #28652](https://github.com/golang/go/issues/28652).
+  golang.org as well. For details, refer to [golang/go issue
+  #28652](https://github.com/golang/go/issues/28652).
 
 ### Compiling error, undefined: grpc.SupportPackageIsVersion
 
-#### If you are using Go modules:
-
-Ensure your gRPC-Go version is `require`d at the appropriate version in
-the same module containing the generated `.pb.go` files.  For example,
-`SupportPackageIsVersion6` needs `v1.27.0`, so in your `go.mod` file:
-
-```go
-module <your module name>
-
-require (
-    google.golang.org/grpc v1.27.0
-)
-```
-
-#### If you are *not* using Go modules:
-
-Update the `proto` package, gRPC package, and rebuild the `.proto` files:
-
-```sh
-go get -u github.com/golang/protobuf/{proto,protoc-gen-go}
-go get -u google.golang.org/grpc
-protoc --go_out=plugins=grpc:. *.proto
-```
+Please update to the latest version of gRPC-Go using
+`go get google.golang.org/grpc`.
 
 ### How to turn on logging
 
@@ -121,9 +85,11 @@ possible reasons, including:
  1. mis-configured transport credentials, connection failed on handshaking
  1. bytes disrupted, possibly by a proxy in between
  1. server shutdown
- 1. Keepalive parameters caused connection shutdown, for example if you have configured
-    your server to terminate connections regularly to [trigger DNS lookups](https://github.com/grpc/grpc-go/issues/3170#issuecomment-552517779).
-    If this is the case, you may want to increase your [MaxConnectionAgeGrace](https://pkg.go.dev/google.golang.org/grpc/keepalive?tab=doc#ServerParameters),
+ 1. Keepalive parameters caused connection shutdown, for example if you have
+    configured your server to terminate connections regularly to [trigger DNS
+    lookups](https://github.com/grpc/grpc-go/issues/3170#issuecomment-552517779).
+    If this is the case, you may want to increase your
+    [MaxConnectionAgeGrace](https://pkg.go.dev/google.golang.org/grpc/keepalive?tab=doc#ServerParameters),
     to allow longer RPC calls to finish.
 
 It can be tricky to debug this because the error happens on the client side but
diff --git a/vendor/google.golang.org/grpc/attributes/attributes.go b/vendor/google.golang.org/grpc/attributes/attributes.go
index 3efca459149..52d530d7ad0 100644
--- a/vendor/google.golang.org/grpc/attributes/attributes.go
+++ b/vendor/google.golang.org/grpc/attributes/attributes.go
@@ -34,26 +34,26 @@ import (
 // key/value pairs.  Keys must be hashable, and users should define their own
 // types for keys.  Values should not be modified after they are added to an
 // Attributes or if they were received from one.  If values implement 'Equal(o
-// interface{}) bool', it will be called by (*Attributes).Equal to determine
-// whether two values with the same key should be considered equal.
+// any) bool', it will be called by (*Attributes).Equal to determine whether
+// two values with the same key should be considered equal.
 type Attributes struct {
-	m map[interface{}]interface{}
+	m map[any]any
 }
 
 // New returns a new Attributes containing the key/value pair.
-func New(key, value interface{}) *Attributes {
-	return &Attributes{m: map[interface{}]interface{}{key: value}}
+func New(key, value any) *Attributes {
+	return &Attributes{m: map[any]any{key: value}}
 }
 
 // WithValue returns a new Attributes containing the previous keys and values
 // and the new key/value pair.  If the same key appears multiple times, the
 // last value overwrites all previous values for that key.  To remove an
 // existing key, use a nil value.  value should not be modified later.
-func (a *Attributes) WithValue(key, value interface{}) *Attributes {
+func (a *Attributes) WithValue(key, value any) *Attributes {
 	if a == nil {
 		return New(key, value)
 	}
-	n := &Attributes{m: make(map[interface{}]interface{}, len(a.m)+1)}
+	n := &Attributes{m: make(map[any]any, len(a.m)+1)}
 	for k, v := range a.m {
 		n.m[k] = v
 	}
@@ -63,20 +63,19 @@ func (a *Attributes) WithValue(key, value interface{}) *Attributes {
 
 // Value returns the value associated with these attributes for key, or nil if
 // no value is associated with key.  The returned value should not be modified.
-func (a *Attributes) Value(key interface{}) interface{} {
+func (a *Attributes) Value(key any) any {
 	if a == nil {
 		return nil
 	}
 	return a.m[key]
 }
 
-// Equal returns whether a and o are equivalent.  If 'Equal(o interface{})
-// bool' is implemented for a value in the attributes, it is called to
-// determine if the value matches the one stored in the other attributes.  If
-// Equal is not implemented, standard equality is used to determine if the two
-// values are equal. Note that some types (e.g. maps) aren't comparable by
-// default, so they must be wrapped in a struct, or in an alias type, with Equal
-// defined.
+// Equal returns whether a and o are equivalent.  If 'Equal(o any) bool' is
+// implemented for a value in the attributes, it is called to determine if the
+// value matches the one stored in the other attributes.  If Equal is not
+// implemented, standard equality is used to determine if the two values are
+// equal. Note that some types (e.g. maps) aren't comparable by default, so
+// they must be wrapped in a struct, or in an alias type, with Equal defined.
 func (a *Attributes) Equal(o *Attributes) bool {
 	if a == nil && o == nil {
 		return true
@@ -93,7 +92,7 @@ func (a *Attributes) Equal(o *Attributes) bool {
 			// o missing element of a
 			return false
 		}
-		if eq, ok := v.(interface{ Equal(o interface{}) bool }); ok {
+		if eq, ok := v.(interface{ Equal(o any) bool }); ok {
 			if !eq.Equal(ov) {
 				return false
 			}
@@ -112,19 +111,31 @@ func (a *Attributes) String() string {
 	sb.WriteString("{")
 	first := true
 	for k, v := range a.m {
-		var key, val string
-		if str, ok := k.(interface{ String() string }); ok {
-			key = str.String()
-		}
-		if str, ok := v.(interface{ String() string }); ok {
-			val = str.String()
-		}
 		if !first {
 			sb.WriteString(", ")
 		}
-		sb.WriteString(fmt.Sprintf("%q: %q, ", key, val))
+		sb.WriteString(fmt.Sprintf("%q: %q ", str(k), str(v)))
 		first = false
 	}
 	sb.WriteString("}")
 	return sb.String()
 }
+
+func str(x any) (s string) {
+	if v, ok := x.(fmt.Stringer); ok {
+		return fmt.Sprint(v)
+	} else if v, ok := x.(string); ok {
+		return v
+	}
+	return fmt.Sprintf("<%p>", x)
+}
+
+// MarshalJSON helps implement the json.Marshaler interface, thereby rendering
+// the Attributes correctly when printing (via pretty.JSON) structs containing
+// Attributes as fields.
+//
+// Is it impossible to unmarshal attributes from a JSON representation and this
+// method is meant only for debugging purposes.
+func (a *Attributes) MarshalJSON() ([]byte, error) {
+	return []byte(a.String()), nil
+}
diff --git a/vendor/google.golang.org/grpc/balancer/balancer.go b/vendor/google.golang.org/grpc/balancer/balancer.go
index 8f00523c0e2..d79560a2e26 100644
--- a/vendor/google.golang.org/grpc/balancer/balancer.go
+++ b/vendor/google.golang.org/grpc/balancer/balancer.go
@@ -30,6 +30,7 @@ import (
 	"google.golang.org/grpc/channelz"
 	"google.golang.org/grpc/connectivity"
 	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/grpclog"
 	"google.golang.org/grpc/internal"
 	"google.golang.org/grpc/metadata"
 	"google.golang.org/grpc/resolver"
@@ -39,6 +40,8 @@ import (
 var (
 	// m is a map from name to balancer builder.
 	m = make(map[string]Builder)
+
+	logger = grpclog.Component("balancer")
 )
 
 // Register registers the balancer builder to the balancer map. b.Name
@@ -51,6 +54,12 @@ var (
 // an init() function), and is not thread-safe. If multiple Balancers are
 // registered with the same name, the one registered last will take effect.
 func Register(b Builder) {
+	if strings.ToLower(b.Name()) != b.Name() {
+		// TODO: Skip the use of strings.ToLower() to index the map after v1.59
+		// is released to switch to case sensitive balancer registry. Also,
+		// remove this warning and update the docstrings for Register and Get.
+		logger.Warningf("Balancer registered with name %q. grpc-go will be switching to case sensitive balancer registries soon", b.Name())
+	}
 	m[strings.ToLower(b.Name())] = b
 }
 
@@ -70,6 +79,12 @@ func init() {
 // Note that the compare is done in a case-insensitive fashion.
 // If no builder is register with the name, nil will be returned.
 func Get(name string) Builder {
+	if strings.ToLower(name) != name {
+		// TODO: Skip the use of strings.ToLower() to index the map after v1.59
+		// is released to switch to case sensitive balancer registry. Also,
+		// remove this warning and update the docstrings for Register and Get.
+		logger.Warningf("Balancer retrieved for name %q. grpc-go will be switching to case sensitive balancer registries soon", name)
+	}
 	if b, ok := m[strings.ToLower(name)]; ok {
 		return b
 	}
@@ -105,8 +120,8 @@ type SubConn interface {
 	//
 	// This will trigger a state transition for the SubConn.
 	//
-	// Deprecated: This method is now part of the ClientConn interface and will
-	// eventually be removed from here.
+	// Deprecated: this method will be removed.  Create new SubConns for new
+	// addresses instead.
 	UpdateAddresses([]resolver.Address)
 	// Connect starts the connecting for this SubConn.
 	Connect()
@@ -115,6 +130,13 @@ type SubConn interface {
 	// creates a new one and returns it.  Returns a close function which must
 	// be called when the Producer is no longer needed.
 	GetOrBuildProducer(ProducerBuilder) (p Producer, close func())
+	// Shutdown shuts down the SubConn gracefully.  Any started RPCs will be
+	// allowed to complete.  No future calls should be made on the SubConn.
+	// One final state update will be delivered to the StateListener (or
+	// UpdateSubConnState; deprecated) with ConnectivityState of Shutdown to
+	// indicate the shutdown operation.  This may be delivered before
+	// in-progress RPCs are complete and the actual connection is closed.
+	Shutdown()
 }
 
 // NewSubConnOptions contains options to create new SubConn.
@@ -129,6 +151,11 @@ type NewSubConnOptions struct {
 	// HealthCheckEnabled indicates whether health check service should be
 	// enabled on this SubConn
 	HealthCheckEnabled bool
+	// StateListener is called when the state of the subconn changes.  If nil,
+	// Balancer.UpdateSubConnState will be called instead.  Will never be
+	// invoked until after Connect() is called on the SubConn created with
+	// these options.
+	StateListener func(SubConnState)
 }
 
 // State contains the balancer's state relevant to the gRPC ClientConn.
@@ -150,16 +177,24 @@ type ClientConn interface {
 	// NewSubConn is called by balancer to create a new SubConn.
 	// It doesn't block and wait for the connections to be established.
 	// Behaviors of the SubConn can be controlled by options.
+	//
+	// Deprecated: please be aware that in a future version, SubConns will only
+	// support one address per SubConn.
 	NewSubConn([]resolver.Address, NewSubConnOptions) (SubConn, error)
 	// RemoveSubConn removes the SubConn from ClientConn.
 	// The SubConn will be shutdown.
+	//
+	// Deprecated: use SubConn.Shutdown instead.
 	RemoveSubConn(SubConn)
 	// UpdateAddresses updates the addresses used in the passed in SubConn.
 	// gRPC checks if the currently connected address is still in the new list.
 	// If so, the connection will be kept. Else, the connection will be
 	// gracefully closed, and a new connection will be created.
 	//
-	// This will trigger a state transition for the SubConn.
+	// This may trigger a state transition for the SubConn.
+	//
+	// Deprecated: this method will be removed.  Create new SubConns for new
+	// addresses instead.
 	UpdateAddresses(SubConn, []resolver.Address)
 
 	// UpdateState notifies gRPC that the balancer's internal state has
@@ -250,7 +285,7 @@ type DoneInfo struct {
 	// trailing metadata.
 	//
 	// The only supported type now is *orca_v3.LoadReport.
-	ServerLoad interface{}
+	ServerLoad any
 }
 
 var (
@@ -343,9 +378,13 @@ type Balancer interface {
 	ResolverError(error)
 	// UpdateSubConnState is called by gRPC when the state of a SubConn
 	// changes.
+	//
+	// Deprecated: Use NewSubConnOptions.StateListener when creating the
+	// SubConn instead.
 	UpdateSubConnState(SubConn, SubConnState)
-	// Close closes the balancer. The balancer is not required to call
-	// ClientConn.RemoveSubConn for its existing SubConns.
+	// Close closes the balancer. The balancer is not currently required to
+	// call SubConn.Shutdown for its existing SubConns; however, this will be
+	// required in a future release, so it is recommended.
 	Close()
 }
 
@@ -390,15 +429,14 @@ var ErrBadResolverState = errors.New("bad resolver state")
 type ProducerBuilder interface {
 	// Build creates a Producer.  The first parameter is always a
 	// grpc.ClientConnInterface (a type to allow creating RPCs/streams on the
-	// associated SubConn), but is declared as interface{} to avoid a
-	// dependency cycle.  Should also return a close function that will be
-	// called when all references to the Producer have been given up.
-	Build(grpcClientConnInterface interface{}) (p Producer, close func())
+	// associated SubConn), but is declared as `any` to avoid a dependency
+	// cycle.  Should also return a close function that will be called when all
+	// references to the Producer have been given up.
+	Build(grpcClientConnInterface any) (p Producer, close func())
 }
 
 // A Producer is a type shared among potentially many consumers.  It is
 // associated with a SubConn, and an implementation will typically contain
 // other methods to provide additional functionality, e.g. configuration or
 // subscription registration.
-type Producer interface {
-}
+type Producer any
diff --git a/vendor/google.golang.org/grpc/balancer/base/balancer.go b/vendor/google.golang.org/grpc/balancer/base/balancer.go
index 3929c26d31e..a7f1eeec8e6 100644
--- a/vendor/google.golang.org/grpc/balancer/base/balancer.go
+++ b/vendor/google.golang.org/grpc/balancer/base/balancer.go
@@ -105,7 +105,12 @@ func (b *baseBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
 		addrsSet.Set(a, nil)
 		if _, ok := b.subConns.Get(a); !ok {
 			// a is a new address (not existing in b.subConns).
-			sc, err := b.cc.NewSubConn([]resolver.Address{a}, balancer.NewSubConnOptions{HealthCheckEnabled: b.config.HealthCheck})
+			var sc balancer.SubConn
+			opts := balancer.NewSubConnOptions{
+				HealthCheckEnabled: b.config.HealthCheck,
+				StateListener:      func(scs balancer.SubConnState) { b.updateSubConnState(sc, scs) },
+			}
+			sc, err := b.cc.NewSubConn([]resolver.Address{a}, opts)
 			if err != nil {
 				logger.Warningf("base.baseBalancer: failed to create new SubConn: %v", err)
 				continue
@@ -121,10 +126,10 @@ func (b *baseBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
 		sc := sci.(balancer.SubConn)
 		// a was removed by resolver.
 		if _, ok := addrsSet.Get(a); !ok {
-			b.cc.RemoveSubConn(sc)
+			sc.Shutdown()
 			b.subConns.Delete(a)
 			// Keep the state of this sc in b.scStates until sc's state becomes Shutdown.
-			// The entry will be deleted in UpdateSubConnState.
+			// The entry will be deleted in updateSubConnState.
 		}
 	}
 	// If resolver state contains no addresses, return an error so ClientConn
@@ -177,7 +182,12 @@ func (b *baseBalancer) regeneratePicker() {
 	b.picker = b.pickerBuilder.Build(PickerBuildInfo{ReadySCs: readySCs})
 }
 
+// UpdateSubConnState is a nop because a StateListener is always set in NewSubConn.
 func (b *baseBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
+	logger.Errorf("base.baseBalancer: UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
+}
+
+func (b *baseBalancer) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
 	s := state.ConnectivityState
 	if logger.V(2) {
 		logger.Infof("base.baseBalancer: handle SubConn state change: %p, %v", sc, s)
@@ -204,8 +214,8 @@ func (b *baseBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.Su
 	case connectivity.Idle:
 		sc.Connect()
 	case connectivity.Shutdown:
-		// When an address was removed by resolver, b called RemoveSubConn but
-		// kept the sc's state in scStates. Remove state for this sc here.
+		// When an address was removed by resolver, b called Shutdown but kept
+		// the sc's state in scStates. Remove state for this sc here.
 		delete(b.scStates, sc)
 	case connectivity.TransientFailure:
 		// Save error to be reported via picker.
@@ -226,7 +236,7 @@ func (b *baseBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.Su
 }
 
 // Close is a nop because base balancer doesn't have internal state to clean up,
-// and it doesn't need to call RemoveSubConn for the SubConns.
+// and it doesn't need to call Shutdown for the SubConns.
 func (b *baseBalancer) Close() {
 }
 
diff --git a/vendor/google.golang.org/grpc/balancer_conn_wrappers.go b/vendor/google.golang.org/grpc/balancer_wrapper.go
similarity index 52%
rename from vendor/google.golang.org/grpc/balancer_conn_wrappers.go
rename to vendor/google.golang.org/grpc/balancer_wrapper.go
index 04b9ad41169..b5e30cff021 100644
--- a/vendor/google.golang.org/grpc/balancer_conn_wrappers.go
+++ b/vendor/google.golang.org/grpc/balancer_wrapper.go
@@ -32,21 +32,13 @@ import (
 	"google.golang.org/grpc/resolver"
 )
 
-type ccbMode int
-
-const (
-	ccbModeActive = iota
-	ccbModeIdle
-	ccbModeClosed
-	ccbModeExitingIdle
-)
-
 // ccBalancerWrapper sits between the ClientConn and the Balancer.
 //
 // ccBalancerWrapper implements methods corresponding to the ones on the
 // balancer.Balancer interface. The ClientConn is free to call these methods
 // concurrently and the ccBalancerWrapper ensures that calls from the ClientConn
-// to the Balancer happen synchronously and in order.
+// to the Balancer happen in order by performing them in the serializer, without
+// any mutexes held.
 //
 // ccBalancerWrapper also implements the balancer.ClientConn interface and is
 // passed to the Balancer implementations. It invokes unexported methods on the
@@ -57,99 +49,75 @@ const (
 type ccBalancerWrapper struct {
 	// The following fields are initialized when the wrapper is created and are
 	// read-only afterwards, and therefore can be accessed without a mutex.
-	cc   *ClientConn
-	opts balancer.BuildOptions
+	cc               *ClientConn
+	opts             balancer.BuildOptions
+	serializer       *grpcsync.CallbackSerializer
+	serializerCancel context.CancelFunc
 
-	// Outgoing (gRPC --> balancer) calls are guaranteed to execute in a
-	// mutually exclusive manner as they are scheduled in the serializer. Fields
-	// accessed *only* in these serializer callbacks, can therefore be accessed
-	// without a mutex.
-	balancer        *gracefulswitch.Balancer
+	// The following fields are only accessed within the serializer or during
+	// initialization.
 	curBalancerName string
+	balancer        *gracefulswitch.Balancer
 
-	// mu guards access to the below fields. Access to the serializer and its
-	// cancel function needs to be mutex protected because they are overwritten
-	// when the wrapper exits idle mode.
-	mu               sync.Mutex
-	serializer       *grpcsync.CallbackSerializer // To serialize all outoing calls.
-	serializerCancel context.CancelFunc           // To close the seralizer at close/enterIdle time.
-	mode             ccbMode                      // Tracks the current mode of the wrapper.
+	// The following field is protected by mu.  Caller must take cc.mu before
+	// taking mu.
+	mu     sync.Mutex
+	closed bool
 }
 
-// newCCBalancerWrapper creates a new balancer wrapper. The underlying balancer
-// is not created until the switchTo() method is invoked.
-func newCCBalancerWrapper(cc *ClientConn, bopts balancer.BuildOptions) *ccBalancerWrapper {
-	ctx, cancel := context.WithCancel(context.Background())
+// newCCBalancerWrapper creates a new balancer wrapper in idle state. The
+// underlying balancer is not created until the switchTo() method is invoked.
+func newCCBalancerWrapper(cc *ClientConn) *ccBalancerWrapper {
+	ctx, cancel := context.WithCancel(cc.ctx)
 	ccb := &ccBalancerWrapper{
-		cc:               cc,
-		opts:             bopts,
+		cc: cc,
+		opts: balancer.BuildOptions{
+			DialCreds:        cc.dopts.copts.TransportCredentials,
+			CredsBundle:      cc.dopts.copts.CredsBundle,
+			Dialer:           cc.dopts.copts.Dialer,
+			Authority:        cc.authority,
+			CustomUserAgent:  cc.dopts.copts.UserAgent,
+			ChannelzParentID: cc.channelzID,
+			Target:           cc.parsedTarget,
+		},
 		serializer:       grpcsync.NewCallbackSerializer(ctx),
 		serializerCancel: cancel,
 	}
-	ccb.balancer = gracefulswitch.NewBalancer(ccb, bopts)
+	ccb.balancer = gracefulswitch.NewBalancer(ccb, ccb.opts)
 	return ccb
 }
 
 // updateClientConnState is invoked by grpc to push a ClientConnState update to
-// the underlying balancer.
+// the underlying balancer.  This is always executed from the serializer, so
+// it is safe to call into the balancer here.
 func (ccb *ccBalancerWrapper) updateClientConnState(ccs *balancer.ClientConnState) error {
-	ccb.mu.Lock()
-	errCh := make(chan error, 1)
-	// Here and everywhere else where Schedule() is called, it is done with the
-	// lock held. But the lock guards only the scheduling part. The actual
-	// callback is called asynchronously without the lock being held.
-	ok := ccb.serializer.Schedule(func(_ context.Context) {
-		// If the addresses specified in the update contain addresses of type
-		// "grpclb" and the selected LB policy is not "grpclb", these addresses
-		// will be filtered out and ccs will be modified with the updated
-		// address list.
-		if ccb.curBalancerName != grpclbName {
-			var addrs []resolver.Address
-			for _, addr := range ccs.ResolverState.Addresses {
-				if addr.Type == resolver.GRPCLB {
-					continue
-				}
-				addrs = append(addrs, addr)
-			}
-			ccs.ResolverState.Addresses = addrs
+	errCh := make(chan error)
+	ok := ccb.serializer.Schedule(func(ctx context.Context) {
+		defer close(errCh)
+		if ctx.Err() != nil || ccb.balancer == nil {
+			return
+		}
+		err := ccb.balancer.UpdateClientConnState(*ccs)
+		if logger.V(2) && err != nil {
+			logger.Infof("error from balancer.UpdateClientConnState: %v", err)
 		}
-		errCh <- ccb.balancer.UpdateClientConnState(*ccs)
+		errCh <- err
 	})
 	if !ok {
-		// If we are unable to schedule a function with the serializer, it
-		// indicates that it has been closed. A serializer is only closed when
-		// the wrapper is closed or is in idle.
-		ccb.mu.Unlock()
-		return fmt.Errorf("grpc: cannot send state update to a closed or idle balancer")
-	}
-	ccb.mu.Unlock()
-
-	// We get here only if the above call to Schedule succeeds, in which case it
-	// is guaranteed that the scheduled function will run. Therefore it is safe
-	// to block on this channel.
-	err := <-errCh
-	if logger.V(2) && err != nil {
-		logger.Infof("error from balancer.UpdateClientConnState: %v", err)
+		return nil
 	}
-	return err
-}
-
-// updateSubConnState is invoked by grpc to push a subConn state update to the
-// underlying balancer.
-func (ccb *ccBalancerWrapper) updateSubConnState(sc balancer.SubConn, s connectivity.State, err error) {
-	ccb.mu.Lock()
-	ccb.serializer.Schedule(func(_ context.Context) {
-		ccb.balancer.UpdateSubConnState(sc, balancer.SubConnState{ConnectivityState: s, ConnectionError: err})
-	})
-	ccb.mu.Unlock()
+	return <-errCh
 }
 
+// resolverError is invoked by grpc to push a resolver error to the underlying
+// balancer.  The call to the balancer is executed from the serializer.
 func (ccb *ccBalancerWrapper) resolverError(err error) {
-	ccb.mu.Lock()
-	ccb.serializer.Schedule(func(_ context.Context) {
+	ccb.serializer.Schedule(func(ctx context.Context) {
+		if ctx.Err() != nil || ccb.balancer == nil {
+			return
+		}
 		ccb.balancer.ResolverError(err)
 	})
-	ccb.mu.Unlock()
 }
 
 // switchTo is invoked by grpc to instruct the balancer wrapper to switch to the
@@ -163,8 +131,10 @@ func (ccb *ccBalancerWrapper) resolverError(err error) {
 // the ccBalancerWrapper keeps track of the current LB policy name, and skips
 // the graceful balancer switching process if the name does not change.
 func (ccb *ccBalancerWrapper) switchTo(name string) {
-	ccb.mu.Lock()
-	ccb.serializer.Schedule(func(_ context.Context) {
+	ccb.serializer.Schedule(func(ctx context.Context) {
+		if ctx.Err() != nil || ccb.balancer == nil {
+			return
+		}
 		// TODO: Other languages use case-sensitive balancer registries. We should
 		// switch as well. See: https://github.com/grpc/grpc-go/issues/5288.
 		if strings.EqualFold(ccb.curBalancerName, name) {
@@ -172,7 +142,6 @@ func (ccb *ccBalancerWrapper) switchTo(name string) {
 		}
 		ccb.buildLoadBalancingPolicy(name)
 	})
-	ccb.mu.Unlock()
 }
 
 // buildLoadBalancingPolicy performs the following:
@@ -199,151 +168,69 @@ func (ccb *ccBalancerWrapper) buildLoadBalancingPolicy(name string) {
 	ccb.curBalancerName = builder.Name()
 }
 
+// close initiates async shutdown of the wrapper.  cc.mu must be held when
+// calling this function.  To determine the wrapper has finished shutting down,
+// the channel should block on ccb.serializer.Done() without cc.mu held.
 func (ccb *ccBalancerWrapper) close() {
-	channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: closing")
-	ccb.closeBalancer(ccbModeClosed)
-}
-
-// enterIdleMode is invoked by grpc when the channel enters idle mode upon
-// expiry of idle_timeout. This call blocks until the balancer is closed.
-func (ccb *ccBalancerWrapper) enterIdleMode() {
-	channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: entering idle mode")
-	ccb.closeBalancer(ccbModeIdle)
-}
-
-// closeBalancer is invoked when the channel is being closed or when it enters
-// idle mode upon expiry of idle_timeout.
-func (ccb *ccBalancerWrapper) closeBalancer(m ccbMode) {
 	ccb.mu.Lock()
-	if ccb.mode == ccbModeClosed || ccb.mode == ccbModeIdle {
-		ccb.mu.Unlock()
-		return
-	}
-
-	ccb.mode = m
-	done := ccb.serializer.Done
-	b := ccb.balancer
-	ok := ccb.serializer.Schedule(func(_ context.Context) {
-		// Close the serializer to ensure that no more calls from gRPC are sent
-		// to the balancer.
-		ccb.serializerCancel()
-		// Empty the current balancer name because we don't have a balancer
-		// anymore and also so that we act on the next call to switchTo by
-		// creating a new balancer specified by the new resolver.
-		ccb.curBalancerName = ""
-	})
-	if !ok {
-		ccb.mu.Unlock()
-		return
-	}
+	ccb.closed = true
 	ccb.mu.Unlock()
-
-	// Give enqueued callbacks a chance to finish.
-	<-done
-	// Spawn a goroutine to close the balancer (since it may block trying to
-	// cleanup all allocated resources) and return early.
-	go b.Close()
-}
-
-// exitIdleMode is invoked by grpc when the channel exits idle mode either
-// because of an RPC or because of an invocation of the Connect() API. This
-// recreates the balancer that was closed previously when entering idle mode.
-//
-// If the channel is not in idle mode, we know for a fact that we are here as a
-// result of the user calling the Connect() method on the ClientConn. In this
-// case, we can simply forward the call to the underlying balancer, instructing
-// it to reconnect to the backends.
-func (ccb *ccBalancerWrapper) exitIdleMode() {
-	ccb.mu.Lock()
-	if ccb.mode == ccbModeClosed {
-		// Request to exit idle is a no-op when wrapper is already closed.
-		ccb.mu.Unlock()
-		return
-	}
-
-	if ccb.mode == ccbModeIdle {
-		// Recreate the serializer which was closed when we entered idle.
-		ctx, cancel := context.WithCancel(context.Background())
-		ccb.serializer = grpcsync.NewCallbackSerializer(ctx)
-		ccb.serializerCancel = cancel
-	}
-
-	// The ClientConn guarantees that mutual exclusion between close() and
-	// exitIdleMode(), and since we just created a new serializer, we can be
-	// sure that the below function will be scheduled.
-	done := make(chan struct{})
-	ccb.serializer.Schedule(func(_ context.Context) {
-		defer close(done)
-
-		ccb.mu.Lock()
-		defer ccb.mu.Unlock()
-
-		if ccb.mode != ccbModeIdle {
-			ccb.balancer.ExitIdle()
+	channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: closing")
+	ccb.serializer.Schedule(func(context.Context) {
+		if ccb.balancer == nil {
 			return
 		}
-
-		// Gracefulswitch balancer does not support a switchTo operation after
-		// being closed. Hence we need to create a new one here.
-		ccb.balancer = gracefulswitch.NewBalancer(ccb, ccb.opts)
-		ccb.mode = ccbModeActive
-		channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: exiting idle mode")
-
+		ccb.balancer.Close()
+		ccb.balancer = nil
 	})
-	ccb.mu.Unlock()
-
-	<-done
+	ccb.serializerCancel()
 }
 
-func (ccb *ccBalancerWrapper) isIdleOrClosed() bool {
-	ccb.mu.Lock()
-	defer ccb.mu.Unlock()
-	return ccb.mode == ccbModeIdle || ccb.mode == ccbModeClosed
+// exitIdle invokes the balancer's exitIdle method in the serializer.
+func (ccb *ccBalancerWrapper) exitIdle() {
+	ccb.serializer.Schedule(func(ctx context.Context) {
+		if ctx.Err() != nil || ccb.balancer == nil {
+			return
+		}
+		ccb.balancer.ExitIdle()
+	})
 }
 
 func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
-	if ccb.isIdleOrClosed() {
-		return nil, fmt.Errorf("grpc: cannot create SubConn when balancer is closed or idle")
+	ccb.cc.mu.Lock()
+	defer ccb.cc.mu.Unlock()
+
+	ccb.mu.Lock()
+	if ccb.closed {
+		ccb.mu.Unlock()
+		return nil, fmt.Errorf("balancer is being closed; no new SubConns allowed")
 	}
+	ccb.mu.Unlock()
 
 	if len(addrs) == 0 {
 		return nil, fmt.Errorf("grpc: cannot create SubConn with empty address list")
 	}
-	ac, err := ccb.cc.newAddrConn(addrs, opts)
+	ac, err := ccb.cc.newAddrConnLocked(addrs, opts)
 	if err != nil {
 		channelz.Warningf(logger, ccb.cc.channelzID, "acBalancerWrapper: NewSubConn: failed to newAddrConn: %v", err)
 		return nil, err
 	}
-	acbw := &acBalancerWrapper{ac: ac, producers: make(map[balancer.ProducerBuilder]*refCountedProducer)}
+	acbw := &acBalancerWrapper{
+		ccb:           ccb,
+		ac:            ac,
+		producers:     make(map[balancer.ProducerBuilder]*refCountedProducer),
+		stateListener: opts.StateListener,
+	}
 	ac.acbw = acbw
 	return acbw, nil
 }
 
 func (ccb *ccBalancerWrapper) RemoveSubConn(sc balancer.SubConn) {
-	if ccb.isIdleOrClosed() {
-		// It it safe to ignore this call when the balancer is closed or in idle
-		// because the ClientConn takes care of closing the connections.
-		//
-		// Not returning early from here when the balancer is closed or in idle
-		// leads to a deadlock though, because of the following sequence of
-		// calls when holding cc.mu:
-		// cc.exitIdleMode --> ccb.enterIdleMode --> gsw.Close -->
-		// ccb.RemoveAddrConn --> cc.removeAddrConn
-		return
-	}
-
-	acbw, ok := sc.(*acBalancerWrapper)
-	if !ok {
-		return
-	}
-	ccb.cc.removeAddrConn(acbw.ac, errConnDrain)
+	// The graceful switch balancer will never call this.
+	logger.Errorf("ccb RemoveSubConn(%v) called unexpectedly, sc")
 }
 
 func (ccb *ccBalancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) {
-	if ccb.isIdleOrClosed() {
-		return
-	}
-
 	acbw, ok := sc.(*acBalancerWrapper)
 	if !ok {
 		return
@@ -352,25 +239,39 @@ func (ccb *ccBalancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resol
 }
 
 func (ccb *ccBalancerWrapper) UpdateState(s balancer.State) {
-	if ccb.isIdleOrClosed() {
+	ccb.cc.mu.Lock()
+	defer ccb.cc.mu.Unlock()
+
+	ccb.mu.Lock()
+	if ccb.closed {
+		ccb.mu.Unlock()
 		return
 	}
-
+	ccb.mu.Unlock()
 	// Update picker before updating state.  Even though the ordering here does
 	// not matter, it can lead to multiple calls of Pick in the common start-up
 	// case where we wait for ready and then perform an RPC.  If the picker is
 	// updated later, we could call the "connecting" picker when the state is
 	// updated, and then call the "ready" picker after the picker gets updated.
-	ccb.cc.blockingpicker.updatePicker(s.Picker)
+
+	// Note that there is no need to check if the balancer wrapper was closed,
+	// as we know the graceful switch LB policy will not call cc if it has been
+	// closed.
+	ccb.cc.pickerWrapper.updatePicker(s.Picker)
 	ccb.cc.csMgr.updateState(s.ConnectivityState)
 }
 
 func (ccb *ccBalancerWrapper) ResolveNow(o resolver.ResolveNowOptions) {
-	if ccb.isIdleOrClosed() {
+	ccb.cc.mu.RLock()
+	defer ccb.cc.mu.RUnlock()
+
+	ccb.mu.Lock()
+	if ccb.closed {
+		ccb.mu.Unlock()
 		return
 	}
-
-	ccb.cc.resolveNow(o)
+	ccb.mu.Unlock()
+	ccb.cc.resolveNowLocked(o)
 }
 
 func (ccb *ccBalancerWrapper) Target() string {
@@ -380,12 +281,28 @@ func (ccb *ccBalancerWrapper) Target() string {
 // acBalancerWrapper is a wrapper on top of ac for balancers.
 // It implements balancer.SubConn interface.
 type acBalancerWrapper struct {
-	ac *addrConn // read-only
+	ac            *addrConn          // read-only
+	ccb           *ccBalancerWrapper // read-only
+	stateListener func(balancer.SubConnState)
 
 	mu        sync.Mutex
 	producers map[balancer.ProducerBuilder]*refCountedProducer
 }
 
+// updateState is invoked by grpc to push a subConn state update to the
+// underlying balancer.
+func (acbw *acBalancerWrapper) updateState(s connectivity.State, err error) {
+	acbw.ccb.serializer.Schedule(func(ctx context.Context) {
+		if ctx.Err() != nil || acbw.ccb.balancer == nil {
+			return
+		}
+		// Even though it is optional for balancers, gracefulswitch ensures
+		// opts.StateListener is set, so this cannot ever be nil.
+		// TODO: delete this comment when UpdateSubConnState is removed.
+		acbw.stateListener(balancer.SubConnState{ConnectivityState: s, ConnectionError: err})
+	})
+}
+
 func (acbw *acBalancerWrapper) String() string {
 	return fmt.Sprintf("SubConn(id:%d)", acbw.ac.channelzID.Int())
 }
@@ -398,6 +315,10 @@ func (acbw *acBalancerWrapper) Connect() {
 	go acbw.ac.connect()
 }
 
+func (acbw *acBalancerWrapper) Shutdown() {
+	acbw.ccb.cc.removeAddrConn(acbw.ac, errConnDrain)
+}
+
 // NewStream begins a streaming RPC on the addrConn.  If the addrConn is not
 // ready, blocks until it is or ctx expires.  Returns an error when the context
 // expires or the addrConn is shut down.
@@ -411,7 +332,7 @@ func (acbw *acBalancerWrapper) NewStream(ctx context.Context, desc *StreamDesc,
 
 // Invoke performs a unary RPC.  If the addrConn is not ready, returns
 // errSubConnNotReady.
-func (acbw *acBalancerWrapper) Invoke(ctx context.Context, method string, args interface{}, reply interface{}, opts ...CallOption) error {
+func (acbw *acBalancerWrapper) Invoke(ctx context.Context, method string, args any, reply any, opts ...CallOption) error {
 	cs, err := acbw.NewStream(ctx, unaryStreamDesc, method, opts...)
 	if err != nil {
 		return err
diff --git a/vendor/google.golang.org/grpc/binarylog/grpc_binarylog_v1/binarylog.pb.go b/vendor/google.golang.org/grpc/binarylog/grpc_binarylog_v1/binarylog.pb.go
index ec2c2fa14dd..e9e97d45111 100644
--- a/vendor/google.golang.org/grpc/binarylog/grpc_binarylog_v1/binarylog.pb.go
+++ b/vendor/google.golang.org/grpc/binarylog/grpc_binarylog_v1/binarylog.pb.go
@@ -18,7 +18,7 @@
 
 // Code generated by protoc-gen-go. DO NOT EDIT.
 // versions:
-// 	protoc-gen-go v1.30.0
+// 	protoc-gen-go v1.31.0
 // 	protoc        v4.22.0
 // source: grpc/binlog/v1/binarylog.proto
 
@@ -430,7 +430,7 @@ type ClientHeader struct {
 	MethodName string `protobuf:"bytes,2,opt,name=method_name,json=methodName,proto3" json:"method_name,omitempty"`
 	// A single process may be used to run multiple virtual
 	// servers with different identities.
-	// The authority is the name of such a server identitiy.
+	// The authority is the name of such a server identity.
 	// It is typically a portion of the URI in the form of
 	// <host> or <host>:<port> .
 	Authority string `protobuf:"bytes,3,opt,name=authority,proto3" json:"authority,omitempty"`
diff --git a/vendor/google.golang.org/grpc/call.go b/vendor/google.golang.org/grpc/call.go
index e6a1dc5d75e..788c89c16f9 100644
--- a/vendor/google.golang.org/grpc/call.go
+++ b/vendor/google.golang.org/grpc/call.go
@@ -26,12 +26,7 @@ import (
 // received.  This is typically called by generated code.
 //
 // All errors returned by Invoke are compatible with the status package.
-func (cc *ClientConn) Invoke(ctx context.Context, method string, args, reply interface{}, opts ...CallOption) error {
-	if err := cc.idlenessMgr.onCallBegin(); err != nil {
-		return err
-	}
-	defer cc.idlenessMgr.onCallEnd()
-
+func (cc *ClientConn) Invoke(ctx context.Context, method string, args, reply any, opts ...CallOption) error {
 	// allow interceptor to see all applicable call options, which means those
 	// configured as defaults from dial option as well as per-call options
 	opts = combine(cc.dopts.callOptions, opts)
@@ -61,13 +56,13 @@ func combine(o1 []CallOption, o2 []CallOption) []CallOption {
 // received.  This is typically called by generated code.
 //
 // DEPRECATED: Use ClientConn.Invoke instead.
-func Invoke(ctx context.Context, method string, args, reply interface{}, cc *ClientConn, opts ...CallOption) error {
+func Invoke(ctx context.Context, method string, args, reply any, cc *ClientConn, opts ...CallOption) error {
 	return cc.Invoke(ctx, method, args, reply, opts...)
 }
 
 var unaryStreamDesc = &StreamDesc{ServerStreams: false, ClientStreams: false}
 
-func invoke(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error {
+func invoke(ctx context.Context, method string, req, reply any, cc *ClientConn, opts ...CallOption) error {
 	cs, err := newClientStream(ctx, unaryStreamDesc, cc, method, opts...)
 	if err != nil {
 		return err
diff --git a/vendor/google.golang.org/grpc/clientconn.go b/vendor/google.golang.org/grpc/clientconn.go
index 95a7459b02f..f6e815e6bfc 100644
--- a/vendor/google.golang.org/grpc/clientconn.go
+++ b/vendor/google.golang.org/grpc/clientconn.go
@@ -33,10 +33,11 @@ import (
 	"google.golang.org/grpc/balancer/base"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/connectivity"
-	"google.golang.org/grpc/credentials"
-	"google.golang.org/grpc/internal/backoff"
+	"google.golang.org/grpc/internal"
 	"google.golang.org/grpc/internal/channelz"
 	"google.golang.org/grpc/internal/grpcsync"
+	"google.golang.org/grpc/internal/idle"
+	"google.golang.org/grpc/internal/pretty"
 	iresolver "google.golang.org/grpc/internal/resolver"
 	"google.golang.org/grpc/internal/transport"
 	"google.golang.org/grpc/keepalive"
@@ -45,16 +46,14 @@ import (
 	"google.golang.org/grpc/status"
 
 	_ "google.golang.org/grpc/balancer/roundrobin"           // To register roundrobin.
-	_ "google.golang.org/grpc/internal/resolver/dns"         // To register dns resolver.
 	_ "google.golang.org/grpc/internal/resolver/passthrough" // To register passthrough resolver.
 	_ "google.golang.org/grpc/internal/resolver/unix"        // To register unix resolver.
+	_ "google.golang.org/grpc/resolver/dns"                  // To register dns resolver.
 )
 
 const (
 	// minimum time to give a connection to complete
 	minConnectTimeout = 20 * time.Second
-	// must match grpclbName in grpclb/grpclb.go
-	grpclbName = "grpclb"
 )
 
 var (
@@ -118,48 +117,20 @@ func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*ires
 	}, nil
 }
 
-// DialContext creates a client connection to the given target. By default, it's
-// a non-blocking dial (the function won't wait for connections to be
-// established, and connecting happens in the background). To make it a blocking
-// dial, use WithBlock() dial option.
-//
-// In the non-blocking case, the ctx does not act against the connection. It
-// only controls the setup steps.
-//
-// In the blocking case, ctx can be used to cancel or expire the pending
-// connection. Once this function returns, the cancellation and expiration of
-// ctx will be noop. Users should call ClientConn.Close to terminate all the
-// pending operations after this function returns.
-//
-// The target name syntax is defined in
-// https://github.com/grpc/grpc/blob/master/doc/naming.md.
-// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
-func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
+// newClient returns a new client in idle mode.
+func newClient(target string, opts ...DialOption) (conn *ClientConn, err error) {
 	cc := &ClientConn{
 		target: target,
-		csMgr:  &connectivityStateManager{},
 		conns:  make(map[*addrConn]struct{}),
 		dopts:  defaultDialOptions(),
 		czData: new(channelzData),
 	}
 
-	// We start the channel off in idle mode, but kick it out of idle at the end
-	// of this method, instead of waiting for the first RPC. Other gRPC
-	// implementations do wait for the first RPC to kick the channel out of
-	// idle. But doing so would be a major behavior change for our users who are
-	// used to seeing the channel active after Dial.
-	//
-	// Taking this approach of kicking it out of idle at the end of this method
-	// allows us to share the code between channel creation and exiting idle
-	// mode. This will also make it easy for us to switch to starting the
-	// channel off in idle, if at all we ever get to do that.
-	cc.idlenessState = ccIdlenessStateIdle
-
 	cc.retryThrottler.Store((*retryThrottler)(nil))
 	cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
 	cc.ctx, cc.cancel = context.WithCancel(context.Background())
-	cc.exitIdleCond = sync.NewCond(&cc.mu)
 
+	// Apply dial options.
 	disableGlobalOpts := false
 	for _, opt := range opts {
 		if _, ok := opt.(*disableGlobalDialOptions); ok {
@@ -177,19 +148,9 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
 	for _, opt := range opts {
 		opt.apply(&cc.dopts)
 	}
-
 	chainUnaryClientInterceptors(cc)
 	chainStreamClientInterceptors(cc)
 
-	defer func() {
-		if err != nil {
-			cc.Close()
-		}
-	}()
-
-	// Register ClientConn with channelz.
-	cc.channelzRegistration(target)
-
 	if err := cc.validateTransportCredentials(); err != nil {
 		return nil, err
 	}
@@ -203,10 +164,80 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
 	}
 	cc.mkp = cc.dopts.copts.KeepaliveParams
 
-	if cc.dopts.copts.UserAgent != "" {
-		cc.dopts.copts.UserAgent += " " + grpcUA
-	} else {
-		cc.dopts.copts.UserAgent = grpcUA
+	// Register ClientConn with channelz.
+	cc.channelzRegistration(target)
+
+	// TODO: Ideally it should be impossible to error from this function after
+	// channelz registration.  This will require removing some channelz logs
+	// from the following functions that can error.  Errors can be returned to
+	// the user, and successful logs can be emitted here, after the checks have
+	// passed and channelz is subsequently registered.
+
+	// Determine the resolver to use.
+	if err := cc.parseTargetAndFindResolver(); err != nil {
+		channelz.RemoveEntry(cc.channelzID)
+		return nil, err
+	}
+	if err = cc.determineAuthority(); err != nil {
+		channelz.RemoveEntry(cc.channelzID)
+		return nil, err
+	}
+
+	cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelzID)
+	cc.pickerWrapper = newPickerWrapper(cc.dopts.copts.StatsHandlers)
+
+	cc.initIdleStateLocked() // Safe to call without the lock, since nothing else has a reference to cc.
+	cc.idlenessMgr = idle.NewManager((*idler)(cc), cc.dopts.idleTimeout)
+	return cc, nil
+}
+
+// DialContext creates a client connection to the given target. By default, it's
+// a non-blocking dial (the function won't wait for connections to be
+// established, and connecting happens in the background). To make it a blocking
+// dial, use WithBlock() dial option.
+//
+// In the non-blocking case, the ctx does not act against the connection. It
+// only controls the setup steps.
+//
+// In the blocking case, ctx can be used to cancel or expire the pending
+// connection. Once this function returns, the cancellation and expiration of
+// ctx will be noop. Users should call ClientConn.Close to terminate all the
+// pending operations after this function returns.
+//
+// The target name syntax is defined in
+// https://github.com/grpc/grpc/blob/master/doc/naming.md.
+// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
+func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
+	cc, err := newClient(target, opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	// We start the channel off in idle mode, but kick it out of idle now,
+	// instead of waiting for the first RPC. Other gRPC implementations do wait
+	// for the first RPC to kick the channel out of idle. But doing so would be
+	// a major behavior change for our users who are used to seeing the channel
+	// active after Dial.
+	//
+	// Taking this approach of kicking it out of idle at the end of this method
+	// allows us to share the code between channel creation and exiting idle
+	// mode. This will also make it easy for us to switch to starting the
+	// channel off in idle, i.e. by making newClient exported.
+
+	defer func() {
+		if err != nil {
+			cc.Close()
+		}
+	}()
+
+	// This creates the name resolver, load balancer, etc.
+	if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
+		return nil, err
+	}
+
+	// Return now for non-blocking dials.
+	if !cc.dopts.block {
+		return cc, nil
 	}
 
 	if cc.dopts.timeout > 0 {
@@ -229,49 +260,6 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
 		}
 	}()
 
-	if cc.dopts.bs == nil {
-		cc.dopts.bs = backoff.DefaultExponential
-	}
-
-	// Determine the resolver to use.
-	if err := cc.parseTargetAndFindResolver(); err != nil {
-		return nil, err
-	}
-	if err = cc.determineAuthority(); err != nil {
-		return nil, err
-	}
-
-	if cc.dopts.scChan != nil {
-		// Blocking wait for the initial service config.
-		select {
-		case sc, ok := <-cc.dopts.scChan:
-			if ok {
-				cc.sc = &sc
-				cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc})
-			}
-		case <-ctx.Done():
-			return nil, ctx.Err()
-		}
-	}
-	if cc.dopts.scChan != nil {
-		go cc.scWatcher()
-	}
-
-	// This creates the name resolver, load balancer, blocking picker etc.
-	if err := cc.exitIdleMode(); err != nil {
-		return nil, err
-	}
-
-	// Configure idleness support with configured idle timeout or default idle
-	// timeout duration. Idleness can be explicitly disabled by the user, by
-	// setting the dial option to 0.
-	cc.idlenessMgr = newIdlenessManager(cc, cc.dopts.idleTimeout)
-
-	// Return early for non-blocking dials.
-	if !cc.dopts.block {
-		return cc, nil
-	}
-
 	// A blocking dial blocks until the clientConn is ready.
 	for {
 		s := cc.GetState()
@@ -316,117 +304,82 @@ func (cc *ClientConn) addTraceEvent(msg string) {
 	channelz.AddTraceEvent(logger, cc.channelzID, 0, ted)
 }
 
+type idler ClientConn
+
+func (i *idler) EnterIdleMode() {
+	(*ClientConn)(i).enterIdleMode()
+}
+
+func (i *idler) ExitIdleMode() error {
+	return (*ClientConn)(i).exitIdleMode()
+}
+
 // exitIdleMode moves the channel out of idle mode by recreating the name
-// resolver and load balancer.
-func (cc *ClientConn) exitIdleMode() error {
+// resolver and load balancer.  This should never be called directly; use
+// cc.idlenessMgr.ExitIdleMode instead.
+func (cc *ClientConn) exitIdleMode() (err error) {
 	cc.mu.Lock()
 	if cc.conns == nil {
 		cc.mu.Unlock()
 		return errConnClosing
 	}
-	if cc.idlenessState != ccIdlenessStateIdle {
-		cc.mu.Unlock()
-		logger.Info("ClientConn asked to exit idle mode when not in idle mode")
-		return nil
-	}
-
-	defer func() {
-		// When Close() and exitIdleMode() race against each other, one of the
-		// following two can happen:
-		// - Close() wins the race and runs first. exitIdleMode() runs after, and
-		//   sees that the ClientConn is already closed and hence returns early.
-		// - exitIdleMode() wins the race and runs first and recreates the balancer
-		//   and releases the lock before recreating the resolver. If Close() runs
-		//   in this window, it will wait for exitIdleMode to complete.
-		//
-		// We achieve this synchronization using the below condition variable.
-		cc.mu.Lock()
-		cc.idlenessState = ccIdlenessStateActive
-		cc.exitIdleCond.Signal()
-		cc.mu.Unlock()
-	}()
-
-	cc.idlenessState = ccIdlenessStateExitingIdle
-	exitedIdle := false
-	if cc.blockingpicker == nil {
-		cc.blockingpicker = newPickerWrapper()
-	} else {
-		cc.blockingpicker.exitIdleMode()
-		exitedIdle = true
-	}
-
-	var credsClone credentials.TransportCredentials
-	if creds := cc.dopts.copts.TransportCredentials; creds != nil {
-		credsClone = creds.Clone()
-	}
-	if cc.balancerWrapper == nil {
-		cc.balancerWrapper = newCCBalancerWrapper(cc, balancer.BuildOptions{
-			DialCreds:        credsClone,
-			CredsBundle:      cc.dopts.copts.CredsBundle,
-			Dialer:           cc.dopts.copts.Dialer,
-			Authority:        cc.authority,
-			CustomUserAgent:  cc.dopts.copts.UserAgent,
-			ChannelzParentID: cc.channelzID,
-			Target:           cc.parsedTarget,
-		})
-	} else {
-		cc.balancerWrapper.exitIdleMode()
-	}
-	cc.firstResolveEvent = grpcsync.NewEvent()
 	cc.mu.Unlock()
 
 	// This needs to be called without cc.mu because this builds a new resolver
-	// which might update state or report error inline which needs to be handled
-	// by cc.updateResolverState() which also grabs cc.mu.
-	if err := cc.initResolverWrapper(credsClone); err != nil {
+	// which might update state or report error inline, which would then need to
+	// acquire cc.mu.
+	if err := cc.resolverWrapper.start(); err != nil {
 		return err
 	}
 
-	if exitedIdle {
-		cc.addTraceEvent("exiting idle mode")
-	}
+	cc.addTraceEvent("exiting idle mode")
 	return nil
 }
 
+// initIdleStateLocked initializes common state to how it should be while idle.
+func (cc *ClientConn) initIdleStateLocked() {
+	cc.resolverWrapper = newCCResolverWrapper(cc)
+	cc.balancerWrapper = newCCBalancerWrapper(cc)
+	cc.firstResolveEvent = grpcsync.NewEvent()
+	// cc.conns == nil is a proxy for the ClientConn being closed. So, instead
+	// of setting it to nil here, we recreate the map. This also means that we
+	// don't have to do this when exiting idle mode.
+	cc.conns = make(map[*addrConn]struct{})
+}
+
 // enterIdleMode puts the channel in idle mode, and as part of it shuts down the
-// name resolver, load balancer and any subchannels.
-func (cc *ClientConn) enterIdleMode() error {
+// name resolver, load balancer, and any subchannels.  This should never be
+// called directly; use cc.idlenessMgr.EnterIdleMode instead.
+func (cc *ClientConn) enterIdleMode() {
 	cc.mu.Lock()
+
 	if cc.conns == nil {
 		cc.mu.Unlock()
-		return ErrClientConnClosing
-	}
-	if cc.idlenessState != ccIdlenessStateActive {
-		logger.Error("ClientConn asked to enter idle mode when not active")
-		return nil
+		return
 	}
 
-	// cc.conns == nil is a proxy for the ClientConn being closed. So, instead
-	// of setting it to nil here, we recreate the map. This also means that we
-	// don't have to do this when exiting idle mode.
 	conns := cc.conns
-	cc.conns = make(map[*addrConn]struct{})
 
-	// TODO: Currently, we close the resolver wrapper upon entering idle mode
-	// and create a new one upon exiting idle mode. This means that the
-	// `cc.resolverWrapper` field would be overwritten everytime we exit idle
-	// mode. While this means that we need to hold `cc.mu` when accessing
-	// `cc.resolverWrapper`, it makes the code simpler in the wrapper. We should
-	// try to do the same for the balancer and picker wrappers too.
-	cc.resolverWrapper.close()
-	cc.blockingpicker.enterIdleMode()
-	cc.balancerWrapper.enterIdleMode()
+	rWrapper := cc.resolverWrapper
+	rWrapper.close()
+	cc.pickerWrapper.reset()
+	bWrapper := cc.balancerWrapper
+	bWrapper.close()
 	cc.csMgr.updateState(connectivity.Idle)
-	cc.idlenessState = ccIdlenessStateIdle
+	cc.addTraceEvent("entering idle mode")
+
+	cc.initIdleStateLocked()
+
 	cc.mu.Unlock()
 
-	go func() {
-		cc.addTraceEvent("entering idle mode")
-		for ac := range conns {
-			ac.tearDown(errConnIdling)
-		}
-	}()
-	return nil
+	// Block until the name resolver and LB policy are closed.
+	<-rWrapper.serializer.Done()
+	<-bWrapper.serializer.Done()
+
+	// Close all subchannels after the LB policy is closed.
+	for ac := range conns {
+		ac.tearDown(errConnIdling)
+	}
 }
 
 // validateTransportCredentials performs a series of checks on the configured
@@ -474,7 +427,6 @@ func (cc *ClientConn) validateTransportCredentials() error {
 func (cc *ClientConn) channelzRegistration(target string) {
 	cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
 	cc.addTraceEvent("created")
-	cc.csMgr.channelzID = cc.channelzID
 }
 
 // chainUnaryClientInterceptors chains all unary client interceptors into one.
@@ -491,7 +443,7 @@ func chainUnaryClientInterceptors(cc *ClientConn) {
 	} else if len(interceptors) == 1 {
 		chainedInt = interceptors[0]
 	} else {
-		chainedInt = func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
+		chainedInt = func(ctx context.Context, method string, req, reply any, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
 			return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...)
 		}
 	}
@@ -503,7 +455,7 @@ func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, final
 	if curr == len(interceptors)-1 {
 		return finalInvoker
 	}
-	return func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error {
+	return func(ctx context.Context, method string, req, reply any, cc *ClientConn, opts ...CallOption) error {
 		return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...)
 	}
 }
@@ -539,13 +491,27 @@ func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStr
 	}
 }
 
+// newConnectivityStateManager creates an connectivityStateManager with
+// the specified id.
+func newConnectivityStateManager(ctx context.Context, id *channelz.Identifier) *connectivityStateManager {
+	return &connectivityStateManager{
+		channelzID: id,
+		pubSub:     grpcsync.NewPubSub(ctx),
+	}
+}
+
 // connectivityStateManager keeps the connectivity.State of ClientConn.
 // This struct will eventually be exported so the balancers can access it.
+//
+// TODO: If possible, get rid of the `connectivityStateManager` type, and
+// provide this functionality using the `PubSub`, to avoid keeping track of
+// the connectivity state at two places.
 type connectivityStateManager struct {
 	mu         sync.Mutex
 	state      connectivity.State
 	notifyChan chan struct{}
 	channelzID *channelz.Identifier
+	pubSub     *grpcsync.PubSub
 }
 
 // updateState updates the connectivity.State of ClientConn.
@@ -561,6 +527,8 @@ func (csm *connectivityStateManager) updateState(state connectivity.State) {
 		return
 	}
 	csm.state = state
+	csm.pubSub.Publish(state)
+
 	channelz.Infof(logger, csm.channelzID, "Channel Connectivity change to %v", state)
 	if csm.notifyChan != nil {
 		// There are other goroutines waiting on this channel.
@@ -590,7 +558,7 @@ func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
 type ClientConnInterface interface {
 	// Invoke performs a unary RPC and returns after the response is received
 	// into reply.
-	Invoke(ctx context.Context, method string, args interface{}, reply interface{}, opts ...CallOption) error
+	Invoke(ctx context.Context, method string, args any, reply any, opts ...CallOption) error
 	// NewStream begins a streaming RPC.
 	NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error)
 }
@@ -621,53 +589,35 @@ type ClientConn struct {
 	dopts           dialOptions          // Default and user specified dial options.
 	channelzID      *channelz.Identifier // Channelz identifier for the channel.
 	resolverBuilder resolver.Builder     // See parseTargetAndFindResolver().
-	balancerWrapper *ccBalancerWrapper   // Uses gracefulswitch.balancer underneath.
-	idlenessMgr     idlenessManager
+	idlenessMgr     *idle.Manager
 
 	// The following provide their own synchronization, and therefore don't
 	// require cc.mu to be held to access them.
 	csMgr              *connectivityStateManager
-	blockingpicker     *pickerWrapper
+	pickerWrapper      *pickerWrapper
 	safeConfigSelector iresolver.SafeConfigSelector
 	czData             *channelzData
 	retryThrottler     atomic.Value // Updated from service config.
 
-	// firstResolveEvent is used to track whether the name resolver sent us at
-	// least one update. RPCs block on this event.
-	firstResolveEvent *grpcsync.Event
-
 	// mu protects the following fields.
 	// TODO: split mu so the same mutex isn't used for everything.
 	mu              sync.RWMutex
-	resolverWrapper *ccResolverWrapper         // Initialized in Dial; cleared in Close.
+	resolverWrapper *ccResolverWrapper         // Always recreated whenever entering idle to simplify Close.
+	balancerWrapper *ccBalancerWrapper         // Always recreated whenever entering idle to simplify Close.
 	sc              *ServiceConfig             // Latest service config received from the resolver.
 	conns           map[*addrConn]struct{}     // Set to nil on close.
 	mkp             keepalive.ClientParameters // May be updated upon receipt of a GoAway.
-	idlenessState   ccIdlenessState            // Tracks idleness state of the channel.
-	exitIdleCond    *sync.Cond                 // Signalled when channel exits idle.
+	// firstResolveEvent is used to track whether the name resolver sent us at
+	// least one update. RPCs block on this event.  May be accessed without mu
+	// if we know we cannot be asked to enter idle mode while accessing it (e.g.
+	// when the idle manager has already been closed, or if we are already
+	// entering idle mode).
+	firstResolveEvent *grpcsync.Event
 
 	lceMu               sync.Mutex // protects lastConnectionError
 	lastConnectionError error
 }
 
-// ccIdlenessState tracks the idleness state of the channel.
-//
-// Channels start off in `active` and move to `idle` after a period of
-// inactivity. When moving back to `active` upon an incoming RPC, they
-// transition through `exiting_idle`. This state is useful for synchronization
-// with Close().
-//
-// This state tracking is mostly for self-protection. The idlenessManager is
-// expected to keep track of the state as well, and is expected not to call into
-// the ClientConn unnecessarily.
-type ccIdlenessState int8
-
-const (
-	ccIdlenessStateActive ccIdlenessState = iota
-	ccIdlenessStateIdle
-	ccIdlenessStateExitingIdle
-)
-
 // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
 // ctx expires. A true value is returned in former case and false in latter.
 //
@@ -707,29 +657,15 @@ func (cc *ClientConn) GetState() connectivity.State {
 // Notice: This API is EXPERIMENTAL and may be changed or removed in a later
 // release.
 func (cc *ClientConn) Connect() {
-	cc.exitIdleMode()
+	if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
+		cc.addTraceEvent(err.Error())
+		return
+	}
 	// If the ClientConn was not in idle mode, we need to call ExitIdle on the
 	// LB policy so that connections can be created.
-	cc.balancerWrapper.exitIdleMode()
-}
-
-func (cc *ClientConn) scWatcher() {
-	for {
-		select {
-		case sc, ok := <-cc.dopts.scChan:
-			if !ok {
-				return
-			}
-			cc.mu.Lock()
-			// TODO: load balance policy runtime change is ignored.
-			// We may revisit this decision in the future.
-			cc.sc = &sc
-			cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc})
-			cc.mu.Unlock()
-		case <-cc.ctx.Done():
-			return
-		}
-	}
+	cc.mu.Lock()
+	cc.balancerWrapper.exitIdle()
+	cc.mu.Unlock()
 }
 
 // waitForResolvedAddrs blocks until the resolver has provided addresses or the
@@ -759,6 +695,16 @@ func init() {
 		panic(fmt.Sprintf("impossible error parsing empty service config: %v", cfg.Err))
 	}
 	emptyServiceConfig = cfg.Config.(*ServiceConfig)
+
+	internal.SubscribeToConnectivityStateChanges = func(cc *ClientConn, s grpcsync.Subscriber) func() {
+		return cc.csMgr.pubSub.Subscribe(s)
+	}
+	internal.EnterIdleModeForTesting = func(cc *ClientConn) {
+		cc.idlenessMgr.EnterIdleModeForTesting()
+	}
+	internal.ExitIdleModeForTesting = func(cc *ClientConn) error {
+		return cc.idlenessMgr.ExitIdleMode()
+	}
 }
 
 func (cc *ClientConn) maybeApplyDefaultServiceConfig(addrs []resolver.Address) {
@@ -773,9 +719,8 @@ func (cc *ClientConn) maybeApplyDefaultServiceConfig(addrs []resolver.Address) {
 	}
 }
 
-func (cc *ClientConn) updateResolverState(s resolver.State, err error) error {
+func (cc *ClientConn) updateResolverStateAndUnlock(s resolver.State, err error) error {
 	defer cc.firstResolveEvent.Fire()
-	cc.mu.Lock()
 	// Check if the ClientConn is already closed. Some fields (e.g.
 	// balancerWrapper) are set to nil when closing the ClientConn, and could
 	// cause nil pointer panic if we don't have this check.
@@ -821,7 +766,7 @@ func (cc *ClientConn) updateResolverState(s resolver.State, err error) error {
 			if cc.sc == nil {
 				// Apply the failing LB only if we haven't received valid service config
 				// from the name resolver in the past.
-				cc.applyFailingLB(s.ServiceConfig)
+				cc.applyFailingLBLocked(s.ServiceConfig)
 				cc.mu.Unlock()
 				return ret
 			}
@@ -843,15 +788,13 @@ func (cc *ClientConn) updateResolverState(s resolver.State, err error) error {
 	return ret
 }
 
-// applyFailingLB is akin to configuring an LB policy on the channel which
+// applyFailingLBLocked is akin to configuring an LB policy on the channel which
 // always fails RPCs. Here, an actual LB policy is not configured, but an always
 // erroring picker is configured, which returns errors with information about
 // what was invalid in the received service config. A config selector with no
 // service config is configured, and the connectivity state of the channel is
 // set to TransientFailure.
-//
-// Caller must hold cc.mu.
-func (cc *ClientConn) applyFailingLB(sc *serviceconfig.ParseResult) {
+func (cc *ClientConn) applyFailingLBLocked(sc *serviceconfig.ParseResult) {
 	var err error
 	if sc.Err != nil {
 		err = status.Errorf(codes.Unavailable, "error parsing service config: %v", sc.Err)
@@ -859,22 +802,36 @@ func (cc *ClientConn) applyFailingLB(sc *serviceconfig.ParseResult) {
 		err = status.Errorf(codes.Unavailable, "illegal service config type: %T", sc.Config)
 	}
 	cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
-	cc.blockingpicker.updatePicker(base.NewErrPicker(err))
+	cc.pickerWrapper.updatePicker(base.NewErrPicker(err))
 	cc.csMgr.updateState(connectivity.TransientFailure)
 }
 
-func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State, err error) {
-	cc.balancerWrapper.updateSubConnState(sc, s, err)
+// Makes a copy of the input addresses slice and clears out the balancer
+// attributes field. Addresses are passed during subconn creation and address
+// update operations. In both cases, we will clear the balancer attributes by
+// calling this function, and therefore we will be able to use the Equal method
+// provided by the resolver.Address type for comparison.
+func copyAddressesWithoutBalancerAttributes(in []resolver.Address) []resolver.Address {
+	out := make([]resolver.Address, len(in))
+	for i := range in {
+		out[i] = in[i]
+		out[i].BalancerAttributes = nil
+	}
+	return out
 }
 
-// newAddrConn creates an addrConn for addrs and adds it to cc.conns.
+// newAddrConnLocked creates an addrConn for addrs and adds it to cc.conns.
 //
 // Caller needs to make sure len(addrs) > 0.
-func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
+func (cc *ClientConn) newAddrConnLocked(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
+	if cc.conns == nil {
+		return nil, ErrClientConnClosing
+	}
+
 	ac := &addrConn{
 		state:        connectivity.Idle,
 		cc:           cc,
-		addrs:        addrs,
+		addrs:        copyAddressesWithoutBalancerAttributes(addrs),
 		scopts:       opts,
 		dopts:        cc.dopts,
 		czData:       new(channelzData),
@@ -882,12 +839,6 @@ func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSub
 		stateChan:    make(chan struct{}),
 	}
 	ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
-	// Track ac in cc. This needs to be done before any getTransport(...) is called.
-	cc.mu.Lock()
-	defer cc.mu.Unlock()
-	if cc.conns == nil {
-		return nil, ErrClientConnClosing
-	}
 
 	var err error
 	ac.channelzID, err = channelz.RegisterSubChannel(ac, cc.channelzID, "")
@@ -903,6 +854,7 @@ func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSub
 		},
 	})
 
+	// Track ac in cc. This needs to be done before any getTransport(...) is called.
 	cc.conns[ac] = struct{}{}
 	return ac, nil
 }
@@ -995,8 +947,9 @@ func equalAddresses(a, b []resolver.Address) bool {
 // connections or connection attempts.
 func (ac *addrConn) updateAddrs(addrs []resolver.Address) {
 	ac.mu.Lock()
-	channelz.Infof(logger, ac.channelzID, "addrConn: updateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
+	channelz.Infof(logger, ac.channelzID, "addrConn: updateAddrs curAddr: %v, addrs: %v", pretty.ToJSON(ac.curAddr), pretty.ToJSON(addrs))
 
+	addrs = copyAddressesWithoutBalancerAttributes(addrs)
 	if equalAddresses(ac.addrs, addrs) {
 		ac.mu.Unlock()
 		return
@@ -1031,8 +984,8 @@ func (ac *addrConn) updateAddrs(addrs []resolver.Address) {
 	ac.cancel()
 	ac.ctx, ac.cancel = context.WithCancel(ac.cc.ctx)
 
-	// We have to defer here because GracefulClose => Close => onClose, which
-	// requires locking ac.mu.
+	// We have to defer here because GracefulClose => onClose, which requires
+	// locking ac.mu.
 	if ac.transport != nil {
 		defer ac.transport.GracefulClose()
 		ac.transport = nil
@@ -1108,7 +1061,7 @@ func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
 }
 
 func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, balancer.PickResult, error) {
-	return cc.blockingpicker.pick(ctx, failfast, balancer.PickInfo{
+	return cc.pickerWrapper.pick(ctx, failfast, balancer.PickInfo{
 		Ctx:            ctx,
 		FullMethodName: method,
 	})
@@ -1137,35 +1090,25 @@ func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSel
 	}
 
 	var newBalancerName string
-	if cc.sc != nil && cc.sc.lbConfig != nil {
+	if cc.sc == nil || (cc.sc.lbConfig == nil && cc.sc.LB == nil) {
+		// No service config or no LB policy specified in config.
+		newBalancerName = PickFirstBalancerName
+	} else if cc.sc.lbConfig != nil {
 		newBalancerName = cc.sc.lbConfig.name
-	} else {
-		var isGRPCLB bool
-		for _, a := range addrs {
-			if a.Type == resolver.GRPCLB {
-				isGRPCLB = true
-				break
-			}
-		}
-		if isGRPCLB {
-			newBalancerName = grpclbName
-		} else if cc.sc != nil && cc.sc.LB != nil {
-			newBalancerName = *cc.sc.LB
-		} else {
-			newBalancerName = PickFirstBalancerName
-		}
+	} else { // cc.sc.LB != nil
+		newBalancerName = *cc.sc.LB
 	}
 	cc.balancerWrapper.switchTo(newBalancerName)
 }
 
 func (cc *ClientConn) resolveNow(o resolver.ResolveNowOptions) {
 	cc.mu.RLock()
-	r := cc.resolverWrapper
+	cc.resolverWrapper.resolveNow(o)
 	cc.mu.RUnlock()
-	if r == nil {
-		return
-	}
-	go r.resolveNow(o)
+}
+
+func (cc *ClientConn) resolveNowLocked(o resolver.ResolveNowOptions) {
+	cc.resolverWrapper.resolveNow(o)
 }
 
 // ResetConnectBackoff wakes up all subchannels in transient failure and causes
@@ -1192,7 +1135,14 @@ func (cc *ClientConn) ResetConnectBackoff() {
 
 // Close tears down the ClientConn and all underlying connections.
 func (cc *ClientConn) Close() error {
-	defer cc.cancel()
+	defer func() {
+		cc.cancel()
+		<-cc.csMgr.pubSub.Done()
+	}()
+
+	// Prevent calls to enter/exit idle immediately, and ensure we are not
+	// currently entering/exiting idle mode.
+	cc.idlenessMgr.Close()
 
 	cc.mu.Lock()
 	if cc.conns == nil {
@@ -1200,34 +1150,22 @@ func (cc *ClientConn) Close() error {
 		return ErrClientConnClosing
 	}
 
-	for cc.idlenessState == ccIdlenessStateExitingIdle {
-		cc.exitIdleCond.Wait()
-	}
-
 	conns := cc.conns
 	cc.conns = nil
 	cc.csMgr.updateState(connectivity.Shutdown)
 
-	pWrapper := cc.blockingpicker
-	rWrapper := cc.resolverWrapper
-	bWrapper := cc.balancerWrapper
-	idlenessMgr := cc.idlenessMgr
+	// We can safely unlock and continue to access all fields now as
+	// cc.conns==nil, preventing any further operations on cc.
 	cc.mu.Unlock()
 
+	cc.resolverWrapper.close()
 	// The order of closing matters here since the balancer wrapper assumes the
 	// picker is closed before it is closed.
-	if pWrapper != nil {
-		pWrapper.close()
-	}
-	if bWrapper != nil {
-		bWrapper.close()
-	}
-	if rWrapper != nil {
-		rWrapper.close()
-	}
-	if idlenessMgr != nil {
-		idlenessMgr.close()
-	}
+	cc.pickerWrapper.close()
+	cc.balancerWrapper.close()
+
+	<-cc.resolverWrapper.serializer.Done()
+	<-cc.balancerWrapper.serializer.Done()
 
 	for ac := range conns {
 		ac.tearDown(ErrClientConnClosing)
@@ -1248,7 +1186,7 @@ type addrConn struct {
 
 	cc     *ClientConn
 	dopts  dialOptions
-	acbw   balancer.SubConn
+	acbw   *acBalancerWrapper
 	scopts balancer.NewSubConnOptions
 
 	// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
@@ -1286,7 +1224,7 @@ func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error)
 	} else {
 		channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v, last error: %s", s, lastErr)
 	}
-	ac.cc.handleSubConnStateChange(ac.acbw, s, lastErr)
+	ac.acbw.updateState(s, lastErr)
 }
 
 // adjustParams updates parameters used to create transports upon
@@ -1336,12 +1274,14 @@ func (ac *addrConn) resetTransport() {
 
 	if err := ac.tryAllAddrs(acCtx, addrs, connectDeadline); err != nil {
 		ac.cc.resolveNow(resolver.ResolveNowOptions{})
-		// After exhausting all addresses, the addrConn enters
-		// TRANSIENT_FAILURE.
+		ac.mu.Lock()
 		if acCtx.Err() != nil {
+			// addrConn was torn down.
+			ac.mu.Unlock()
 			return
 		}
-		ac.mu.Lock()
+		// After exhausting all addresses, the addrConn enters
+		// TRANSIENT_FAILURE.
 		ac.updateConnectivityState(connectivity.TransientFailure, err)
 
 		// Backoff.
@@ -1537,7 +1477,7 @@ func (ac *addrConn) startHealthCheck(ctx context.Context) {
 
 	// Set up the health check helper functions.
 	currentTr := ac.transport
-	newStream := func(method string) (interface{}, error) {
+	newStream := func(method string) (any, error) {
 		ac.mu.Lock()
 		if ac.transport != currentTr {
 			ac.mu.Unlock()
@@ -1625,16 +1565,7 @@ func (ac *addrConn) tearDown(err error) {
 	ac.updateConnectivityState(connectivity.Shutdown, nil)
 	ac.cancel()
 	ac.curAddr = resolver.Address{}
-	if err == errConnDrain && curTr != nil {
-		// GracefulClose(...) may be executed multiple times when
-		// i) receiving multiple GoAway frames from the server; or
-		// ii) there are concurrent name resolver/Balancer triggered
-		// address removal and GoAway.
-		// We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
-		ac.mu.Unlock()
-		curTr.GracefulClose()
-		ac.mu.Lock()
-	}
+
 	channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{
 		Desc:     "Subchannel deleted",
 		Severity: channelz.CtInfo,
@@ -1648,6 +1579,29 @@ func (ac *addrConn) tearDown(err error) {
 	// being deleted right away.
 	channelz.RemoveEntry(ac.channelzID)
 	ac.mu.Unlock()
+
+	// We have to release the lock before the call to GracefulClose/Close here
+	// because both of them call onClose(), which requires locking ac.mu.
+	if curTr != nil {
+		if err == errConnDrain {
+			// Close the transport gracefully when the subConn is being shutdown.
+			//
+			// GracefulClose() may be executed multiple times if:
+			// - multiple GoAway frames are received from the server
+			// - there are concurrent name resolver or balancer triggered
+			//   address removal and GoAway
+			curTr.GracefulClose()
+		} else {
+			// Hard close the transport when the channel is entering idle or is
+			// being shutdown. In the case where the channel is being shutdown,
+			// closing of transports is also taken care of by cancelation of cc.ctx.
+			// But in the case where the channel is entering idle, we need to
+			// explicitly close the transports here. Instead of distinguishing
+			// between these two cases, it is simpler to close the transport
+			// unconditionally here.
+			curTr.Close(err)
+		}
+	}
 }
 
 func (ac *addrConn) getState() connectivity.State {
@@ -1774,7 +1728,7 @@ func (cc *ClientConn) parseTargetAndFindResolver() error {
 	if err != nil {
 		channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", cc.target, err)
 	} else {
-		channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget)
+		channelz.Infof(logger, cc.channelzID, "parsed dial target is: %#v", parsedTarget)
 		rb = cc.getResolver(parsedTarget.URL.Scheme)
 		if rb != nil {
 			cc.parsedTarget = parsedTarget
@@ -1807,19 +1761,70 @@ func (cc *ClientConn) parseTargetAndFindResolver() error {
 }
 
 // parseTarget uses RFC 3986 semantics to parse the given target into a
-// resolver.Target struct containing scheme, authority and url. Query
-// params are stripped from the endpoint.
+// resolver.Target struct containing url. Query params are stripped from the
+// endpoint.
 func parseTarget(target string) (resolver.Target, error) {
 	u, err := url.Parse(target)
 	if err != nil {
 		return resolver.Target{}, err
 	}
 
-	return resolver.Target{
-		Scheme:    u.Scheme,
-		Authority: u.Host,
-		URL:       *u,
-	}, nil
+	return resolver.Target{URL: *u}, nil
+}
+
+func encodeAuthority(authority string) string {
+	const upperhex = "0123456789ABCDEF"
+
+	// Return for characters that must be escaped as per
+	// Valid chars are mentioned here:
+	// https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
+	shouldEscape := func(c byte) bool {
+		// Alphanum are always allowed.
+		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+			return false
+		}
+		switch c {
+		case '-', '_', '.', '~': // Unreserved characters
+			return false
+		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // Subdelim characters
+			return false
+		case ':', '[', ']', '@': // Authority related delimeters
+			return false
+		}
+		// Everything else must be escaped.
+		return true
+	}
+
+	hexCount := 0
+	for i := 0; i < len(authority); i++ {
+		c := authority[i]
+		if shouldEscape(c) {
+			hexCount++
+		}
+	}
+
+	if hexCount == 0 {
+		return authority
+	}
+
+	required := len(authority) + 2*hexCount
+	t := make([]byte, required)
+
+	j := 0
+	// This logic is a barebones version of escape in the go net/url library.
+	for i := 0; i < len(authority); i++ {
+		switch c := authority[i]; {
+		case shouldEscape(c):
+			t[j] = '%'
+			t[j+1] = upperhex[c>>4]
+			t[j+2] = upperhex[c&15]
+			j += 3
+		default:
+			t[j] = authority[i]
+			j++
+		}
+	}
+	return string(t)
 }
 
 // Determine channel authority. The order of precedence is as follows:
@@ -1855,54 +1860,17 @@ func (cc *ClientConn) determineAuthority() error {
 	}
 
 	endpoint := cc.parsedTarget.Endpoint()
-	target := cc.target
-	switch {
-	case authorityFromDialOption != "":
+	if authorityFromDialOption != "" {
 		cc.authority = authorityFromDialOption
-	case authorityFromCreds != "":
+	} else if authorityFromCreds != "" {
 		cc.authority = authorityFromCreds
-	case strings.HasPrefix(target, "unix:") || strings.HasPrefix(target, "unix-abstract:"):
-		// TODO: remove when the unix resolver implements optional interface to
-		// return channel authority.
-		cc.authority = "localhost"
-	case strings.HasPrefix(endpoint, ":"):
+	} else if auth, ok := cc.resolverBuilder.(resolver.AuthorityOverrider); ok {
+		cc.authority = auth.OverrideAuthority(cc.parsedTarget)
+	} else if strings.HasPrefix(endpoint, ":") {
 		cc.authority = "localhost" + endpoint
-	default:
-		// TODO: Define an optional interface on the resolver builder to return
-		// the channel authority given the user's dial target. For resolvers
-		// which don't implement this interface, we will use the endpoint from
-		// "scheme://authority/endpoint" as the default authority.
-		cc.authority = endpoint
+	} else {
+		cc.authority = encodeAuthority(endpoint)
 	}
 	channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority)
 	return nil
 }
-
-// initResolverWrapper creates a ccResolverWrapper, which builds the name
-// resolver. This method grabs the lock to assign the newly built resolver
-// wrapper to the cc.resolverWrapper field.
-func (cc *ClientConn) initResolverWrapper(creds credentials.TransportCredentials) error {
-	rw, err := newCCResolverWrapper(cc, ccResolverWrapperOpts{
-		target:  cc.parsedTarget,
-		builder: cc.resolverBuilder,
-		bOpts: resolver.BuildOptions{
-			DisableServiceConfig: cc.dopts.disableServiceConfig,
-			DialCreds:            creds,
-			CredsBundle:          cc.dopts.copts.CredsBundle,
-			Dialer:               cc.dopts.copts.Dialer,
-		},
-		channelzID: cc.channelzID,
-	})
-	if err != nil {
-		return fmt.Errorf("failed to build resolver: %v", err)
-	}
-	// Resolver implementations may report state update or error inline when
-	// built (or right after), and this is handled in cc.updateResolverState.
-	// Also, an error from the resolver might lead to a re-resolution request
-	// from the balancer, which is handled in resolveNow() where
-	// `cc.resolverWrapper` is accessed. Hence, we need to hold the lock here.
-	cc.mu.Lock()
-	cc.resolverWrapper = rw
-	cc.mu.Unlock()
-	return nil
-}
diff --git a/vendor/google.golang.org/grpc/codec.go b/vendor/google.golang.org/grpc/codec.go
index 12977654781..411e3dfd47c 100644
--- a/vendor/google.golang.org/grpc/codec.go
+++ b/vendor/google.golang.org/grpc/codec.go
@@ -27,8 +27,8 @@ import (
 // omits the name/string, which vary between the two and are not needed for
 // anything besides the registry in the encoding package.
 type baseCodec interface {
-	Marshal(v interface{}) ([]byte, error)
-	Unmarshal(data []byte, v interface{}) error
+	Marshal(v any) ([]byte, error)
+	Unmarshal(data []byte, v any) error
 }
 
 var _ baseCodec = Codec(nil)
@@ -41,9 +41,9 @@ var _ baseCodec = encoding.Codec(nil)
 // Deprecated: use encoding.Codec instead.
 type Codec interface {
 	// Marshal returns the wire format of v.
-	Marshal(v interface{}) ([]byte, error)
+	Marshal(v any) ([]byte, error)
 	// Unmarshal parses the wire format into v.
-	Unmarshal(data []byte, v interface{}) error
+	Unmarshal(data []byte, v any) error
 	// String returns the name of the Codec implementation.  This is unused by
 	// gRPC.
 	String() string
diff --git a/vendor/google.golang.org/grpc/codes/codes.go b/vendor/google.golang.org/grpc/codes/codes.go
index 11b106182db..08476ad1fe1 100644
--- a/vendor/google.golang.org/grpc/codes/codes.go
+++ b/vendor/google.golang.org/grpc/codes/codes.go
@@ -25,7 +25,13 @@ import (
 	"strconv"
 )
 
-// A Code is an unsigned 32-bit error code as defined in the gRPC spec.
+// A Code is a status code defined according to the [gRPC documentation].
+//
+// Only the codes defined as consts in this package are valid codes. Do not use
+// other code values.  Behavior of other codes is implementation-specific and
+// interoperability between implementations is not guaranteed.
+//
+// [gRPC documentation]: https://github.com/grpc/grpc/blob/master/doc/statuscodes.md
 type Code uint32
 
 const (
diff --git a/vendor/google.golang.org/grpc/credentials/tls.go b/vendor/google.golang.org/grpc/credentials/tls.go
index 877b7cd21af..5dafd34edf9 100644
--- a/vendor/google.golang.org/grpc/credentials/tls.go
+++ b/vendor/google.golang.org/grpc/credentials/tls.go
@@ -44,10 +44,25 @@ func (t TLSInfo) AuthType() string {
 	return "tls"
 }
 
+// cipherSuiteLookup returns the string version of a TLS cipher suite ID.
+func cipherSuiteLookup(cipherSuiteID uint16) string {
+	for _, s := range tls.CipherSuites() {
+		if s.ID == cipherSuiteID {
+			return s.Name
+		}
+	}
+	for _, s := range tls.InsecureCipherSuites() {
+		if s.ID == cipherSuiteID {
+			return s.Name
+		}
+	}
+	return fmt.Sprintf("unknown ID: %v", cipherSuiteID)
+}
+
 // GetSecurityValue returns security info requested by channelz.
 func (t TLSInfo) GetSecurityValue() ChannelzSecurityValue {
 	v := &TLSChannelzSecurityValue{
-		StandardName: cipherSuiteLookup[t.State.CipherSuite],
+		StandardName: cipherSuiteLookup(t.State.CipherSuite),
 	}
 	// Currently there's no way to get LocalCertificate info from tls package.
 	if len(t.State.PeerCertificates) > 0 {
@@ -138,10 +153,39 @@ func (c *tlsCreds) OverrideServerName(serverNameOverride string) error {
 	return nil
 }
 
+// The following cipher suites are forbidden for use with HTTP/2 by
+// https://datatracker.ietf.org/doc/html/rfc7540#appendix-A
+var tls12ForbiddenCipherSuites = map[uint16]struct{}{
+	tls.TLS_RSA_WITH_AES_128_CBC_SHA:         {},
+	tls.TLS_RSA_WITH_AES_256_CBC_SHA:         {},
+	tls.TLS_RSA_WITH_AES_128_GCM_SHA256:      {},
+	tls.TLS_RSA_WITH_AES_256_GCM_SHA384:      {},
+	tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA: {},
+	tls.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA: {},
+	tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA:   {},
+	tls.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:   {},
+}
+
 // NewTLS uses c to construct a TransportCredentials based on TLS.
 func NewTLS(c *tls.Config) TransportCredentials {
 	tc := &tlsCreds{credinternal.CloneTLSConfig(c)}
 	tc.config.NextProtos = credinternal.AppendH2ToNextProtos(tc.config.NextProtos)
+	// If the user did not configure a MinVersion and did not configure a
+	// MaxVersion < 1.2, use MinVersion=1.2, which is required by
+	// https://datatracker.ietf.org/doc/html/rfc7540#section-9.2
+	if tc.config.MinVersion == 0 && (tc.config.MaxVersion == 0 || tc.config.MaxVersion >= tls.VersionTLS12) {
+		tc.config.MinVersion = tls.VersionTLS12
+	}
+	// If the user did not configure CipherSuites, use all "secure" cipher
+	// suites reported by the TLS package, but remove some explicitly forbidden
+	// by https://datatracker.ietf.org/doc/html/rfc7540#appendix-A
+	if tc.config.CipherSuites == nil {
+		for _, cs := range tls.CipherSuites() {
+			if _, ok := tls12ForbiddenCipherSuites[cs.ID]; !ok {
+				tc.config.CipherSuites = append(tc.config.CipherSuites, cs.ID)
+			}
+		}
+	}
 	return tc
 }
 
@@ -205,32 +249,3 @@ type TLSChannelzSecurityValue struct {
 	LocalCertificate  []byte
 	RemoteCertificate []byte
 }
-
-var cipherSuiteLookup = map[uint16]string{
-	tls.TLS_RSA_WITH_RC4_128_SHA:                "TLS_RSA_WITH_RC4_128_SHA",
-	tls.TLS_RSA_WITH_3DES_EDE_CBC_SHA:           "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
-	tls.TLS_RSA_WITH_AES_128_CBC_SHA:            "TLS_RSA_WITH_AES_128_CBC_SHA",
-	tls.TLS_RSA_WITH_AES_256_CBC_SHA:            "TLS_RSA_WITH_AES_256_CBC_SHA",
-	tls.TLS_RSA_WITH_AES_128_GCM_SHA256:         "TLS_RSA_WITH_AES_128_GCM_SHA256",
-	tls.TLS_RSA_WITH_AES_256_GCM_SHA384:         "TLS_RSA_WITH_AES_256_GCM_SHA384",
-	tls.TLS_ECDHE_ECDSA_WITH_RC4_128_SHA:        "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
-	tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA:    "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
-	tls.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA:    "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
-	tls.TLS_ECDHE_RSA_WITH_RC4_128_SHA:          "TLS_ECDHE_RSA_WITH_RC4_128_SHA",
-	tls.TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA:     "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
-	tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA:      "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
-	tls.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:      "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
-	tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256:   "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
-	tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
-	tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384:   "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
-	tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
-	tls.TLS_FALLBACK_SCSV:                       "TLS_FALLBACK_SCSV",
-	tls.TLS_RSA_WITH_AES_128_CBC_SHA256:         "TLS_RSA_WITH_AES_128_CBC_SHA256",
-	tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
-	tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256:   "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
-	tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305:    "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
-	tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:  "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",
-	tls.TLS_AES_128_GCM_SHA256:                  "TLS_AES_128_GCM_SHA256",
-	tls.TLS_AES_256_GCM_SHA384:                  "TLS_AES_256_GCM_SHA384",
-	tls.TLS_CHACHA20_POLY1305_SHA256:            "TLS_CHACHA20_POLY1305_SHA256",
-}
diff --git a/vendor/google.golang.org/grpc/dialoptions.go b/vendor/google.golang.org/grpc/dialoptions.go
index 15a3d5102a9..ba242618040 100644
--- a/vendor/google.golang.org/grpc/dialoptions.go
+++ b/vendor/google.golang.org/grpc/dialoptions.go
@@ -46,6 +46,7 @@ func init() {
 	internal.WithBinaryLogger = withBinaryLogger
 	internal.JoinDialOptions = newJoinDialOption
 	internal.DisableGlobalDialOptions = newDisableGlobalDialOptions
+	internal.WithRecvBufferPool = withRecvBufferPool
 }
 
 // dialOptions configure a Dial call. dialOptions are set by the DialOption
@@ -63,7 +64,6 @@ type dialOptions struct {
 	block                       bool
 	returnLastError             bool
 	timeout                     time.Duration
-	scChan                      <-chan ServiceConfig
 	authority                   string
 	binaryLogger                binarylog.Logger
 	copts                       transport.ConnectOptions
@@ -78,6 +78,7 @@ type dialOptions struct {
 	defaultServiceConfigRawJSON *string
 	resolvers                   []resolver.Builder
 	idleTimeout                 time.Duration
+	recvBufferPool              SharedBufferPool
 }
 
 // DialOption configures how we set up the connection.
@@ -138,6 +139,20 @@ func newJoinDialOption(opts ...DialOption) DialOption {
 	return &joinDialOption{opts: opts}
 }
 
+// WithSharedWriteBuffer allows reusing per-connection transport write buffer.
+// If this option is set to true every connection will release the buffer after
+// flushing the data on the wire.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a
+// later release.
+func WithSharedWriteBuffer(val bool) DialOption {
+	return newFuncDialOption(func(o *dialOptions) {
+		o.copts.SharedWriteBuffer = val
+	})
+}
+
 // WithWriteBufferSize determines how much data can be batched before doing a
 // write on the wire. The corresponding memory allocation for this buffer will
 // be twice the size to keep syscalls low. The default value for this buffer is
@@ -235,19 +250,6 @@ func WithDecompressor(dc Decompressor) DialOption {
 	})
 }
 
-// WithServiceConfig returns a DialOption which has a channel to read the
-// service configuration.
-//
-// Deprecated: service config should be received through name resolver or via
-// WithDefaultServiceConfig, as specified at
-// https://github.com/grpc/grpc/blob/master/doc/service_config.md.  Will be
-// removed in a future 1.x release.
-func WithServiceConfig(c <-chan ServiceConfig) DialOption {
-	return newFuncDialOption(func(o *dialOptions) {
-		o.scChan = c
-	})
-}
-
 // WithConnectParams configures the ClientConn to use the provided ConnectParams
 // for creating and maintaining connections to servers.
 //
@@ -398,6 +400,17 @@ func WithTimeout(d time.Duration) DialOption {
 // connections. If FailOnNonTempDialError() is set to true, and an error is
 // returned by f, gRPC checks the error's Temporary() method to decide if it
 // should try to reconnect to the network address.
+//
+// Note: All supported releases of Go (as of December 2023) override the OS
+// defaults for TCP keepalive time and interval to 15s. To enable TCP keepalive
+// with OS defaults for keepalive time and interval, use a net.Dialer that sets
+// the KeepAlive field to a negative value, and sets the SO_KEEPALIVE socket
+// option to true from the Control field. For a concrete example of how to do
+// this, see internal.NetDialerWithTCPKeepalive().
+//
+// For more information, please see [issue 23459] in the Go github repo.
+//
+// [issue 23459]: https://github.com/golang/go/issues/23459
 func WithContextDialer(f func(context.Context, string) (net.Conn, error)) DialOption {
 	return newFuncDialOption(func(o *dialOptions) {
 		o.copts.Dialer = f
@@ -472,7 +485,7 @@ func FailOnNonTempDialError(f bool) DialOption {
 // the RPCs.
 func WithUserAgent(s string) DialOption {
 	return newFuncDialOption(func(o *dialOptions) {
-		o.copts.UserAgent = s
+		o.copts.UserAgent = s + " " + grpcUA
 	})
 }
 
@@ -622,12 +635,16 @@ func withHealthCheckFunc(f internal.HealthChecker) DialOption {
 
 func defaultDialOptions() dialOptions {
 	return dialOptions{
-		healthCheckFunc: internal.HealthCheckFunc,
 		copts: transport.ConnectOptions{
-			WriteBufferSize: defaultWriteBufSize,
 			ReadBufferSize:  defaultReadBufSize,
+			WriteBufferSize: defaultWriteBufSize,
 			UseProxy:        true,
+			UserAgent:       grpcUA,
 		},
+		bs:              internalbackoff.DefaultExponential,
+		healthCheckFunc: internal.HealthCheckFunc,
+		idleTimeout:     30 * time.Minute,
+		recvBufferPool:  nopBufferPool{},
 	}
 }
 
@@ -664,8 +681,8 @@ func WithResolvers(rs ...resolver.Builder) DialOption {
 // channel will exit idle mode when the Connect() method is called or when an
 // RPC is initiated.
 //
-// By default this feature is disabled, which can also be explicitly configured
-// by passing zero to this function.
+// A default timeout of 30 minutes will be used if this dial option is not set
+// at dial time and idleness can be disabled by passing a timeout of zero.
 //
 // # Experimental
 //
@@ -676,3 +693,26 @@ func WithIdleTimeout(d time.Duration) DialOption {
 		o.idleTimeout = d
 	})
 }
+
+// WithRecvBufferPool returns a DialOption that configures the ClientConn
+// to use the provided shared buffer pool for parsing incoming messages. Depending
+// on the application's workload, this could result in reduced memory allocation.
+//
+// If you are unsure about how to implement a memory pool but want to utilize one,
+// begin with grpc.NewSharedBufferPool.
+//
+// Note: The shared buffer pool feature will not be active if any of the following
+// options are used: WithStatsHandler, EnableTracing, or binary logging. In such
+// cases, the shared buffer pool will be ignored.
+//
+// Deprecated: use experimental.WithRecvBufferPool instead.  Will be deleted in
+// v1.60.0 or later.
+func WithRecvBufferPool(bufferPool SharedBufferPool) DialOption {
+	return withRecvBufferPool(bufferPool)
+}
+
+func withRecvBufferPool(bufferPool SharedBufferPool) DialOption {
+	return newFuncDialOption(func(o *dialOptions) {
+		o.recvBufferPool = bufferPool
+	})
+}
diff --git a/vendor/google.golang.org/grpc/encoding/encoding.go b/vendor/google.golang.org/grpc/encoding/encoding.go
index 07a5861352a..5ebf88d7147 100644
--- a/vendor/google.golang.org/grpc/encoding/encoding.go
+++ b/vendor/google.golang.org/grpc/encoding/encoding.go
@@ -38,6 +38,10 @@ const Identity = "identity"
 
 // Compressor is used for compressing and decompressing when sending or
 // receiving messages.
+//
+// If a Compressor implements `DecompressedSize(compressedBytes []byte) int`,
+// gRPC will invoke it to determine the size of the buffer allocated for the
+// result of decompression.  A return value of -1 indicates unknown size.
 type Compressor interface {
 	// Compress writes the data written to wc to w after compressing it.  If an
 	// error occurs while initializing the compressor, that error is returned
@@ -51,15 +55,6 @@ type Compressor interface {
 	// coding header.  The result must be static; the result cannot change
 	// between calls.
 	Name() string
-	// If a Compressor implements
-	// DecompressedSize(compressedBytes []byte) int, gRPC will call it
-	// to determine the size of the buffer allocated for the result of decompression.
-	// Return -1 to indicate unknown size.
-	//
-	// Experimental
-	//
-	// Notice: This API is EXPERIMENTAL and may be changed or removed in a
-	// later release.
 }
 
 var registeredCompressor = make(map[string]Compressor)
@@ -90,9 +85,9 @@ func GetCompressor(name string) Compressor {
 // methods can be called from concurrent goroutines.
 type Codec interface {
 	// Marshal returns the wire format of v.
-	Marshal(v interface{}) ([]byte, error)
+	Marshal(v any) ([]byte, error)
 	// Unmarshal parses the wire format into v.
-	Unmarshal(data []byte, v interface{}) error
+	Unmarshal(data []byte, v any) error
 	// Name returns the name of the Codec implementation. The returned string
 	// will be used as part of content type in transmission.  The result must be
 	// static; the result cannot change between calls.
diff --git a/vendor/google.golang.org/grpc/encoding/proto/proto.go b/vendor/google.golang.org/grpc/encoding/proto/proto.go
index 3009b35afe7..0ee3d3bae97 100644
--- a/vendor/google.golang.org/grpc/encoding/proto/proto.go
+++ b/vendor/google.golang.org/grpc/encoding/proto/proto.go
@@ -37,7 +37,7 @@ func init() {
 // codec is a Codec implementation with protobuf. It is the default codec for gRPC.
 type codec struct{}
 
-func (codec) Marshal(v interface{}) ([]byte, error) {
+func (codec) Marshal(v any) ([]byte, error) {
 	vv, ok := v.(proto.Message)
 	if !ok {
 		return nil, fmt.Errorf("failed to marshal, message is %T, want proto.Message", v)
@@ -45,7 +45,7 @@ func (codec) Marshal(v interface{}) ([]byte, error) {
 	return proto.Marshal(vv)
 }
 
-func (codec) Unmarshal(data []byte, v interface{}) error {
+func (codec) Unmarshal(data []byte, v any) error {
 	vv, ok := v.(proto.Message)
 	if !ok {
 		return fmt.Errorf("failed to unmarshal, message is %T, want proto.Message", v)
diff --git a/vendor/google.golang.org/grpc/grpclog/component.go b/vendor/google.golang.org/grpc/grpclog/component.go
index 8358dd6e2ab..ac73c9ced25 100644
--- a/vendor/google.golang.org/grpc/grpclog/component.go
+++ b/vendor/google.golang.org/grpc/grpclog/component.go
@@ -31,71 +31,71 @@ type componentData struct {
 
 var cache = map[string]*componentData{}
 
-func (c *componentData) InfoDepth(depth int, args ...interface{}) {
-	args = append([]interface{}{"[" + string(c.name) + "]"}, args...)
+func (c *componentData) InfoDepth(depth int, args ...any) {
+	args = append([]any{"[" + string(c.name) + "]"}, args...)
 	grpclog.InfoDepth(depth+1, args...)
 }
 
-func (c *componentData) WarningDepth(depth int, args ...interface{}) {
-	args = append([]interface{}{"[" + string(c.name) + "]"}, args...)
+func (c *componentData) WarningDepth(depth int, args ...any) {
+	args = append([]any{"[" + string(c.name) + "]"}, args...)
 	grpclog.WarningDepth(depth+1, args...)
 }
 
-func (c *componentData) ErrorDepth(depth int, args ...interface{}) {
-	args = append([]interface{}{"[" + string(c.name) + "]"}, args...)
+func (c *componentData) ErrorDepth(depth int, args ...any) {
+	args = append([]any{"[" + string(c.name) + "]"}, args...)
 	grpclog.ErrorDepth(depth+1, args...)
 }
 
-func (c *componentData) FatalDepth(depth int, args ...interface{}) {
-	args = append([]interface{}{"[" + string(c.name) + "]"}, args...)
+func (c *componentData) FatalDepth(depth int, args ...any) {
+	args = append([]any{"[" + string(c.name) + "]"}, args...)
 	grpclog.FatalDepth(depth+1, args...)
 }
 
-func (c *componentData) Info(args ...interface{}) {
+func (c *componentData) Info(args ...any) {
 	c.InfoDepth(1, args...)
 }
 
-func (c *componentData) Warning(args ...interface{}) {
+func (c *componentData) Warning(args ...any) {
 	c.WarningDepth(1, args...)
 }
 
-func (c *componentData) Error(args ...interface{}) {
+func (c *componentData) Error(args ...any) {
 	c.ErrorDepth(1, args...)
 }
 
-func (c *componentData) Fatal(args ...interface{}) {
+func (c *componentData) Fatal(args ...any) {
 	c.FatalDepth(1, args...)
 }
 
-func (c *componentData) Infof(format string, args ...interface{}) {
+func (c *componentData) Infof(format string, args ...any) {
 	c.InfoDepth(1, fmt.Sprintf(format, args...))
 }
 
-func (c *componentData) Warningf(format string, args ...interface{}) {
+func (c *componentData) Warningf(format string, args ...any) {
 	c.WarningDepth(1, fmt.Sprintf(format, args...))
 }
 
-func (c *componentData) Errorf(format string, args ...interface{}) {
+func (c *componentData) Errorf(format string, args ...any) {
 	c.ErrorDepth(1, fmt.Sprintf(format, args...))
 }
 
-func (c *componentData) Fatalf(format string, args ...interface{}) {
+func (c *componentData) Fatalf(format string, args ...any) {
 	c.FatalDepth(1, fmt.Sprintf(format, args...))
 }
 
-func (c *componentData) Infoln(args ...interface{}) {
+func (c *componentData) Infoln(args ...any) {
 	c.InfoDepth(1, args...)
 }
 
-func (c *componentData) Warningln(args ...interface{}) {
+func (c *componentData) Warningln(args ...any) {
 	c.WarningDepth(1, args...)
 }
 
-func (c *componentData) Errorln(args ...interface{}) {
+func (c *componentData) Errorln(args ...any) {
 	c.ErrorDepth(1, args...)
 }
 
-func (c *componentData) Fatalln(args ...interface{}) {
+func (c *componentData) Fatalln(args ...any) {
 	c.FatalDepth(1, args...)
 }
 
diff --git a/vendor/google.golang.org/grpc/grpclog/grpclog.go b/vendor/google.golang.org/grpc/grpclog/grpclog.go
index c8bb2be34bf..16928c9cb99 100644
--- a/vendor/google.golang.org/grpc/grpclog/grpclog.go
+++ b/vendor/google.golang.org/grpc/grpclog/grpclog.go
@@ -42,53 +42,53 @@ func V(l int) bool {
 }
 
 // Info logs to the INFO log.
-func Info(args ...interface{}) {
+func Info(args ...any) {
 	grpclog.Logger.Info(args...)
 }
 
 // Infof logs to the INFO log. Arguments are handled in the manner of fmt.Printf.
-func Infof(format string, args ...interface{}) {
+func Infof(format string, args ...any) {
 	grpclog.Logger.Infof(format, args...)
 }
 
 // Infoln logs to the INFO log. Arguments are handled in the manner of fmt.Println.
-func Infoln(args ...interface{}) {
+func Infoln(args ...any) {
 	grpclog.Logger.Infoln(args...)
 }
 
 // Warning logs to the WARNING log.
-func Warning(args ...interface{}) {
+func Warning(args ...any) {
 	grpclog.Logger.Warning(args...)
 }
 
 // Warningf logs to the WARNING log. Arguments are handled in the manner of fmt.Printf.
-func Warningf(format string, args ...interface{}) {
+func Warningf(format string, args ...any) {
 	grpclog.Logger.Warningf(format, args...)
 }
 
 // Warningln logs to the WARNING log. Arguments are handled in the manner of fmt.Println.
-func Warningln(args ...interface{}) {
+func Warningln(args ...any) {
 	grpclog.Logger.Warningln(args...)
 }
 
 // Error logs to the ERROR log.
-func Error(args ...interface{}) {
+func Error(args ...any) {
 	grpclog.Logger.Error(args...)
 }
 
 // Errorf logs to the ERROR log. Arguments are handled in the manner of fmt.Printf.
-func Errorf(format string, args ...interface{}) {
+func Errorf(format string, args ...any) {
 	grpclog.Logger.Errorf(format, args...)
 }
 
 // Errorln logs to the ERROR log. Arguments are handled in the manner of fmt.Println.
-func Errorln(args ...interface{}) {
+func Errorln(args ...any) {
 	grpclog.Logger.Errorln(args...)
 }
 
 // Fatal logs to the FATAL log. Arguments are handled in the manner of fmt.Print.
 // It calls os.Exit() with exit code 1.
-func Fatal(args ...interface{}) {
+func Fatal(args ...any) {
 	grpclog.Logger.Fatal(args...)
 	// Make sure fatal logs will exit.
 	os.Exit(1)
@@ -96,7 +96,7 @@ func Fatal(args ...interface{}) {
 
 // Fatalf logs to the FATAL log. Arguments are handled in the manner of fmt.Printf.
 // It calls os.Exit() with exit code 1.
-func Fatalf(format string, args ...interface{}) {
+func Fatalf(format string, args ...any) {
 	grpclog.Logger.Fatalf(format, args...)
 	// Make sure fatal logs will exit.
 	os.Exit(1)
@@ -104,7 +104,7 @@ func Fatalf(format string, args ...interface{}) {
 
 // Fatalln logs to the FATAL log. Arguments are handled in the manner of fmt.Println.
 // It calle os.Exit()) with exit code 1.
-func Fatalln(args ...interface{}) {
+func Fatalln(args ...any) {
 	grpclog.Logger.Fatalln(args...)
 	// Make sure fatal logs will exit.
 	os.Exit(1)
@@ -113,20 +113,20 @@ func Fatalln(args ...interface{}) {
 // Print prints to the logger. Arguments are handled in the manner of fmt.Print.
 //
 // Deprecated: use Info.
-func Print(args ...interface{}) {
+func Print(args ...any) {
 	grpclog.Logger.Info(args...)
 }
 
 // Printf prints to the logger. Arguments are handled in the manner of fmt.Printf.
 //
 // Deprecated: use Infof.
-func Printf(format string, args ...interface{}) {
+func Printf(format string, args ...any) {
 	grpclog.Logger.Infof(format, args...)
 }
 
 // Println prints to the logger. Arguments are handled in the manner of fmt.Println.
 //
 // Deprecated: use Infoln.
-func Println(args ...interface{}) {
+func Println(args ...any) {
 	grpclog.Logger.Infoln(args...)
 }
diff --git a/vendor/google.golang.org/grpc/grpclog/logger.go b/vendor/google.golang.org/grpc/grpclog/logger.go
index ef06a4822b7..b1674d8267c 100644
--- a/vendor/google.golang.org/grpc/grpclog/logger.go
+++ b/vendor/google.golang.org/grpc/grpclog/logger.go
@@ -24,12 +24,12 @@ import "google.golang.org/grpc/internal/grpclog"
 //
 // Deprecated: use LoggerV2.
 type Logger interface {
-	Fatal(args ...interface{})
-	Fatalf(format string, args ...interface{})
-	Fatalln(args ...interface{})
-	Print(args ...interface{})
-	Printf(format string, args ...interface{})
-	Println(args ...interface{})
+	Fatal(args ...any)
+	Fatalf(format string, args ...any)
+	Fatalln(args ...any)
+	Print(args ...any)
+	Printf(format string, args ...any)
+	Println(args ...any)
 }
 
 // SetLogger sets the logger that is used in grpc. Call only from
@@ -45,39 +45,39 @@ type loggerWrapper struct {
 	Logger
 }
 
-func (g *loggerWrapper) Info(args ...interface{}) {
+func (g *loggerWrapper) Info(args ...any) {
 	g.Logger.Print(args...)
 }
 
-func (g *loggerWrapper) Infoln(args ...interface{}) {
+func (g *loggerWrapper) Infoln(args ...any) {
 	g.Logger.Println(args...)
 }
 
-func (g *loggerWrapper) Infof(format string, args ...interface{}) {
+func (g *loggerWrapper) Infof(format string, args ...any) {
 	g.Logger.Printf(format, args...)
 }
 
-func (g *loggerWrapper) Warning(args ...interface{}) {
+func (g *loggerWrapper) Warning(args ...any) {
 	g.Logger.Print(args...)
 }
 
-func (g *loggerWrapper) Warningln(args ...interface{}) {
+func (g *loggerWrapper) Warningln(args ...any) {
 	g.Logger.Println(args...)
 }
 
-func (g *loggerWrapper) Warningf(format string, args ...interface{}) {
+func (g *loggerWrapper) Warningf(format string, args ...any) {
 	g.Logger.Printf(format, args...)
 }
 
-func (g *loggerWrapper) Error(args ...interface{}) {
+func (g *loggerWrapper) Error(args ...any) {
 	g.Logger.Print(args...)
 }
 
-func (g *loggerWrapper) Errorln(args ...interface{}) {
+func (g *loggerWrapper) Errorln(args ...any) {
 	g.Logger.Println(args...)
 }
 
-func (g *loggerWrapper) Errorf(format string, args ...interface{}) {
+func (g *loggerWrapper) Errorf(format string, args ...any) {
 	g.Logger.Printf(format, args...)
 }
 
diff --git a/vendor/google.golang.org/grpc/grpclog/loggerv2.go b/vendor/google.golang.org/grpc/grpclog/loggerv2.go
index 5de66e40d36..ecfd36d7130 100644
--- a/vendor/google.golang.org/grpc/grpclog/loggerv2.go
+++ b/vendor/google.golang.org/grpc/grpclog/loggerv2.go
@@ -33,35 +33,35 @@ import (
 // LoggerV2 does underlying logging work for grpclog.
 type LoggerV2 interface {
 	// Info logs to INFO log. Arguments are handled in the manner of fmt.Print.
-	Info(args ...interface{})
+	Info(args ...any)
 	// Infoln logs to INFO log. Arguments are handled in the manner of fmt.Println.
-	Infoln(args ...interface{})
+	Infoln(args ...any)
 	// Infof logs to INFO log. Arguments are handled in the manner of fmt.Printf.
-	Infof(format string, args ...interface{})
+	Infof(format string, args ...any)
 	// Warning logs to WARNING log. Arguments are handled in the manner of fmt.Print.
-	Warning(args ...interface{})
+	Warning(args ...any)
 	// Warningln logs to WARNING log. Arguments are handled in the manner of fmt.Println.
-	Warningln(args ...interface{})
+	Warningln(args ...any)
 	// Warningf logs to WARNING log. Arguments are handled in the manner of fmt.Printf.
-	Warningf(format string, args ...interface{})
+	Warningf(format string, args ...any)
 	// Error logs to ERROR log. Arguments are handled in the manner of fmt.Print.
-	Error(args ...interface{})
+	Error(args ...any)
 	// Errorln logs to ERROR log. Arguments are handled in the manner of fmt.Println.
-	Errorln(args ...interface{})
+	Errorln(args ...any)
 	// Errorf logs to ERROR log. Arguments are handled in the manner of fmt.Printf.
-	Errorf(format string, args ...interface{})
+	Errorf(format string, args ...any)
 	// Fatal logs to ERROR log. Arguments are handled in the manner of fmt.Print.
 	// gRPC ensures that all Fatal logs will exit with os.Exit(1).
 	// Implementations may also call os.Exit() with a non-zero exit code.
-	Fatal(args ...interface{})
+	Fatal(args ...any)
 	// Fatalln logs to ERROR log. Arguments are handled in the manner of fmt.Println.
 	// gRPC ensures that all Fatal logs will exit with os.Exit(1).
 	// Implementations may also call os.Exit() with a non-zero exit code.
-	Fatalln(args ...interface{})
+	Fatalln(args ...any)
 	// Fatalf logs to ERROR log. Arguments are handled in the manner of fmt.Printf.
 	// gRPC ensures that all Fatal logs will exit with os.Exit(1).
 	// Implementations may also call os.Exit() with a non-zero exit code.
-	Fatalf(format string, args ...interface{})
+	Fatalf(format string, args ...any)
 	// V reports whether verbosity level l is at least the requested verbose level.
 	V(l int) bool
 }
@@ -182,53 +182,53 @@ func (g *loggerT) output(severity int, s string) {
 	g.m[severity].Output(2, string(b))
 }
 
-func (g *loggerT) Info(args ...interface{}) {
+func (g *loggerT) Info(args ...any) {
 	g.output(infoLog, fmt.Sprint(args...))
 }
 
-func (g *loggerT) Infoln(args ...interface{}) {
+func (g *loggerT) Infoln(args ...any) {
 	g.output(infoLog, fmt.Sprintln(args...))
 }
 
-func (g *loggerT) Infof(format string, args ...interface{}) {
+func (g *loggerT) Infof(format string, args ...any) {
 	g.output(infoLog, fmt.Sprintf(format, args...))
 }
 
-func (g *loggerT) Warning(args ...interface{}) {
+func (g *loggerT) Warning(args ...any) {
 	g.output(warningLog, fmt.Sprint(args...))
 }
 
-func (g *loggerT) Warningln(args ...interface{}) {
+func (g *loggerT) Warningln(args ...any) {
 	g.output(warningLog, fmt.Sprintln(args...))
 }
 
-func (g *loggerT) Warningf(format string, args ...interface{}) {
+func (g *loggerT) Warningf(format string, args ...any) {
 	g.output(warningLog, fmt.Sprintf(format, args...))
 }
 
-func (g *loggerT) Error(args ...interface{}) {
+func (g *loggerT) Error(args ...any) {
 	g.output(errorLog, fmt.Sprint(args...))
 }
 
-func (g *loggerT) Errorln(args ...interface{}) {
+func (g *loggerT) Errorln(args ...any) {
 	g.output(errorLog, fmt.Sprintln(args...))
 }
 
-func (g *loggerT) Errorf(format string, args ...interface{}) {
+func (g *loggerT) Errorf(format string, args ...any) {
 	g.output(errorLog, fmt.Sprintf(format, args...))
 }
 
-func (g *loggerT) Fatal(args ...interface{}) {
+func (g *loggerT) Fatal(args ...any) {
 	g.output(fatalLog, fmt.Sprint(args...))
 	os.Exit(1)
 }
 
-func (g *loggerT) Fatalln(args ...interface{}) {
+func (g *loggerT) Fatalln(args ...any) {
 	g.output(fatalLog, fmt.Sprintln(args...))
 	os.Exit(1)
 }
 
-func (g *loggerT) Fatalf(format string, args ...interface{}) {
+func (g *loggerT) Fatalf(format string, args ...any) {
 	g.output(fatalLog, fmt.Sprintf(format, args...))
 	os.Exit(1)
 }
@@ -248,11 +248,11 @@ func (g *loggerT) V(l int) bool {
 type DepthLoggerV2 interface {
 	LoggerV2
 	// InfoDepth logs to INFO log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	InfoDepth(depth int, args ...interface{})
+	InfoDepth(depth int, args ...any)
 	// WarningDepth logs to WARNING log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	WarningDepth(depth int, args ...interface{})
+	WarningDepth(depth int, args ...any)
 	// ErrorDepth logs to ERROR log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	ErrorDepth(depth int, args ...interface{})
+	ErrorDepth(depth int, args ...any)
 	// FatalDepth logs to FATAL log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	FatalDepth(depth int, args ...interface{})
+	FatalDepth(depth int, args ...any)
 }
diff --git a/vendor/google.golang.org/grpc/health/client.go b/vendor/google.golang.org/grpc/health/client.go
index b5bee483802..740745c45f6 100644
--- a/vendor/google.golang.org/grpc/health/client.go
+++ b/vendor/google.golang.org/grpc/health/client.go
@@ -56,7 +56,7 @@ const healthCheckMethod = "/grpc.health.v1.Health/Watch"
 
 // This function implements the protocol defined at:
 // https://github.com/grpc/grpc/blob/master/doc/health-checking.md
-func clientHealthCheck(ctx context.Context, newStream func(string) (interface{}, error), setConnectivityState func(connectivity.State, error), service string) error {
+func clientHealthCheck(ctx context.Context, newStream func(string) (any, error), setConnectivityState func(connectivity.State, error), service string) error {
 	tryCnt := 0
 
 retryConnection:
diff --git a/vendor/google.golang.org/grpc/health/grpc_health_v1/health.pb.go b/vendor/google.golang.org/grpc/health/grpc_health_v1/health.pb.go
index 142d35f753e..24299efd63f 100644
--- a/vendor/google.golang.org/grpc/health/grpc_health_v1/health.pb.go
+++ b/vendor/google.golang.org/grpc/health/grpc_health_v1/health.pb.go
@@ -17,7 +17,7 @@
 
 // Code generated by protoc-gen-go. DO NOT EDIT.
 // versions:
-// 	protoc-gen-go v1.30.0
+// 	protoc-gen-go v1.31.0
 // 	protoc        v4.22.0
 // source: grpc/health/v1/health.proto
 
diff --git a/vendor/google.golang.org/grpc/health/grpc_health_v1/health_grpc.pb.go b/vendor/google.golang.org/grpc/health/grpc_health_v1/health_grpc.pb.go
index a01a1b4d54b..4439cda0f3c 100644
--- a/vendor/google.golang.org/grpc/health/grpc_health_v1/health_grpc.pb.go
+++ b/vendor/google.golang.org/grpc/health/grpc_health_v1/health_grpc.pb.go
@@ -44,8 +44,15 @@ const (
 //
 // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
 type HealthClient interface {
-	// If the requested service is unknown, the call will fail with status
-	// NOT_FOUND.
+	// Check gets the health of the specified service. If the requested service
+	// is unknown, the call will fail with status NOT_FOUND. If the caller does
+	// not specify a service name, the server should respond with its overall
+	// health status.
+	//
+	// Clients should set a deadline when calling Check, and can declare the
+	// server unhealthy if they do not receive a timely response.
+	//
+	// Check implementations should be idempotent and side effect free.
 	Check(ctx context.Context, in *HealthCheckRequest, opts ...grpc.CallOption) (*HealthCheckResponse, error)
 	// Performs a watch for the serving status of the requested service.
 	// The server will immediately send back a message indicating the current
@@ -118,8 +125,15 @@ func (x *healthWatchClient) Recv() (*HealthCheckResponse, error) {
 // All implementations should embed UnimplementedHealthServer
 // for forward compatibility
 type HealthServer interface {
-	// If the requested service is unknown, the call will fail with status
-	// NOT_FOUND.
+	// Check gets the health of the specified service. If the requested service
+	// is unknown, the call will fail with status NOT_FOUND. If the caller does
+	// not specify a service name, the server should respond with its overall
+	// health status.
+	//
+	// Clients should set a deadline when calling Check, and can declare the
+	// server unhealthy if they do not receive a timely response.
+	//
+	// Check implementations should be idempotent and side effect free.
 	Check(context.Context, *HealthCheckRequest) (*HealthCheckResponse, error)
 	// Performs a watch for the serving status of the requested service.
 	// The server will immediately send back a message indicating the current
diff --git a/vendor/google.golang.org/grpc/idle.go b/vendor/google.golang.org/grpc/idle.go
deleted file mode 100644
index dc3dc72f6b0..00000000000
--- a/vendor/google.golang.org/grpc/idle.go
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- *
- * Copyright 2023 gRPC authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-package grpc
-
-import (
-	"fmt"
-	"math"
-	"sync"
-	"sync/atomic"
-	"time"
-)
-
-// For overriding in unit tests.
-var timeAfterFunc = func(d time.Duration, f func()) *time.Timer {
-	return time.AfterFunc(d, f)
-}
-
-// idlenessEnforcer is the functionality provided by grpc.ClientConn to enter
-// and exit from idle mode.
-type idlenessEnforcer interface {
-	exitIdleMode() error
-	enterIdleMode() error
-}
-
-// idlenessManager defines the functionality required to track RPC activity on a
-// channel.
-type idlenessManager interface {
-	onCallBegin() error
-	onCallEnd()
-	close()
-}
-
-type noopIdlenessManager struct{}
-
-func (noopIdlenessManager) onCallBegin() error { return nil }
-func (noopIdlenessManager) onCallEnd()         {}
-func (noopIdlenessManager) close()             {}
-
-// idlenessManagerImpl implements the idlenessManager interface. It uses atomic
-// operations to synchronize access to shared state and a mutex to guarantee
-// mutual exclusion in a critical section.
-type idlenessManagerImpl struct {
-	// State accessed atomically.
-	lastCallEndTime           int64 // Unix timestamp in nanos; time when the most recent RPC completed.
-	activeCallsCount          int32 // Count of active RPCs; -math.MaxInt32 means channel is idle or is trying to get there.
-	activeSinceLastTimerCheck int32 // Boolean; True if there was an RPC since the last timer callback.
-	closed                    int32 // Boolean; True when the manager is closed.
-
-	// Can be accessed without atomics or mutex since these are set at creation
-	// time and read-only after that.
-	enforcer idlenessEnforcer // Functionality provided by grpc.ClientConn.
-	timeout  int64            // Idle timeout duration nanos stored as an int64.
-
-	// idleMu is used to guarantee mutual exclusion in two scenarios:
-	// - Opposing intentions:
-	//   - a: Idle timeout has fired and handleIdleTimeout() is trying to put
-	//     the channel in idle mode because the channel has been inactive.
-	//   - b: At the same time an RPC is made on the channel, and onCallBegin()
-	//     is trying to prevent the channel from going idle.
-	// - Competing intentions:
-	//   - The channel is in idle mode and there are multiple RPCs starting at
-	//     the same time, all trying to move the channel out of idle. Only one
-	//     of them should succeed in doing so, while the other RPCs should
-	//     piggyback on the first one and be successfully handled.
-	idleMu       sync.RWMutex
-	actuallyIdle bool
-	timer        *time.Timer
-}
-
-// newIdlenessManager creates a new idleness manager implementation for the
-// given idle timeout.
-func newIdlenessManager(enforcer idlenessEnforcer, idleTimeout time.Duration) idlenessManager {
-	if idleTimeout == 0 {
-		return noopIdlenessManager{}
-	}
-
-	i := &idlenessManagerImpl{
-		enforcer: enforcer,
-		timeout:  int64(idleTimeout),
-	}
-	i.timer = timeAfterFunc(idleTimeout, i.handleIdleTimeout)
-	return i
-}
-
-// resetIdleTimer resets the idle timer to the given duration. This method
-// should only be called from the timer callback.
-func (i *idlenessManagerImpl) resetIdleTimer(d time.Duration) {
-	i.idleMu.Lock()
-	defer i.idleMu.Unlock()
-
-	if i.timer == nil {
-		// Only close sets timer to nil. We are done.
-		return
-	}
-
-	// It is safe to ignore the return value from Reset() because this method is
-	// only ever called from the timer callback, which means the timer has
-	// already fired.
-	i.timer.Reset(d)
-}
-
-// handleIdleTimeout is the timer callback that is invoked upon expiry of the
-// configured idle timeout. The channel is considered inactive if there are no
-// ongoing calls and no RPC activity since the last time the timer fired.
-func (i *idlenessManagerImpl) handleIdleTimeout() {
-	if i.isClosed() {
-		return
-	}
-
-	if atomic.LoadInt32(&i.activeCallsCount) > 0 {
-		i.resetIdleTimer(time.Duration(i.timeout))
-		return
-	}
-
-	// There has been activity on the channel since we last got here. Reset the
-	// timer and return.
-	if atomic.LoadInt32(&i.activeSinceLastTimerCheck) == 1 {
-		// Set the timer to fire after a duration of idle timeout, calculated
-		// from the time the most recent RPC completed.
-		atomic.StoreInt32(&i.activeSinceLastTimerCheck, 0)
-		i.resetIdleTimer(time.Duration(atomic.LoadInt64(&i.lastCallEndTime) + i.timeout - time.Now().UnixNano()))
-		return
-	}
-
-	// This CAS operation is extremely likely to succeed given that there has
-	// been no activity since the last time we were here.  Setting the
-	// activeCallsCount to -math.MaxInt32 indicates to onCallBegin() that the
-	// channel is either in idle mode or is trying to get there.
-	if !atomic.CompareAndSwapInt32(&i.activeCallsCount, 0, -math.MaxInt32) {
-		// This CAS operation can fail if an RPC started after we checked for
-		// activity at the top of this method, or one was ongoing from before
-		// the last time we were here. In both case, reset the timer and return.
-		i.resetIdleTimer(time.Duration(i.timeout))
-		return
-	}
-
-	// Now that we've set the active calls count to -math.MaxInt32, it's time to
-	// actually move to idle mode.
-	if i.tryEnterIdleMode() {
-		// Successfully entered idle mode. No timer needed until we exit idle.
-		return
-	}
-
-	// Failed to enter idle mode due to a concurrent RPC that kept the channel
-	// active, or because of an error from the channel. Undo the attempt to
-	// enter idle, and reset the timer to try again later.
-	atomic.AddInt32(&i.activeCallsCount, math.MaxInt32)
-	i.resetIdleTimer(time.Duration(i.timeout))
-}
-
-// tryEnterIdleMode instructs the channel to enter idle mode. But before
-// that, it performs a last minute check to ensure that no new RPC has come in,
-// making the channel active.
-//
-// Return value indicates whether or not the channel moved to idle mode.
-//
-// Holds idleMu which ensures mutual exclusion with exitIdleMode.
-func (i *idlenessManagerImpl) tryEnterIdleMode() bool {
-	i.idleMu.Lock()
-	defer i.idleMu.Unlock()
-
-	if atomic.LoadInt32(&i.activeCallsCount) != -math.MaxInt32 {
-		// We raced and lost to a new RPC. Very rare, but stop entering idle.
-		return false
-	}
-	if atomic.LoadInt32(&i.activeSinceLastTimerCheck) == 1 {
-		// An very short RPC could have come in (and also finished) after we
-		// checked for calls count and activity in handleIdleTimeout(), but
-		// before the CAS operation. So, we need to check for activity again.
-		return false
-	}
-
-	// No new RPCs have come in since we last set the active calls count value
-	// -math.MaxInt32 in the timer callback. And since we have the lock, it is
-	// safe to enter idle mode now.
-	if err := i.enforcer.enterIdleMode(); err != nil {
-		logger.Errorf("Failed to enter idle mode: %v", err)
-		return false
-	}
-
-	// Successfully entered idle mode.
-	i.actuallyIdle = true
-	return true
-}
-
-// onCallBegin is invoked at the start of every RPC.
-func (i *idlenessManagerImpl) onCallBegin() error {
-	if i.isClosed() {
-		return nil
-	}
-
-	if atomic.AddInt32(&i.activeCallsCount, 1) > 0 {
-		// Channel is not idle now. Set the activity bit and allow the call.
-		atomic.StoreInt32(&i.activeSinceLastTimerCheck, 1)
-		return nil
-	}
-
-	// Channel is either in idle mode or is in the process of moving to idle
-	// mode. Attempt to exit idle mode to allow this RPC.
-	if err := i.exitIdleMode(); err != nil {
-		// Undo the increment to calls count, and return an error causing the
-		// RPC to fail.
-		atomic.AddInt32(&i.activeCallsCount, -1)
-		return err
-	}
-
-	atomic.StoreInt32(&i.activeSinceLastTimerCheck, 1)
-	return nil
-}
-
-// exitIdleMode instructs the channel to exit idle mode.
-//
-// Holds idleMu which ensures mutual exclusion with tryEnterIdleMode.
-func (i *idlenessManagerImpl) exitIdleMode() error {
-	i.idleMu.Lock()
-	defer i.idleMu.Unlock()
-
-	if !i.actuallyIdle {
-		// This can happen in two scenarios:
-		// - handleIdleTimeout() set the calls count to -math.MaxInt32 and called
-		//   tryEnterIdleMode(). But before the latter could grab the lock, an RPC
-		//   came in and onCallBegin() noticed that the calls count is negative.
-		// - Channel is in idle mode, and multiple new RPCs come in at the same
-		//   time, all of them notice a negative calls count in onCallBegin and get
-		//   here. The first one to get the lock would got the channel to exit idle.
-		//
-		// Either way, nothing to do here.
-		return nil
-	}
-
-	if err := i.enforcer.exitIdleMode(); err != nil {
-		return fmt.Errorf("channel failed to exit idle mode: %v", err)
-	}
-
-	// Undo the idle entry process. This also respects any new RPC attempts.
-	atomic.AddInt32(&i.activeCallsCount, math.MaxInt32)
-	i.actuallyIdle = false
-
-	// Start a new timer to fire after the configured idle timeout.
-	i.timer = timeAfterFunc(time.Duration(i.timeout), i.handleIdleTimeout)
-	return nil
-}
-
-// onCallEnd is invoked at the end of every RPC.
-func (i *idlenessManagerImpl) onCallEnd() {
-	if i.isClosed() {
-		return
-	}
-
-	// Record the time at which the most recent call finished.
-	atomic.StoreInt64(&i.lastCallEndTime, time.Now().UnixNano())
-
-	// Decrement the active calls count. This count can temporarily go negative
-	// when the timer callback is in the process of moving the channel to idle
-	// mode, but one or more RPCs come in and complete before the timer callback
-	// can get done with the process of moving to idle mode.
-	atomic.AddInt32(&i.activeCallsCount, -1)
-}
-
-func (i *idlenessManagerImpl) isClosed() bool {
-	return atomic.LoadInt32(&i.closed) == 1
-}
-
-func (i *idlenessManagerImpl) close() {
-	atomic.StoreInt32(&i.closed, 1)
-
-	i.idleMu.Lock()
-	i.timer.Stop()
-	i.timer = nil
-	i.idleMu.Unlock()
-}
diff --git a/vendor/google.golang.org/grpc/interceptor.go b/vendor/google.golang.org/grpc/interceptor.go
index bb96ef57be8..877d78fc3d0 100644
--- a/vendor/google.golang.org/grpc/interceptor.go
+++ b/vendor/google.golang.org/grpc/interceptor.go
@@ -23,7 +23,7 @@ import (
 )
 
 // UnaryInvoker is called by UnaryClientInterceptor to complete RPCs.
-type UnaryInvoker func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error
+type UnaryInvoker func(ctx context.Context, method string, req, reply any, cc *ClientConn, opts ...CallOption) error
 
 // UnaryClientInterceptor intercepts the execution of a unary RPC on the client.
 // Unary interceptors can be specified as a DialOption, using
@@ -40,7 +40,7 @@ type UnaryInvoker func(ctx context.Context, method string, req, reply interface{
 // defaults from the ClientConn as well as per-call options.
 //
 // The returned error must be compatible with the status package.
-type UnaryClientInterceptor func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error
+type UnaryClientInterceptor func(ctx context.Context, method string, req, reply any, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error
 
 // Streamer is called by StreamClientInterceptor to create a ClientStream.
 type Streamer func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error)
@@ -66,7 +66,7 @@ type StreamClientInterceptor func(ctx context.Context, desc *StreamDesc, cc *Cli
 // server side. All per-rpc information may be mutated by the interceptor.
 type UnaryServerInfo struct {
 	// Server is the service implementation the user provides. This is read-only.
-	Server interface{}
+	Server any
 	// FullMethod is the full RPC method string, i.e., /package.service/method.
 	FullMethod string
 }
@@ -78,13 +78,13 @@ type UnaryServerInfo struct {
 // status package, or be one of the context errors. Otherwise, gRPC will use
 // codes.Unknown as the status code and err.Error() as the status message of the
 // RPC.
-type UnaryHandler func(ctx context.Context, req interface{}) (interface{}, error)
+type UnaryHandler func(ctx context.Context, req any) (any, error)
 
 // UnaryServerInterceptor provides a hook to intercept the execution of a unary RPC on the server. info
 // contains all the information of this RPC the interceptor can operate on. And handler is the wrapper
 // of the service method implementation. It is the responsibility of the interceptor to invoke handler
 // to complete the RPC.
-type UnaryServerInterceptor func(ctx context.Context, req interface{}, info *UnaryServerInfo, handler UnaryHandler) (resp interface{}, err error)
+type UnaryServerInterceptor func(ctx context.Context, req any, info *UnaryServerInfo, handler UnaryHandler) (resp any, err error)
 
 // StreamServerInfo consists of various information about a streaming RPC on
 // server side. All per-rpc information may be mutated by the interceptor.
@@ -101,4 +101,4 @@ type StreamServerInfo struct {
 // info contains all the information of this RPC the interceptor can operate on. And handler is the
 // service method implementation. It is the responsibility of the interceptor to invoke handler to
 // complete the RPC.
-type StreamServerInterceptor func(srv interface{}, ss ServerStream, info *StreamServerInfo, handler StreamHandler) error
+type StreamServerInterceptor func(srv any, ss ServerStream, info *StreamServerInfo, handler StreamHandler) error
diff --git a/vendor/google.golang.org/grpc/internal/backoff/backoff.go b/vendor/google.golang.org/grpc/internal/backoff/backoff.go
index 5fc0ee3da53..fed1c011a32 100644
--- a/vendor/google.golang.org/grpc/internal/backoff/backoff.go
+++ b/vendor/google.golang.org/grpc/internal/backoff/backoff.go
@@ -23,6 +23,8 @@
 package backoff
 
 import (
+	"context"
+	"errors"
 	"time"
 
 	grpcbackoff "google.golang.org/grpc/backoff"
@@ -71,3 +73,37 @@ func (bc Exponential) Backoff(retries int) time.Duration {
 	}
 	return time.Duration(backoff)
 }
+
+// ErrResetBackoff is the error to be returned by the function executed by RunF,
+// to instruct the latter to reset its backoff state.
+var ErrResetBackoff = errors.New("reset backoff state")
+
+// RunF provides a convenient way to run a function f repeatedly until the
+// context expires or f returns a non-nil error that is not ErrResetBackoff.
+// When f returns ErrResetBackoff, RunF continues to run f, but resets its
+// backoff state before doing so. backoff accepts an integer representing the
+// number of retries, and returns the amount of time to backoff.
+func RunF(ctx context.Context, f func() error, backoff func(int) time.Duration) {
+	attempt := 0
+	timer := time.NewTimer(0)
+	for ctx.Err() == nil {
+		select {
+		case <-timer.C:
+		case <-ctx.Done():
+			timer.Stop()
+			return
+		}
+
+		err := f()
+		if errors.Is(err, ErrResetBackoff) {
+			timer.Reset(0)
+			attempt = 0
+			continue
+		}
+		if err != nil {
+			return
+		}
+		timer.Reset(backoff(attempt))
+		attempt++
+	}
+}
diff --git a/vendor/google.golang.org/grpc/internal/balancer/gracefulswitch/gracefulswitch.go b/vendor/google.golang.org/grpc/internal/balancer/gracefulswitch/gracefulswitch.go
index 08666f62a7c..3c594e6e4e5 100644
--- a/vendor/google.golang.org/grpc/internal/balancer/gracefulswitch/gracefulswitch.go
+++ b/vendor/google.golang.org/grpc/internal/balancer/gracefulswitch/gracefulswitch.go
@@ -200,8 +200,8 @@ func (gsb *Balancer) ExitIdle() {
 	}
 }
 
-// UpdateSubConnState forwards the update to the appropriate child.
-func (gsb *Balancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
+// updateSubConnState forwards the update to the appropriate child.
+func (gsb *Balancer) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState, cb func(balancer.SubConnState)) {
 	gsb.currentMu.Lock()
 	defer gsb.currentMu.Unlock()
 	gsb.mu.Lock()
@@ -214,13 +214,26 @@ func (gsb *Balancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubC
 	} else if gsb.balancerPending != nil && gsb.balancerPending.subconns[sc] {
 		balToUpdate = gsb.balancerPending
 	}
-	gsb.mu.Unlock()
 	if balToUpdate == nil {
 		// SubConn belonged to a stale lb policy that has not yet fully closed,
 		// or the balancer was already closed.
+		gsb.mu.Unlock()
 		return
 	}
-	balToUpdate.UpdateSubConnState(sc, state)
+	if state.ConnectivityState == connectivity.Shutdown {
+		delete(balToUpdate.subconns, sc)
+	}
+	gsb.mu.Unlock()
+	if cb != nil {
+		cb(state)
+	} else {
+		balToUpdate.UpdateSubConnState(sc, state)
+	}
+}
+
+// UpdateSubConnState forwards the update to the appropriate child.
+func (gsb *Balancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
+	gsb.updateSubConnState(sc, state, nil)
 }
 
 // Close closes any active child balancers.
@@ -242,7 +255,7 @@ func (gsb *Balancer) Close() {
 //
 // It implements the balancer.ClientConn interface and is passed down in that
 // capacity to the wrapped balancer. It maintains a set of subConns created by
-// the wrapped balancer and calls from the latter to create/update/remove
+// the wrapped balancer and calls from the latter to create/update/shutdown
 // SubConns update this set before being forwarded to the parent ClientConn.
 // State updates from the wrapped balancer can result in invocation of the
 // graceful switch logic.
@@ -254,21 +267,10 @@ type balancerWrapper struct {
 	subconns  map[balancer.SubConn]bool // subconns created by this balancer
 }
 
-func (bw *balancerWrapper) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
-	if state.ConnectivityState == connectivity.Shutdown {
-		bw.gsb.mu.Lock()
-		delete(bw.subconns, sc)
-		bw.gsb.mu.Unlock()
-	}
-	// There is no need to protect this read with a mutex, as the write to the
-	// Balancer field happens in SwitchTo, which completes before this can be
-	// called.
-	bw.Balancer.UpdateSubConnState(sc, state)
-}
-
-// Close closes the underlying LB policy and removes the subconns it created. bw
-// must not be referenced via balancerCurrent or balancerPending in gsb when
-// called. gsb.mu must not be held.  Does not panic with a nil receiver.
+// Close closes the underlying LB policy and shuts down the subconns it
+// created. bw must not be referenced via balancerCurrent or balancerPending in
+// gsb when called. gsb.mu must not be held.  Does not panic with a nil
+// receiver.
 func (bw *balancerWrapper) Close() {
 	// before Close is called.
 	if bw == nil {
@@ -281,7 +283,7 @@ func (bw *balancerWrapper) Close() {
 	bw.Balancer.Close()
 	bw.gsb.mu.Lock()
 	for sc := range bw.subconns {
-		bw.gsb.cc.RemoveSubConn(sc)
+		sc.Shutdown()
 	}
 	bw.gsb.mu.Unlock()
 }
@@ -335,13 +337,16 @@ func (bw *balancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.Ne
 	}
 	bw.gsb.mu.Unlock()
 
+	var sc balancer.SubConn
+	oldListener := opts.StateListener
+	opts.StateListener = func(state balancer.SubConnState) { bw.gsb.updateSubConnState(sc, state, oldListener) }
 	sc, err := bw.gsb.cc.NewSubConn(addrs, opts)
 	if err != nil {
 		return nil, err
 	}
 	bw.gsb.mu.Lock()
 	if !bw.gsb.balancerCurrentOrPending(bw) { // balancer was closed during this call
-		bw.gsb.cc.RemoveSubConn(sc)
+		sc.Shutdown()
 		bw.gsb.mu.Unlock()
 		return nil, fmt.Errorf("%T at address %p that called NewSubConn is deleted", bw, bw)
 	}
@@ -360,13 +365,9 @@ func (bw *balancerWrapper) ResolveNow(opts resolver.ResolveNowOptions) {
 }
 
 func (bw *balancerWrapper) RemoveSubConn(sc balancer.SubConn) {
-	bw.gsb.mu.Lock()
-	if !bw.gsb.balancerCurrentOrPending(bw) {
-		bw.gsb.mu.Unlock()
-		return
-	}
-	bw.gsb.mu.Unlock()
-	bw.gsb.cc.RemoveSubConn(sc)
+	// Note: existing third party balancers may call this, so it must remain
+	// until RemoveSubConn is fully removed.
+	sc.Shutdown()
 }
 
 func (bw *balancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) {
diff --git a/vendor/google.golang.org/grpc/internal/balancerload/load.go b/vendor/google.golang.org/grpc/internal/balancerload/load.go
index 3a905d96657..94a08d6875a 100644
--- a/vendor/google.golang.org/grpc/internal/balancerload/load.go
+++ b/vendor/google.golang.org/grpc/internal/balancerload/load.go
@@ -25,7 +25,7 @@ import (
 // Parser converts loads from metadata into a concrete type.
 type Parser interface {
 	// Parse parses loads from metadata.
-	Parse(md metadata.MD) interface{}
+	Parse(md metadata.MD) any
 }
 
 var parser Parser
@@ -38,7 +38,7 @@ func SetParser(lr Parser) {
 }
 
 // Parse calls parser.Read().
-func Parse(md metadata.MD) interface{} {
+func Parse(md metadata.MD) any {
 	if parser == nil {
 		return nil
 	}
diff --git a/vendor/google.golang.org/grpc/internal/binarylog/method_logger.go b/vendor/google.golang.org/grpc/internal/binarylog/method_logger.go
index 6c3f632215f..0f31274a3cc 100644
--- a/vendor/google.golang.org/grpc/internal/binarylog/method_logger.go
+++ b/vendor/google.golang.org/grpc/internal/binarylog/method_logger.go
@@ -230,7 +230,7 @@ type ClientMessage struct {
 	OnClientSide bool
 	// Message can be a proto.Message or []byte. Other messages formats are not
 	// supported.
-	Message interface{}
+	Message any
 }
 
 func (c *ClientMessage) toProto() *binlogpb.GrpcLogEntry {
@@ -270,7 +270,7 @@ type ServerMessage struct {
 	OnClientSide bool
 	// Message can be a proto.Message or []byte. Other messages formats are not
 	// supported.
-	Message interface{}
+	Message any
 }
 
 func (c *ServerMessage) toProto() *binlogpb.GrpcLogEntry {
diff --git a/vendor/google.golang.org/grpc/internal/buffer/unbounded.go b/vendor/google.golang.org/grpc/internal/buffer/unbounded.go
index 81c2f5fd761..11f91668ac9 100644
--- a/vendor/google.golang.org/grpc/internal/buffer/unbounded.go
+++ b/vendor/google.golang.org/grpc/internal/buffer/unbounded.go
@@ -18,7 +18,10 @@
 // Package buffer provides an implementation of an unbounded buffer.
 package buffer
 
-import "sync"
+import (
+	"errors"
+	"sync"
+)
 
 // Unbounded is an implementation of an unbounded buffer which does not use
 // extra goroutines. This is typically used for passing updates from one entity
@@ -28,49 +31,50 @@ import "sync"
 // the underlying mutex used for synchronization.
 //
 // Unbounded supports values of any type to be stored in it by using a channel
-// of `interface{}`. This means that a call to Put() incurs an extra memory
-// allocation, and also that users need a type assertion while reading. For
-// performance critical code paths, using Unbounded is strongly discouraged and
-// defining a new type specific implementation of this buffer is preferred. See
+// of `any`. This means that a call to Put() incurs an extra memory allocation,
+// and also that users need a type assertion while reading. For performance
+// critical code paths, using Unbounded is strongly discouraged and defining a
+// new type specific implementation of this buffer is preferred. See
 // internal/transport/transport.go for an example of this.
 type Unbounded struct {
-	c       chan interface{}
+	c       chan any
 	closed  bool
+	closing bool
 	mu      sync.Mutex
-	backlog []interface{}
+	backlog []any
 }
 
 // NewUnbounded returns a new instance of Unbounded.
 func NewUnbounded() *Unbounded {
-	return &Unbounded{c: make(chan interface{}, 1)}
+	return &Unbounded{c: make(chan any, 1)}
 }
 
+var errBufferClosed = errors.New("Put called on closed buffer.Unbounded")
+
 // Put adds t to the unbounded buffer.
-func (b *Unbounded) Put(t interface{}) {
+func (b *Unbounded) Put(t any) error {
 	b.mu.Lock()
 	defer b.mu.Unlock()
-	if b.closed {
-		return
+	if b.closing {
+		return errBufferClosed
 	}
 	if len(b.backlog) == 0 {
 		select {
 		case b.c <- t:
-			return
+			return nil
 		default:
 		}
 	}
 	b.backlog = append(b.backlog, t)
+	return nil
 }
 
-// Load sends the earliest buffered data, if any, onto the read channel
-// returned by Get(). Users are expected to call this every time they read a
+// Load sends the earliest buffered data, if any, onto the read channel returned
+// by Get(). Users are expected to call this every time they successfully read a
 // value from the read channel.
 func (b *Unbounded) Load() {
 	b.mu.Lock()
 	defer b.mu.Unlock()
-	if b.closed {
-		return
-	}
 	if len(b.backlog) > 0 {
 		select {
 		case b.c <- b.backlog[0]:
@@ -78,6 +82,8 @@ func (b *Unbounded) Load() {
 			b.backlog = b.backlog[1:]
 		default:
 		}
+	} else if b.closing && !b.closed {
+		close(b.c)
 	}
 }
 
@@ -88,18 +94,23 @@ func (b *Unbounded) Load() {
 // send the next buffered value onto the channel if there is any.
 //
 // If the unbounded buffer is closed, the read channel returned by this method
-// is closed.
-func (b *Unbounded) Get() <-chan interface{} {
+// is closed after all data is drained.
+func (b *Unbounded) Get() <-chan any {
 	return b.c
 }
 
-// Close closes the unbounded buffer.
+// Close closes the unbounded buffer. No subsequent data may be Put(), and the
+// channel returned from Get() will be closed after all the data is read and
+// Load() is called for the final time.
 func (b *Unbounded) Close() {
 	b.mu.Lock()
 	defer b.mu.Unlock()
-	if b.closed {
+	if b.closing {
 		return
 	}
-	b.closed = true
-	close(b.c)
+	b.closing = true
+	if len(b.backlog) == 0 {
+		b.closed = true
+		close(b.c)
+	}
 }
diff --git a/vendor/google.golang.org/grpc/internal/channelz/funcs.go b/vendor/google.golang.org/grpc/internal/channelz/funcs.go
index 777cbcd7921..fc094f3441b 100644
--- a/vendor/google.golang.org/grpc/internal/channelz/funcs.go
+++ b/vendor/google.golang.org/grpc/internal/channelz/funcs.go
@@ -24,15 +24,14 @@
 package channelz
 
 import (
-	"context"
 	"errors"
-	"fmt"
 	"sort"
 	"sync"
 	"sync/atomic"
 	"time"
 
 	"google.golang.org/grpc/grpclog"
+	"google.golang.org/grpc/internal"
 )
 
 const (
@@ -40,8 +39,11 @@ const (
 )
 
 var (
-	db    dbWrapper
-	idGen idGenerator
+	// IDGen is the global channelz entity ID generator.  It should not be used
+	// outside this package except by tests.
+	IDGen IDGenerator
+
+	db dbWrapper
 	// EntryPerPage defines the number of channelz entries to be shown on a web page.
 	EntryPerPage  = int64(50)
 	curState      int32
@@ -52,14 +54,20 @@ var (
 func TurnOn() {
 	if !IsOn() {
 		db.set(newChannelMap())
-		idGen.reset()
+		IDGen.Reset()
 		atomic.StoreInt32(&curState, 1)
 	}
 }
 
+func init() {
+	internal.ChannelzTurnOffForTesting = func() {
+		atomic.StoreInt32(&curState, 0)
+	}
+}
+
 // IsOn returns whether channelz data collection is on.
 func IsOn() bool {
-	return atomic.CompareAndSwapInt32(&curState, 1, 1)
+	return atomic.LoadInt32(&curState) == 1
 }
 
 // SetMaxTraceEntry sets maximum number of trace entry per entity (i.e. channel/subchannel).
@@ -97,43 +105,6 @@ func (d *dbWrapper) get() *channelMap {
 	return d.DB
 }
 
-// NewChannelzStorageForTesting initializes channelz data storage and id
-// generator for testing purposes.
-//
-// Returns a cleanup function to be invoked by the test, which waits for up to
-// 10s for all channelz state to be reset by the grpc goroutines when those
-// entities get closed. This cleanup function helps with ensuring that tests
-// don't mess up each other.
-func NewChannelzStorageForTesting() (cleanup func() error) {
-	db.set(newChannelMap())
-	idGen.reset()
-
-	return func() error {
-		cm := db.get()
-		if cm == nil {
-			return nil
-		}
-
-		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-		defer cancel()
-		ticker := time.NewTicker(10 * time.Millisecond)
-		defer ticker.Stop()
-		for {
-			cm.mu.RLock()
-			topLevelChannels, servers, channels, subChannels, listenSockets, normalSockets := len(cm.topLevelChannels), len(cm.servers), len(cm.channels), len(cm.subChannels), len(cm.listenSockets), len(cm.normalSockets)
-			cm.mu.RUnlock()
-
-			if err := ctx.Err(); err != nil {
-				return fmt.Errorf("after 10s the channelz map has not been cleaned up yet, topchannels: %d, servers: %d, channels: %d, subchannels: %d, listen sockets: %d, normal sockets: %d", topLevelChannels, servers, channels, subChannels, listenSockets, normalSockets)
-			}
-			if topLevelChannels == 0 && servers == 0 && channels == 0 && subChannels == 0 && listenSockets == 0 && normalSockets == 0 {
-				return nil
-			}
-			<-ticker.C
-		}
-	}
-}
-
 // GetTopChannels returns a slice of top channel's ChannelMetric, along with a
 // boolean indicating whether there's more top channels to be queried for.
 //
@@ -193,7 +164,7 @@ func GetServer(id int64) *ServerMetric {
 //
 // If channelz is not turned ON, the channelz database is not mutated.
 func RegisterChannel(c Channel, pid *Identifier, ref string) *Identifier {
-	id := idGen.genID()
+	id := IDGen.genID()
 	var parent int64
 	isTopChannel := true
 	if pid != nil {
@@ -229,7 +200,7 @@ func RegisterSubChannel(c Channel, pid *Identifier, ref string) (*Identifier, er
 	if pid == nil {
 		return nil, errors.New("a SubChannel's parent id cannot be nil")
 	}
-	id := idGen.genID()
+	id := IDGen.genID()
 	if !IsOn() {
 		return newIdentifer(RefSubChannel, id, pid), nil
 	}
@@ -251,7 +222,7 @@ func RegisterSubChannel(c Channel, pid *Identifier, ref string) (*Identifier, er
 //
 // If channelz is not turned ON, the channelz database is not mutated.
 func RegisterServer(s Server, ref string) *Identifier {
-	id := idGen.genID()
+	id := IDGen.genID()
 	if !IsOn() {
 		return newIdentifer(RefServer, id, nil)
 	}
@@ -277,7 +248,7 @@ func RegisterListenSocket(s Socket, pid *Identifier, ref string) (*Identifier, e
 	if pid == nil {
 		return nil, errors.New("a ListenSocket's parent id cannot be 0")
 	}
-	id := idGen.genID()
+	id := IDGen.genID()
 	if !IsOn() {
 		return newIdentifer(RefListenSocket, id, pid), nil
 	}
@@ -297,7 +268,7 @@ func RegisterNormalSocket(s Socket, pid *Identifier, ref string) (*Identifier, e
 	if pid == nil {
 		return nil, errors.New("a NormalSocket's parent id cannot be 0")
 	}
-	id := idGen.genID()
+	id := IDGen.genID()
 	if !IsOn() {
 		return newIdentifer(RefNormalSocket, id, pid), nil
 	}
@@ -776,14 +747,17 @@ func (c *channelMap) GetServer(id int64) *ServerMetric {
 	return sm
 }
 
-type idGenerator struct {
+// IDGenerator is an incrementing atomic that tracks IDs for channelz entities.
+type IDGenerator struct {
 	id int64
 }
 
-func (i *idGenerator) reset() {
+// Reset resets the generated ID back to zero.  Should only be used at
+// initialization or by tests sensitive to the ID number.
+func (i *IDGenerator) Reset() {
 	atomic.StoreInt64(&i.id, 0)
 }
 
-func (i *idGenerator) genID() int64 {
+func (i *IDGenerator) genID() int64 {
 	return atomic.AddInt64(&i.id, 1)
 }
diff --git a/vendor/google.golang.org/grpc/internal/channelz/logging.go b/vendor/google.golang.org/grpc/internal/channelz/logging.go
index 8e13a3d2ce7..f89e6f77bbd 100644
--- a/vendor/google.golang.org/grpc/internal/channelz/logging.go
+++ b/vendor/google.golang.org/grpc/internal/channelz/logging.go
@@ -31,7 +31,7 @@ func withParens(id *Identifier) string {
 }
 
 // Info logs and adds a trace event if channelz is on.
-func Info(l grpclog.DepthLoggerV2, id *Identifier, args ...interface{}) {
+func Info(l grpclog.DepthLoggerV2, id *Identifier, args ...any) {
 	AddTraceEvent(l, id, 1, &TraceEventDesc{
 		Desc:     fmt.Sprint(args...),
 		Severity: CtInfo,
@@ -39,7 +39,7 @@ func Info(l grpclog.DepthLoggerV2, id *Identifier, args ...interface{}) {
 }
 
 // Infof logs and adds a trace event if channelz is on.
-func Infof(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...interface{}) {
+func Infof(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...any) {
 	AddTraceEvent(l, id, 1, &TraceEventDesc{
 		Desc:     fmt.Sprintf(format, args...),
 		Severity: CtInfo,
@@ -47,7 +47,7 @@ func Infof(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...inter
 }
 
 // Warning logs and adds a trace event if channelz is on.
-func Warning(l grpclog.DepthLoggerV2, id *Identifier, args ...interface{}) {
+func Warning(l grpclog.DepthLoggerV2, id *Identifier, args ...any) {
 	AddTraceEvent(l, id, 1, &TraceEventDesc{
 		Desc:     fmt.Sprint(args...),
 		Severity: CtWarning,
@@ -55,7 +55,7 @@ func Warning(l grpclog.DepthLoggerV2, id *Identifier, args ...interface{}) {
 }
 
 // Warningf logs and adds a trace event if channelz is on.
-func Warningf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...interface{}) {
+func Warningf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...any) {
 	AddTraceEvent(l, id, 1, &TraceEventDesc{
 		Desc:     fmt.Sprintf(format, args...),
 		Severity: CtWarning,
@@ -63,7 +63,7 @@ func Warningf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...in
 }
 
 // Error logs and adds a trace event if channelz is on.
-func Error(l grpclog.DepthLoggerV2, id *Identifier, args ...interface{}) {
+func Error(l grpclog.DepthLoggerV2, id *Identifier, args ...any) {
 	AddTraceEvent(l, id, 1, &TraceEventDesc{
 		Desc:     fmt.Sprint(args...),
 		Severity: CtError,
@@ -71,7 +71,7 @@ func Error(l grpclog.DepthLoggerV2, id *Identifier, args ...interface{}) {
 }
 
 // Errorf logs and adds a trace event if channelz is on.
-func Errorf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...interface{}) {
+func Errorf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...any) {
 	AddTraceEvent(l, id, 1, &TraceEventDesc{
 		Desc:     fmt.Sprintf(format, args...),
 		Severity: CtError,
diff --git a/vendor/google.golang.org/grpc/internal/channelz/types.go b/vendor/google.golang.org/grpc/internal/channelz/types.go
index 7b2f350e2e6..1d4020f5379 100644
--- a/vendor/google.golang.org/grpc/internal/channelz/types.go
+++ b/vendor/google.golang.org/grpc/internal/channelz/types.go
@@ -628,6 +628,7 @@ type tracedChannel interface {
 
 type channelTrace struct {
 	cm          *channelMap
+	clearCalled bool
 	createdTime time.Time
 	eventCount  int64
 	mu          sync.Mutex
@@ -656,6 +657,10 @@ func (c *channelTrace) append(e *TraceEvent) {
 }
 
 func (c *channelTrace) clear() {
+	if c.clearCalled {
+		return
+	}
+	c.clearCalled = true
 	c.mu.Lock()
 	for _, e := range c.events {
 		if e.RefID != 0 {
diff --git a/vendor/google.golang.org/grpc/internal/channelz/util_linux.go b/vendor/google.golang.org/grpc/internal/channelz/util_linux.go
index 8d194e44e1d..98288c3f866 100644
--- a/vendor/google.golang.org/grpc/internal/channelz/util_linux.go
+++ b/vendor/google.golang.org/grpc/internal/channelz/util_linux.go
@@ -23,7 +23,7 @@ import (
 )
 
 // GetSocketOption gets the socket option info of the conn.
-func GetSocketOption(socket interface{}) *SocketOptionData {
+func GetSocketOption(socket any) *SocketOptionData {
 	c, ok := socket.(syscall.Conn)
 	if !ok {
 		return nil
diff --git a/vendor/google.golang.org/grpc/internal/channelz/util_nonlinux.go b/vendor/google.golang.org/grpc/internal/channelz/util_nonlinux.go
index 837ddc40240..b5568b22e20 100644
--- a/vendor/google.golang.org/grpc/internal/channelz/util_nonlinux.go
+++ b/vendor/google.golang.org/grpc/internal/channelz/util_nonlinux.go
@@ -22,6 +22,6 @@
 package channelz
 
 // GetSocketOption gets the socket option info of the conn.
-func GetSocketOption(c interface{}) *SocketOptionData {
+func GetSocketOption(c any) *SocketOptionData {
 	return nil
 }
diff --git a/vendor/google.golang.org/grpc/internal/credentials/credentials.go b/vendor/google.golang.org/grpc/internal/credentials/credentials.go
index 32c9b59033c..9deee7f6513 100644
--- a/vendor/google.golang.org/grpc/internal/credentials/credentials.go
+++ b/vendor/google.golang.org/grpc/internal/credentials/credentials.go
@@ -25,12 +25,12 @@ import (
 type requestInfoKey struct{}
 
 // NewRequestInfoContext creates a context with ri.
-func NewRequestInfoContext(ctx context.Context, ri interface{}) context.Context {
+func NewRequestInfoContext(ctx context.Context, ri any) context.Context {
 	return context.WithValue(ctx, requestInfoKey{}, ri)
 }
 
 // RequestInfoFromContext extracts the RequestInfo from ctx.
-func RequestInfoFromContext(ctx context.Context) interface{} {
+func RequestInfoFromContext(ctx context.Context) any {
 	return ctx.Value(requestInfoKey{})
 }
 
@@ -39,11 +39,11 @@ func RequestInfoFromContext(ctx context.Context) interface{} {
 type clientHandshakeInfoKey struct{}
 
 // ClientHandshakeInfoFromContext extracts the ClientHandshakeInfo from ctx.
-func ClientHandshakeInfoFromContext(ctx context.Context) interface{} {
+func ClientHandshakeInfoFromContext(ctx context.Context) any {
 	return ctx.Value(clientHandshakeInfoKey{})
 }
 
 // NewClientHandshakeInfoContext creates a context with chi.
-func NewClientHandshakeInfoContext(ctx context.Context, chi interface{}) context.Context {
+func NewClientHandshakeInfoContext(ctx context.Context, chi any) context.Context {
 	return context.WithValue(ctx, clientHandshakeInfoKey{}, chi)
 }
diff --git a/vendor/google.golang.org/grpc/internal/envconfig/envconfig.go b/vendor/google.golang.org/grpc/internal/envconfig/envconfig.go
index 80fd5c7d2a4..685a3cb41b1 100644
--- a/vendor/google.golang.org/grpc/internal/envconfig/envconfig.go
+++ b/vendor/google.golang.org/grpc/internal/envconfig/envconfig.go
@@ -36,10 +36,13 @@ var (
 	// "GRPC_RING_HASH_CAP".  This does not override the default bounds
 	// checking which NACKs configs specifying ring sizes > 8*1024*1024 (~8M).
 	RingHashCap = uint64FromEnv("GRPC_RING_HASH_CAP", 4096, 1, 8*1024*1024)
-	// PickFirstLBConfig is set if we should support configuration of the
-	// pick_first LB policy, which can be enabled by setting the environment
-	// variable "GRPC_EXPERIMENTAL_PICKFIRST_LB_CONFIG" to "true".
-	PickFirstLBConfig = boolFromEnv("GRPC_EXPERIMENTAL_PICKFIRST_LB_CONFIG", false)
+	// LeastRequestLB is set if we should support the least_request_experimental
+	// LB policy, which can be enabled by setting the environment variable
+	// "GRPC_EXPERIMENTAL_ENABLE_LEAST_REQUEST" to "true".
+	LeastRequestLB = boolFromEnv("GRPC_EXPERIMENTAL_ENABLE_LEAST_REQUEST", false)
+	// ALTSMaxConcurrentHandshakes is the maximum number of concurrent ALTS
+	// handshakes that can be performed.
+	ALTSMaxConcurrentHandshakes = uint64FromEnv("GRPC_ALTS_MAX_CONCURRENT_HANDSHAKES", 100, 1, 100)
 )
 
 func boolFromEnv(envVar string, def bool) bool {
diff --git a/vendor/google.golang.org/grpc/internal/envconfig/xds.go b/vendor/google.golang.org/grpc/internal/envconfig/xds.go
index 02b4b6a1c10..29f234acb1b 100644
--- a/vendor/google.golang.org/grpc/internal/envconfig/xds.go
+++ b/vendor/google.golang.org/grpc/internal/envconfig/xds.go
@@ -50,46 +50,7 @@ var (
 	//
 	// When both bootstrap FileName and FileContent are set, FileName is used.
 	XDSBootstrapFileContent = os.Getenv(XDSBootstrapFileContentEnv)
-	// XDSRingHash indicates whether ring hash support is enabled, which can be
-	// disabled by setting the environment variable
-	// "GRPC_XDS_EXPERIMENTAL_ENABLE_RING_HASH" to "false".
-	XDSRingHash = boolFromEnv("GRPC_XDS_EXPERIMENTAL_ENABLE_RING_HASH", true)
-	// XDSClientSideSecurity is used to control processing of security
-	// configuration on the client-side.
-	//
-	// Note that there is no env var protection for the server-side because we
-	// have a brand new API on the server-side and users explicitly need to use
-	// the new API to get security integration on the server.
-	XDSClientSideSecurity = boolFromEnv("GRPC_XDS_EXPERIMENTAL_SECURITY_SUPPORT", true)
-	// XDSAggregateAndDNS indicates whether processing of aggregated cluster and
-	// DNS cluster is enabled, which can be disabled by setting the environment
-	// variable "GRPC_XDS_EXPERIMENTAL_ENABLE_AGGREGATE_AND_LOGICAL_DNS_CLUSTER"
-	// to "false".
-	XDSAggregateAndDNS = boolFromEnv("GRPC_XDS_EXPERIMENTAL_ENABLE_AGGREGATE_AND_LOGICAL_DNS_CLUSTER", true)
-
-	// XDSRBAC indicates whether xDS configured RBAC HTTP Filter is enabled,
-	// which can be disabled by setting the environment variable
-	// "GRPC_XDS_EXPERIMENTAL_RBAC" to "false".
-	XDSRBAC = boolFromEnv("GRPC_XDS_EXPERIMENTAL_RBAC", true)
-	// XDSOutlierDetection indicates whether outlier detection support is
-	// enabled, which can be disabled by setting the environment variable
-	// "GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION" to "false".
-	XDSOutlierDetection = boolFromEnv("GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION", true)
-	// XDSFederation indicates whether federation support is enabled, which can
-	// be enabled by setting the environment variable
-	// "GRPC_EXPERIMENTAL_XDS_FEDERATION" to "true".
-	XDSFederation = boolFromEnv("GRPC_EXPERIMENTAL_XDS_FEDERATION", true)
-
-	// XDSRLS indicates whether processing of Cluster Specifier plugins and
-	// support for the RLS CLuster Specifier is enabled, which can be disabled by
-	// setting the environment variable "GRPC_EXPERIMENTAL_XDS_RLS_LB" to
-	// "false".
-	XDSRLS = boolFromEnv("GRPC_EXPERIMENTAL_XDS_RLS_LB", true)
 
 	// C2PResolverTestOnlyTrafficDirectorURI is the TD URI for testing.
 	C2PResolverTestOnlyTrafficDirectorURI = os.Getenv("GRPC_TEST_ONLY_GOOGLE_C2P_RESOLVER_TRAFFIC_DIRECTOR_URI")
-	// XDSCustomLBPolicy indicates whether Custom LB Policies are enabled, which
-	// can be disabled by setting the environment variable
-	// "GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG" to "false".
-	XDSCustomLBPolicy = boolFromEnv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG", true)
 )
diff --git a/vendor/google.golang.org/grpc/internal/experimental.go b/vendor/google.golang.org/grpc/internal/experimental.go
new file mode 100644
index 00000000000..7f7044e1731
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/experimental.go
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package internal
+
+var (
+	// WithRecvBufferPool is implemented by the grpc package and returns a dial
+	// option to configure a shared buffer pool for a grpc.ClientConn.
+	WithRecvBufferPool any // func (grpc.SharedBufferPool) grpc.DialOption
+
+	// RecvBufferPool is implemented by the grpc package and returns a server
+	// option to configure a shared buffer pool for a grpc.Server.
+	RecvBufferPool any // func (grpc.SharedBufferPool) grpc.ServerOption
+)
diff --git a/vendor/google.golang.org/grpc/internal/grpclog/grpclog.go b/vendor/google.golang.org/grpc/internal/grpclog/grpclog.go
index b68e26a3649..bfc45102ab2 100644
--- a/vendor/google.golang.org/grpc/internal/grpclog/grpclog.go
+++ b/vendor/google.golang.org/grpc/internal/grpclog/grpclog.go
@@ -30,7 +30,7 @@ var Logger LoggerV2
 var DepthLogger DepthLoggerV2
 
 // InfoDepth logs to the INFO log at the specified depth.
-func InfoDepth(depth int, args ...interface{}) {
+func InfoDepth(depth int, args ...any) {
 	if DepthLogger != nil {
 		DepthLogger.InfoDepth(depth, args...)
 	} else {
@@ -39,7 +39,7 @@ func InfoDepth(depth int, args ...interface{}) {
 }
 
 // WarningDepth logs to the WARNING log at the specified depth.
-func WarningDepth(depth int, args ...interface{}) {
+func WarningDepth(depth int, args ...any) {
 	if DepthLogger != nil {
 		DepthLogger.WarningDepth(depth, args...)
 	} else {
@@ -48,7 +48,7 @@ func WarningDepth(depth int, args ...interface{}) {
 }
 
 // ErrorDepth logs to the ERROR log at the specified depth.
-func ErrorDepth(depth int, args ...interface{}) {
+func ErrorDepth(depth int, args ...any) {
 	if DepthLogger != nil {
 		DepthLogger.ErrorDepth(depth, args...)
 	} else {
@@ -57,7 +57,7 @@ func ErrorDepth(depth int, args ...interface{}) {
 }
 
 // FatalDepth logs to the FATAL log at the specified depth.
-func FatalDepth(depth int, args ...interface{}) {
+func FatalDepth(depth int, args ...any) {
 	if DepthLogger != nil {
 		DepthLogger.FatalDepth(depth, args...)
 	} else {
@@ -71,35 +71,35 @@ func FatalDepth(depth int, args ...interface{}) {
 // is defined here to avoid a circular dependency.
 type LoggerV2 interface {
 	// Info logs to INFO log. Arguments are handled in the manner of fmt.Print.
-	Info(args ...interface{})
+	Info(args ...any)
 	// Infoln logs to INFO log. Arguments are handled in the manner of fmt.Println.
-	Infoln(args ...interface{})
+	Infoln(args ...any)
 	// Infof logs to INFO log. Arguments are handled in the manner of fmt.Printf.
-	Infof(format string, args ...interface{})
+	Infof(format string, args ...any)
 	// Warning logs to WARNING log. Arguments are handled in the manner of fmt.Print.
-	Warning(args ...interface{})
+	Warning(args ...any)
 	// Warningln logs to WARNING log. Arguments are handled in the manner of fmt.Println.
-	Warningln(args ...interface{})
+	Warningln(args ...any)
 	// Warningf logs to WARNING log. Arguments are handled in the manner of fmt.Printf.
-	Warningf(format string, args ...interface{})
+	Warningf(format string, args ...any)
 	// Error logs to ERROR log. Arguments are handled in the manner of fmt.Print.
-	Error(args ...interface{})
+	Error(args ...any)
 	// Errorln logs to ERROR log. Arguments are handled in the manner of fmt.Println.
-	Errorln(args ...interface{})
+	Errorln(args ...any)
 	// Errorf logs to ERROR log. Arguments are handled in the manner of fmt.Printf.
-	Errorf(format string, args ...interface{})
+	Errorf(format string, args ...any)
 	// Fatal logs to ERROR log. Arguments are handled in the manner of fmt.Print.
 	// gRPC ensures that all Fatal logs will exit with os.Exit(1).
 	// Implementations may also call os.Exit() with a non-zero exit code.
-	Fatal(args ...interface{})
+	Fatal(args ...any)
 	// Fatalln logs to ERROR log. Arguments are handled in the manner of fmt.Println.
 	// gRPC ensures that all Fatal logs will exit with os.Exit(1).
 	// Implementations may also call os.Exit() with a non-zero exit code.
-	Fatalln(args ...interface{})
+	Fatalln(args ...any)
 	// Fatalf logs to ERROR log. Arguments are handled in the manner of fmt.Printf.
 	// gRPC ensures that all Fatal logs will exit with os.Exit(1).
 	// Implementations may also call os.Exit() with a non-zero exit code.
-	Fatalf(format string, args ...interface{})
+	Fatalf(format string, args ...any)
 	// V reports whether verbosity level l is at least the requested verbose level.
 	V(l int) bool
 }
@@ -116,11 +116,11 @@ type LoggerV2 interface {
 // later release.
 type DepthLoggerV2 interface {
 	// InfoDepth logs to INFO log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	InfoDepth(depth int, args ...interface{})
+	InfoDepth(depth int, args ...any)
 	// WarningDepth logs to WARNING log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	WarningDepth(depth int, args ...interface{})
+	WarningDepth(depth int, args ...any)
 	// ErrorDepth logs to ERROR log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	ErrorDepth(depth int, args ...interface{})
+	ErrorDepth(depth int, args ...any)
 	// FatalDepth logs to FATAL log at the specified depth. Arguments are handled in the manner of fmt.Println.
-	FatalDepth(depth int, args ...interface{})
+	FatalDepth(depth int, args ...any)
 }
diff --git a/vendor/google.golang.org/grpc/internal/grpclog/prefixLogger.go b/vendor/google.golang.org/grpc/internal/grpclog/prefixLogger.go
index 02224b42ca8..faa998de763 100644
--- a/vendor/google.golang.org/grpc/internal/grpclog/prefixLogger.go
+++ b/vendor/google.golang.org/grpc/internal/grpclog/prefixLogger.go
@@ -31,7 +31,7 @@ type PrefixLogger struct {
 }
 
 // Infof does info logging.
-func (pl *PrefixLogger) Infof(format string, args ...interface{}) {
+func (pl *PrefixLogger) Infof(format string, args ...any) {
 	if pl != nil {
 		// Handle nil, so the tests can pass in a nil logger.
 		format = pl.prefix + format
@@ -42,7 +42,7 @@ func (pl *PrefixLogger) Infof(format string, args ...interface{}) {
 }
 
 // Warningf does warning logging.
-func (pl *PrefixLogger) Warningf(format string, args ...interface{}) {
+func (pl *PrefixLogger) Warningf(format string, args ...any) {
 	if pl != nil {
 		format = pl.prefix + format
 		pl.logger.WarningDepth(1, fmt.Sprintf(format, args...))
@@ -52,7 +52,7 @@ func (pl *PrefixLogger) Warningf(format string, args ...interface{}) {
 }
 
 // Errorf does error logging.
-func (pl *PrefixLogger) Errorf(format string, args ...interface{}) {
+func (pl *PrefixLogger) Errorf(format string, args ...any) {
 	if pl != nil {
 		format = pl.prefix + format
 		pl.logger.ErrorDepth(1, fmt.Sprintf(format, args...))
@@ -62,7 +62,7 @@ func (pl *PrefixLogger) Errorf(format string, args ...interface{}) {
 }
 
 // Debugf does info logging at verbose level 2.
-func (pl *PrefixLogger) Debugf(format string, args ...interface{}) {
+func (pl *PrefixLogger) Debugf(format string, args ...any) {
 	// TODO(6044): Refactor interfaces LoggerV2 and DepthLogger, and maybe
 	// rewrite PrefixLogger a little to ensure that we don't use the global
 	// `Logger` here, and instead use the `logger` field.
diff --git a/vendor/google.golang.org/grpc/internal/grpcrand/grpcrand.go b/vendor/google.golang.org/grpc/internal/grpcrand/grpcrand.go
index d08e3e90766..aa97273e7d1 100644
--- a/vendor/google.golang.org/grpc/internal/grpcrand/grpcrand.go
+++ b/vendor/google.golang.org/grpc/internal/grpcrand/grpcrand.go
@@ -80,6 +80,13 @@ func Uint32() uint32 {
 	return r.Uint32()
 }
 
+// ExpFloat64 implements rand.ExpFloat64 on the grpcrand global source.
+func ExpFloat64() float64 {
+	mu.Lock()
+	defer mu.Unlock()
+	return r.ExpFloat64()
+}
+
 // Shuffle implements rand.Shuffle on the grpcrand global source.
 var Shuffle = func(n int, f func(int, int)) {
 	mu.Lock()
diff --git a/vendor/google.golang.org/grpc/internal/grpcsync/callback_serializer.go b/vendor/google.golang.org/grpc/internal/grpcsync/callback_serializer.go
index 37b8d4117e7..f7f40a16ace 100644
--- a/vendor/google.golang.org/grpc/internal/grpcsync/callback_serializer.go
+++ b/vendor/google.golang.org/grpc/internal/grpcsync/callback_serializer.go
@@ -20,7 +20,6 @@ package grpcsync
 
 import (
 	"context"
-	"sync"
 
 	"google.golang.org/grpc/internal/buffer"
 )
@@ -32,14 +31,12 @@ import (
 //
 // This type is safe for concurrent access.
 type CallbackSerializer struct {
-	// Done is closed once the serializer is shut down completely, i.e all
+	// done is closed once the serializer is shut down completely, i.e all
 	// scheduled callbacks are executed and the serializer has deallocated all
 	// its resources.
-	Done chan struct{}
+	done chan struct{}
 
 	callbacks *buffer.Unbounded
-	closedMu  sync.Mutex
-	closed    bool
 }
 
 // NewCallbackSerializer returns a new CallbackSerializer instance. The provided
@@ -48,12 +45,12 @@ type CallbackSerializer struct {
 // callbacks will be added once this context is canceled, and any pending un-run
 // callbacks will be executed before the serializer is shut down.
 func NewCallbackSerializer(ctx context.Context) *CallbackSerializer {
-	t := &CallbackSerializer{
-		Done:      make(chan struct{}),
+	cs := &CallbackSerializer{
+		done:      make(chan struct{}),
 		callbacks: buffer.NewUnbounded(),
 	}
-	go t.run(ctx)
-	return t
+	go cs.run(ctx)
+	return cs
 }
 
 // Schedule adds a callback to be scheduled after existing callbacks are run.
@@ -64,56 +61,40 @@ func NewCallbackSerializer(ctx context.Context) *CallbackSerializer {
 // Return value indicates if the callback was successfully added to the list of
 // callbacks to be executed by the serializer. It is not possible to add
 // callbacks once the context passed to NewCallbackSerializer is cancelled.
-func (t *CallbackSerializer) Schedule(f func(ctx context.Context)) bool {
-	t.closedMu.Lock()
-	defer t.closedMu.Unlock()
-
-	if t.closed {
-		return false
-	}
-	t.callbacks.Put(f)
-	return true
+func (cs *CallbackSerializer) Schedule(f func(ctx context.Context)) bool {
+	return cs.callbacks.Put(f) == nil
 }
 
-func (t *CallbackSerializer) run(ctx context.Context) {
-	var backlog []func(context.Context)
+func (cs *CallbackSerializer) run(ctx context.Context) {
+	defer close(cs.done)
 
-	defer close(t.Done)
+	// TODO: when Go 1.21 is the oldest supported version, this loop and Close
+	// can be replaced with:
+	//
+	// context.AfterFunc(ctx, cs.callbacks.Close)
 	for ctx.Err() == nil {
 		select {
 		case <-ctx.Done():
 			// Do nothing here. Next iteration of the for loop will not happen,
 			// since ctx.Err() would be non-nil.
-		case callback, ok := <-t.callbacks.Get():
-			if !ok {
-				return
-			}
-			t.callbacks.Load()
-			callback.(func(ctx context.Context))(ctx)
+		case cb := <-cs.callbacks.Get():
+			cs.callbacks.Load()
+			cb.(func(context.Context))(ctx)
 		}
 	}
 
-	// Fetch pending callbacks if any, and execute them before returning from
-	// this method and closing t.Done.
-	t.closedMu.Lock()
-	t.closed = true
-	backlog = t.fetchPendingCallbacks()
-	t.callbacks.Close()
-	t.closedMu.Unlock()
-	for _, b := range backlog {
-		b(ctx)
+	// Close the buffer to prevent new callbacks from being added.
+	cs.callbacks.Close()
+
+	// Run all pending callbacks.
+	for cb := range cs.callbacks.Get() {
+		cs.callbacks.Load()
+		cb.(func(context.Context))(ctx)
 	}
 }
 
-func (t *CallbackSerializer) fetchPendingCallbacks() []func(context.Context) {
-	var backlog []func(context.Context)
-	for {
-		select {
-		case b := <-t.callbacks.Get():
-			backlog = append(backlog, b.(func(context.Context)))
-			t.callbacks.Load()
-		default:
-			return backlog
-		}
-	}
+// Done returns a channel that is closed after the context passed to
+// NewCallbackSerializer is canceled and all callbacks have been executed.
+func (cs *CallbackSerializer) Done() <-chan struct{} {
+	return cs.done
 }
diff --git a/vendor/google.golang.org/grpc/internal/grpcsync/pubsub.go b/vendor/google.golang.org/grpc/internal/grpcsync/pubsub.go
new file mode 100644
index 00000000000..aef8cec1ab0
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/grpcsync/pubsub.go
@@ -0,0 +1,121 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package grpcsync
+
+import (
+	"context"
+	"sync"
+)
+
+// Subscriber represents an entity that is subscribed to messages published on
+// a PubSub. It wraps the callback to be invoked by the PubSub when a new
+// message is published.
+type Subscriber interface {
+	// OnMessage is invoked when a new message is published. Implementations
+	// must not block in this method.
+	OnMessage(msg any)
+}
+
+// PubSub is a simple one-to-many publish-subscribe system that supports
+// messages of arbitrary type. It guarantees that messages are delivered in
+// the same order in which they were published.
+//
+// Publisher invokes the Publish() method to publish new messages, while
+// subscribers interested in receiving these messages register a callback
+// via the Subscribe() method.
+//
+// Once a PubSub is stopped, no more messages can be published, but any pending
+// published messages will be delivered to the subscribers.  Done may be used
+// to determine when all published messages have been delivered.
+type PubSub struct {
+	cs *CallbackSerializer
+
+	// Access to the below fields are guarded by this mutex.
+	mu          sync.Mutex
+	msg         any
+	subscribers map[Subscriber]bool
+}
+
+// NewPubSub returns a new PubSub instance.  Users should cancel the
+// provided context to shutdown the PubSub.
+func NewPubSub(ctx context.Context) *PubSub {
+	return &PubSub{
+		cs:          NewCallbackSerializer(ctx),
+		subscribers: map[Subscriber]bool{},
+	}
+}
+
+// Subscribe registers the provided Subscriber to the PubSub.
+//
+// If the PubSub contains a previously published message, the Subscriber's
+// OnMessage() callback will be invoked asynchronously with the existing
+// message to begin with, and subsequently for every newly published message.
+//
+// The caller is responsible for invoking the returned cancel function to
+// unsubscribe itself from the PubSub.
+func (ps *PubSub) Subscribe(sub Subscriber) (cancel func()) {
+	ps.mu.Lock()
+	defer ps.mu.Unlock()
+
+	ps.subscribers[sub] = true
+
+	if ps.msg != nil {
+		msg := ps.msg
+		ps.cs.Schedule(func(context.Context) {
+			ps.mu.Lock()
+			defer ps.mu.Unlock()
+			if !ps.subscribers[sub] {
+				return
+			}
+			sub.OnMessage(msg)
+		})
+	}
+
+	return func() {
+		ps.mu.Lock()
+		defer ps.mu.Unlock()
+		delete(ps.subscribers, sub)
+	}
+}
+
+// Publish publishes the provided message to the PubSub, and invokes
+// callbacks registered by subscribers asynchronously.
+func (ps *PubSub) Publish(msg any) {
+	ps.mu.Lock()
+	defer ps.mu.Unlock()
+
+	ps.msg = msg
+	for sub := range ps.subscribers {
+		s := sub
+		ps.cs.Schedule(func(context.Context) {
+			ps.mu.Lock()
+			defer ps.mu.Unlock()
+			if !ps.subscribers[s] {
+				return
+			}
+			s.OnMessage(msg)
+		})
+	}
+}
+
+// Done returns a channel that is closed after the context passed to NewPubSub
+// is canceled and all updates have been sent to subscribers.
+func (ps *PubSub) Done() <-chan struct{} {
+	return ps.cs.Done()
+}
diff --git a/vendor/google.golang.org/grpc/internal/idle/idle.go b/vendor/google.golang.org/grpc/internal/idle/idle.go
new file mode 100644
index 00000000000..fe49cb74c55
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/idle/idle.go
@@ -0,0 +1,278 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// Package idle contains a component for managing idleness (entering and exiting)
+// based on RPC activity.
+package idle
+
+import (
+	"fmt"
+	"math"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// For overriding in unit tests.
+var timeAfterFunc = func(d time.Duration, f func()) *time.Timer {
+	return time.AfterFunc(d, f)
+}
+
+// Enforcer is the functionality provided by grpc.ClientConn to enter
+// and exit from idle mode.
+type Enforcer interface {
+	ExitIdleMode() error
+	EnterIdleMode()
+}
+
+// Manager implements idleness detection and calls the configured Enforcer to
+// enter/exit idle mode when appropriate.  Must be created by NewManager.
+type Manager struct {
+	// State accessed atomically.
+	lastCallEndTime           int64 // Unix timestamp in nanos; time when the most recent RPC completed.
+	activeCallsCount          int32 // Count of active RPCs; -math.MaxInt32 means channel is idle or is trying to get there.
+	activeSinceLastTimerCheck int32 // Boolean; True if there was an RPC since the last timer callback.
+	closed                    int32 // Boolean; True when the manager is closed.
+
+	// Can be accessed without atomics or mutex since these are set at creation
+	// time and read-only after that.
+	enforcer Enforcer // Functionality provided by grpc.ClientConn.
+	timeout  time.Duration
+
+	// idleMu is used to guarantee mutual exclusion in two scenarios:
+	// - Opposing intentions:
+	//   - a: Idle timeout has fired and handleIdleTimeout() is trying to put
+	//     the channel in idle mode because the channel has been inactive.
+	//   - b: At the same time an RPC is made on the channel, and OnCallBegin()
+	//     is trying to prevent the channel from going idle.
+	// - Competing intentions:
+	//   - The channel is in idle mode and there are multiple RPCs starting at
+	//     the same time, all trying to move the channel out of idle. Only one
+	//     of them should succeed in doing so, while the other RPCs should
+	//     piggyback on the first one and be successfully handled.
+	idleMu       sync.RWMutex
+	actuallyIdle bool
+	timer        *time.Timer
+}
+
+// NewManager creates a new idleness manager implementation for the
+// given idle timeout.  It begins in idle mode.
+func NewManager(enforcer Enforcer, timeout time.Duration) *Manager {
+	return &Manager{
+		enforcer:         enforcer,
+		timeout:          timeout,
+		actuallyIdle:     true,
+		activeCallsCount: -math.MaxInt32,
+	}
+}
+
+// resetIdleTimerLocked resets the idle timer to the given duration.  Called
+// when exiting idle mode or when the timer fires and we need to reset it.
+func (m *Manager) resetIdleTimerLocked(d time.Duration) {
+	if m.isClosed() || m.timeout == 0 || m.actuallyIdle {
+		return
+	}
+
+	// It is safe to ignore the return value from Reset() because this method is
+	// only ever called from the timer callback or when exiting idle mode.
+	if m.timer != nil {
+		m.timer.Stop()
+	}
+	m.timer = timeAfterFunc(d, m.handleIdleTimeout)
+}
+
+func (m *Manager) resetIdleTimer(d time.Duration) {
+	m.idleMu.Lock()
+	defer m.idleMu.Unlock()
+	m.resetIdleTimerLocked(d)
+}
+
+// handleIdleTimeout is the timer callback that is invoked upon expiry of the
+// configured idle timeout. The channel is considered inactive if there are no
+// ongoing calls and no RPC activity since the last time the timer fired.
+func (m *Manager) handleIdleTimeout() {
+	if m.isClosed() {
+		return
+	}
+
+	if atomic.LoadInt32(&m.activeCallsCount) > 0 {
+		m.resetIdleTimer(m.timeout)
+		return
+	}
+
+	// There has been activity on the channel since we last got here. Reset the
+	// timer and return.
+	if atomic.LoadInt32(&m.activeSinceLastTimerCheck) == 1 {
+		// Set the timer to fire after a duration of idle timeout, calculated
+		// from the time the most recent RPC completed.
+		atomic.StoreInt32(&m.activeSinceLastTimerCheck, 0)
+		m.resetIdleTimer(time.Duration(atomic.LoadInt64(&m.lastCallEndTime)-time.Now().UnixNano()) + m.timeout)
+		return
+	}
+
+	// Now that we've checked that there has been no activity, attempt to enter
+	// idle mode, which is very likely to succeed.
+	if m.tryEnterIdleMode() {
+		// Successfully entered idle mode. No timer needed until we exit idle.
+		return
+	}
+
+	// Failed to enter idle mode due to a concurrent RPC that kept the channel
+	// active, or because of an error from the channel. Undo the attempt to
+	// enter idle, and reset the timer to try again later.
+	m.resetIdleTimer(m.timeout)
+}
+
+// tryEnterIdleMode instructs the channel to enter idle mode. But before
+// that, it performs a last minute check to ensure that no new RPC has come in,
+// making the channel active.
+//
+// Return value indicates whether or not the channel moved to idle mode.
+//
+// Holds idleMu which ensures mutual exclusion with exitIdleMode.
+func (m *Manager) tryEnterIdleMode() bool {
+	// Setting the activeCallsCount to -math.MaxInt32 indicates to OnCallBegin()
+	// that the channel is either in idle mode or is trying to get there.
+	if !atomic.CompareAndSwapInt32(&m.activeCallsCount, 0, -math.MaxInt32) {
+		// This CAS operation can fail if an RPC started after we checked for
+		// activity in the timer handler, or one was ongoing from before the
+		// last time the timer fired, or if a test is attempting to enter idle
+		// mode without checking.  In all cases, abort going into idle mode.
+		return false
+	}
+	// N.B. if we fail to enter idle mode after this, we must re-add
+	// math.MaxInt32 to m.activeCallsCount.
+
+	m.idleMu.Lock()
+	defer m.idleMu.Unlock()
+
+	if atomic.LoadInt32(&m.activeCallsCount) != -math.MaxInt32 {
+		// We raced and lost to a new RPC. Very rare, but stop entering idle.
+		atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
+		return false
+	}
+	if atomic.LoadInt32(&m.activeSinceLastTimerCheck) == 1 {
+		// A very short RPC could have come in (and also finished) after we
+		// checked for calls count and activity in handleIdleTimeout(), but
+		// before the CAS operation. So, we need to check for activity again.
+		atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
+		return false
+	}
+
+	// No new RPCs have come in since we set the active calls count value to
+	// -math.MaxInt32. And since we have the lock, it is safe to enter idle mode
+	// unconditionally now.
+	m.enforcer.EnterIdleMode()
+	m.actuallyIdle = true
+	return true
+}
+
+func (m *Manager) EnterIdleModeForTesting() {
+	m.tryEnterIdleMode()
+}
+
+// OnCallBegin is invoked at the start of every RPC.
+func (m *Manager) OnCallBegin() error {
+	if m.isClosed() {
+		return nil
+	}
+
+	if atomic.AddInt32(&m.activeCallsCount, 1) > 0 {
+		// Channel is not idle now. Set the activity bit and allow the call.
+		atomic.StoreInt32(&m.activeSinceLastTimerCheck, 1)
+		return nil
+	}
+
+	// Channel is either in idle mode or is in the process of moving to idle
+	// mode. Attempt to exit idle mode to allow this RPC.
+	if err := m.ExitIdleMode(); err != nil {
+		// Undo the increment to calls count, and return an error causing the
+		// RPC to fail.
+		atomic.AddInt32(&m.activeCallsCount, -1)
+		return err
+	}
+
+	atomic.StoreInt32(&m.activeSinceLastTimerCheck, 1)
+	return nil
+}
+
+// ExitIdleMode instructs m to call the enforcer's ExitIdleMode and update m's
+// internal state.
+func (m *Manager) ExitIdleMode() error {
+	// Holds idleMu which ensures mutual exclusion with tryEnterIdleMode.
+	m.idleMu.Lock()
+	defer m.idleMu.Unlock()
+
+	if m.isClosed() || !m.actuallyIdle {
+		// This can happen in three scenarios:
+		// - handleIdleTimeout() set the calls count to -math.MaxInt32 and called
+		//   tryEnterIdleMode(). But before the latter could grab the lock, an RPC
+		//   came in and OnCallBegin() noticed that the calls count is negative.
+		// - Channel is in idle mode, and multiple new RPCs come in at the same
+		//   time, all of them notice a negative calls count in OnCallBegin and get
+		//   here. The first one to get the lock would got the channel to exit idle.
+		// - Channel is not in idle mode, and the user calls Connect which calls
+		//   m.ExitIdleMode.
+		//
+		// In any case, there is nothing to do here.
+		return nil
+	}
+
+	if err := m.enforcer.ExitIdleMode(); err != nil {
+		return fmt.Errorf("failed to exit idle mode: %w", err)
+	}
+
+	// Undo the idle entry process. This also respects any new RPC attempts.
+	atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
+	m.actuallyIdle = false
+
+	// Start a new timer to fire after the configured idle timeout.
+	m.resetIdleTimerLocked(m.timeout)
+	return nil
+}
+
+// OnCallEnd is invoked at the end of every RPC.
+func (m *Manager) OnCallEnd() {
+	if m.isClosed() {
+		return
+	}
+
+	// Record the time at which the most recent call finished.
+	atomic.StoreInt64(&m.lastCallEndTime, time.Now().UnixNano())
+
+	// Decrement the active calls count. This count can temporarily go negative
+	// when the timer callback is in the process of moving the channel to idle
+	// mode, but one or more RPCs come in and complete before the timer callback
+	// can get done with the process of moving to idle mode.
+	atomic.AddInt32(&m.activeCallsCount, -1)
+}
+
+func (m *Manager) isClosed() bool {
+	return atomic.LoadInt32(&m.closed) == 1
+}
+
+func (m *Manager) Close() {
+	atomic.StoreInt32(&m.closed, 1)
+
+	m.idleMu.Lock()
+	if m.timer != nil {
+		m.timer.Stop()
+		m.timer = nil
+	}
+	m.idleMu.Unlock()
+}
diff --git a/vendor/google.golang.org/grpc/internal/internal.go b/vendor/google.golang.org/grpc/internal/internal.go
index 42ff39c8444..6c7ea6a5336 100644
--- a/vendor/google.golang.org/grpc/internal/internal.go
+++ b/vendor/google.golang.org/grpc/internal/internal.go
@@ -30,7 +30,7 @@ import (
 
 var (
 	// WithHealthCheckFunc is set by dialoptions.go
-	WithHealthCheckFunc interface{} // func (HealthChecker) DialOption
+	WithHealthCheckFunc any // func (HealthChecker) DialOption
 	// HealthCheckFunc is used to provide client-side LB channel health checking
 	HealthCheckFunc HealthChecker
 	// BalancerUnregister is exported by package balancer to unregister a balancer.
@@ -38,8 +38,12 @@ var (
 	// KeepaliveMinPingTime is the minimum ping interval.  This must be 10s by
 	// default, but tests may wish to set it lower for convenience.
 	KeepaliveMinPingTime = 10 * time.Second
+	// KeepaliveMinServerPingTime is the minimum ping interval for servers.
+	// This must be 1s by default, but tests may wish to set it lower for
+	// convenience.
+	KeepaliveMinServerPingTime = time.Second
 	// ParseServiceConfig parses a JSON representation of the service config.
-	ParseServiceConfig interface{} // func(string) *serviceconfig.ParseResult
+	ParseServiceConfig any // func(string) *serviceconfig.ParseResult
 	// EqualServiceConfigForTesting is for testing service config generation and
 	// parsing. Both a and b should be returned by ParseServiceConfig.
 	// This function compares the config without rawJSON stripped, in case the
@@ -49,33 +53,33 @@ var (
 	// given name. This is set by package certprovider for use from xDS
 	// bootstrap code while parsing certificate provider configs in the
 	// bootstrap file.
-	GetCertificateProviderBuilder interface{} // func(string) certprovider.Builder
+	GetCertificateProviderBuilder any // func(string) certprovider.Builder
 	// GetXDSHandshakeInfoForTesting returns a pointer to the xds.HandshakeInfo
 	// stored in the passed in attributes. This is set by
 	// credentials/xds/xds.go.
-	GetXDSHandshakeInfoForTesting interface{} // func (*attributes.Attributes) *xds.HandshakeInfo
+	GetXDSHandshakeInfoForTesting any // func (*attributes.Attributes) *unsafe.Pointer
 	// GetServerCredentials returns the transport credentials configured on a
 	// gRPC server. An xDS-enabled server needs to know what type of credentials
 	// is configured on the underlying gRPC server. This is set by server.go.
-	GetServerCredentials interface{} // func (*grpc.Server) credentials.TransportCredentials
+	GetServerCredentials any // func (*grpc.Server) credentials.TransportCredentials
 	// CanonicalString returns the canonical string of the code defined here:
 	// https://github.com/grpc/grpc/blob/master/doc/statuscodes.md.
 	//
 	// This is used in the 1.0 release of gcp/observability, and thus must not be
 	// deleted or changed.
-	CanonicalString interface{} // func (codes.Code) string
-	// DrainServerTransports initiates a graceful close of existing connections
-	// on a gRPC server accepted on the provided listener address. An
-	// xDS-enabled server invokes this method on a grpc.Server when a particular
-	// listener moves to "not-serving" mode.
-	DrainServerTransports interface{} // func(*grpc.Server, string)
+	CanonicalString any // func (codes.Code) string
+	// IsRegisteredMethod returns whether the passed in method is registered as
+	// a method on the server.
+	IsRegisteredMethod any // func(*grpc.Server, string) bool
+	// ServerFromContext returns the server from the context.
+	ServerFromContext any // func(context.Context) *grpc.Server
 	// AddGlobalServerOptions adds an array of ServerOption that will be
 	// effective globally for newly created servers. The priority will be: 1.
 	// user-provided; 2. this method; 3. default values.
 	//
 	// This is used in the 1.0 release of gcp/observability, and thus must not be
 	// deleted or changed.
-	AddGlobalServerOptions interface{} // func(opt ...ServerOption)
+	AddGlobalServerOptions any // func(opt ...ServerOption)
 	// ClearGlobalServerOptions clears the array of extra ServerOption. This
 	// method is useful in testing and benchmarking.
 	//
@@ -88,14 +92,14 @@ var (
 	//
 	// This is used in the 1.0 release of gcp/observability, and thus must not be
 	// deleted or changed.
-	AddGlobalDialOptions interface{} // func(opt ...DialOption)
+	AddGlobalDialOptions any // func(opt ...DialOption)
 	// DisableGlobalDialOptions returns a DialOption that prevents the
 	// ClientConn from applying the global DialOptions (set via
 	// AddGlobalDialOptions).
 	//
 	// This is used in the 1.0 release of gcp/observability, and thus must not be
 	// deleted or changed.
-	DisableGlobalDialOptions interface{} // func() grpc.DialOption
+	DisableGlobalDialOptions any // func() grpc.DialOption
 	// ClearGlobalDialOptions clears the array of extra DialOption. This
 	// method is useful in testing and benchmarking.
 	//
@@ -104,23 +108,26 @@ var (
 	ClearGlobalDialOptions func()
 	// JoinDialOptions combines the dial options passed as arguments into a
 	// single dial option.
-	JoinDialOptions interface{} // func(...grpc.DialOption) grpc.DialOption
+	JoinDialOptions any // func(...grpc.DialOption) grpc.DialOption
 	// JoinServerOptions combines the server options passed as arguments into a
 	// single server option.
-	JoinServerOptions interface{} // func(...grpc.ServerOption) grpc.ServerOption
+	JoinServerOptions any // func(...grpc.ServerOption) grpc.ServerOption
 
 	// WithBinaryLogger returns a DialOption that specifies the binary logger
 	// for a ClientConn.
 	//
 	// This is used in the 1.0 release of gcp/observability, and thus must not be
 	// deleted or changed.
-	WithBinaryLogger interface{} // func(binarylog.Logger) grpc.DialOption
+	WithBinaryLogger any // func(binarylog.Logger) grpc.DialOption
 	// BinaryLogger returns a ServerOption that can set the binary logger for a
 	// server.
 	//
 	// This is used in the 1.0 release of gcp/observability, and thus must not be
 	// deleted or changed.
-	BinaryLogger interface{} // func(binarylog.Logger) grpc.ServerOption
+	BinaryLogger any // func(binarylog.Logger) grpc.ServerOption
+
+	// SubscribeToConnectivityStateChanges adds a grpcsync.Subscriber to a provided grpc.ClientConn
+	SubscribeToConnectivityStateChanges any // func(*grpc.ClientConn, grpcsync.Subscriber)
 
 	// NewXDSResolverWithConfigForTesting creates a new xds resolver builder using
 	// the provided xds bootstrap config instead of the global configuration from
@@ -131,7 +138,7 @@ var (
 	//
 	// This function should ONLY be used for testing and may not work with some
 	// other features, including the CSDS service.
-	NewXDSResolverWithConfigForTesting interface{} // func([]byte) (resolver.Builder, error)
+	NewXDSResolverWithConfigForTesting any // func([]byte) (resolver.Builder, error)
 
 	// RegisterRLSClusterSpecifierPluginForTesting registers the RLS Cluster
 	// Specifier Plugin for testing purposes, regardless of the XDSRLS environment
@@ -163,7 +170,32 @@ var (
 	UnregisterRBACHTTPFilterForTesting func()
 
 	// ORCAAllowAnyMinReportingInterval is for examples/orca use ONLY.
-	ORCAAllowAnyMinReportingInterval interface{} // func(so *orca.ServiceOptions)
+	ORCAAllowAnyMinReportingInterval any // func(so *orca.ServiceOptions)
+
+	// GRPCResolverSchemeExtraMetadata determines when gRPC will add extra
+	// metadata to RPCs.
+	GRPCResolverSchemeExtraMetadata string = "xds"
+
+	// EnterIdleModeForTesting gets the ClientConn to enter IDLE mode.
+	EnterIdleModeForTesting any // func(*grpc.ClientConn)
+
+	// ExitIdleModeForTesting gets the ClientConn to exit IDLE mode.
+	ExitIdleModeForTesting any // func(*grpc.ClientConn) error
+
+	ChannelzTurnOffForTesting func()
+
+	// TriggerXDSResourceNameNotFoundForTesting triggers the resource-not-found
+	// error for a given resource type and name. This is usually triggered when
+	// the associated watch timer fires. For testing purposes, having this
+	// function makes events more predictable than relying on timer events.
+	TriggerXDSResourceNameNotFoundForTesting any // func(func(xdsresource.Type, string), string, string) error
+
+	// TriggerXDSResourceNotFoundClient invokes the testing xDS Client singleton
+	// to invoke resource not found for a resource type name and resource name.
+	TriggerXDSResourceNameNotFoundClient any // func(string, string) error
+
+	// FromOutgoingContextRaw returns the un-merged, intermediary contents of metadata.rawMD.
+	FromOutgoingContextRaw any // func(context.Context) (metadata.MD, [][]string, bool)
 )
 
 // HealthChecker defines the signature of the client-side LB channel health checking function.
@@ -174,7 +206,7 @@ var (
 //
 // The health checking protocol is defined at:
 // https://github.com/grpc/grpc/blob/master/doc/health-checking.md
-type HealthChecker func(ctx context.Context, newStream func(string) (interface{}, error), setConnectivityState func(connectivity.State, error), serviceName string) error
+type HealthChecker func(ctx context.Context, newStream func(string) (any, error), setConnectivityState func(connectivity.State, error), serviceName string) error
 
 const (
 	// CredsBundleModeFallback switches GoogleDefaultCreds to fallback mode.
diff --git a/vendor/google.golang.org/grpc/internal/metadata/metadata.go b/vendor/google.golang.org/grpc/internal/metadata/metadata.go
index c82e608e077..900bfb71608 100644
--- a/vendor/google.golang.org/grpc/internal/metadata/metadata.go
+++ b/vendor/google.golang.org/grpc/internal/metadata/metadata.go
@@ -35,7 +35,7 @@ const mdKey = mdKeyType("grpc.internal.address.metadata")
 
 type mdValue metadata.MD
 
-func (m mdValue) Equal(o interface{}) bool {
+func (m mdValue) Equal(o any) bool {
 	om, ok := o.(mdValue)
 	if !ok {
 		return false
diff --git a/vendor/google.golang.org/grpc/internal/pretty/pretty.go b/vendor/google.golang.org/grpc/internal/pretty/pretty.go
index 0177af4b511..7033191375d 100644
--- a/vendor/google.golang.org/grpc/internal/pretty/pretty.go
+++ b/vendor/google.golang.org/grpc/internal/pretty/pretty.go
@@ -35,7 +35,7 @@ const jsonIndent = "  "
 // ToJSON marshals the input into a json string.
 //
 // If marshal fails, it falls back to fmt.Sprintf("%+v").
-func ToJSON(e interface{}) string {
+func ToJSON(e any) string {
 	switch ee := e.(type) {
 	case protov1.Message:
 		mm := jsonpb.Marshaler{Indent: jsonIndent}
diff --git a/vendor/google.golang.org/grpc/internal/resolver/config_selector.go b/vendor/google.golang.org/grpc/internal/resolver/config_selector.go
index c7a18a948ad..f0603871c93 100644
--- a/vendor/google.golang.org/grpc/internal/resolver/config_selector.go
+++ b/vendor/google.golang.org/grpc/internal/resolver/config_selector.go
@@ -92,7 +92,7 @@ type ClientStream interface {
 	// calling RecvMsg on the same stream at the same time, but it is not safe
 	// to call SendMsg on the same stream in different goroutines. It is also
 	// not safe to call CloseSend concurrently with SendMsg.
-	SendMsg(m interface{}) error
+	SendMsg(m any) error
 	// RecvMsg blocks until it receives a message into m or the stream is
 	// done. It returns io.EOF when the stream completes successfully. On
 	// any other error, the stream is aborted and the error contains the RPC
@@ -101,7 +101,7 @@ type ClientStream interface {
 	// It is safe to have a goroutine calling SendMsg and another goroutine
 	// calling RecvMsg on the same stream at the same time, but it is not
 	// safe to call RecvMsg on the same stream in different goroutines.
-	RecvMsg(m interface{}) error
+	RecvMsg(m any) error
 }
 
 // ClientInterceptor is an interceptor for gRPC client streams.
diff --git a/vendor/google.golang.org/grpc/internal/resolver/dns/dns_resolver.go b/vendor/google.golang.org/grpc/internal/resolver/dns/dns_resolver.go
index 09a667f33cb..b66dcb21327 100644
--- a/vendor/google.golang.org/grpc/internal/resolver/dns/dns_resolver.go
+++ b/vendor/google.golang.org/grpc/internal/resolver/dns/dns_resolver.go
@@ -23,7 +23,6 @@ package dns
 import (
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"net"
 	"os"
@@ -37,6 +36,7 @@ import (
 	"google.golang.org/grpc/internal/backoff"
 	"google.golang.org/grpc/internal/envconfig"
 	"google.golang.org/grpc/internal/grpcrand"
+	"google.golang.org/grpc/internal/resolver/dns/internal"
 	"google.golang.org/grpc/resolver"
 	"google.golang.org/grpc/serviceconfig"
 )
@@ -47,53 +47,37 @@ var EnableSRVLookups = false
 
 var logger = grpclog.Component("dns")
 
-// Globals to stub out in tests. TODO: Perhaps these two can be combined into a
-// single variable for testing the resolver?
-var (
-	newTimer           = time.NewTimer
-	newTimerDNSResRate = time.NewTimer
-)
-
 func init() {
 	resolver.Register(NewBuilder())
+	internal.TimeAfterFunc = time.After
+	internal.NewNetResolver = newNetResolver
+	internal.AddressDialer = addressDialer
 }
 
 const (
 	defaultPort       = "443"
 	defaultDNSSvrPort = "53"
 	golang            = "GO"
-	// txtPrefix is the prefix string to be prepended to the host name for txt record lookup.
+	// txtPrefix is the prefix string to be prepended to the host name for txt
+	// record lookup.
 	txtPrefix = "_grpc_config."
 	// In DNS, service config is encoded in a TXT record via the mechanism
 	// described in RFC-1464 using the attribute name grpc_config.
 	txtAttribute = "grpc_config="
 )
 
-var (
-	errMissingAddr = errors.New("dns resolver: missing address")
-
-	// Addresses ending with a colon that is supposed to be the separator
-	// between host and port is not allowed.  E.g. "::" is a valid address as
-	// it is an IPv6 address (host only) and "[::]:" is invalid as it ends with
-	// a colon as the host and port separator
-	errEndsWithColon = errors.New("dns resolver: missing port after port-separator colon")
-)
-
-var (
-	defaultResolver netResolver = net.DefaultResolver
-	// To prevent excessive re-resolution, we enforce a rate limit on DNS
-	// resolution requests.
-	minDNSResRate = 30 * time.Second
-)
-
-var customAuthorityDialler = func(authority string) func(ctx context.Context, network, address string) (net.Conn, error) {
-	return func(ctx context.Context, network, address string) (net.Conn, error) {
+var addressDialer = func(address string) func(context.Context, string, string) (net.Conn, error) {
+	return func(ctx context.Context, network, _ string) (net.Conn, error) {
 		var dialer net.Dialer
-		return dialer.DialContext(ctx, network, authority)
+		return dialer.DialContext(ctx, network, address)
 	}
 }
 
-var customAuthorityResolver = func(authority string) (netResolver, error) {
+var newNetResolver = func(authority string) (internal.NetResolver, error) {
+	if authority == "" {
+		return net.DefaultResolver, nil
+	}
+
 	host, port, err := parseTarget(authority, defaultDNSSvrPort)
 	if err != nil {
 		return nil, err
@@ -103,7 +87,7 @@ var customAuthorityResolver = func(authority string) (netResolver, error) {
 
 	return &net.Resolver{
 		PreferGo: true,
-		Dial:     customAuthorityDialler(authorityWithPort),
+		Dial:     internal.AddressDialer(authorityWithPort),
 	}, nil
 }
 
@@ -114,7 +98,8 @@ func NewBuilder() resolver.Builder {
 
 type dnsBuilder struct{}
 
-// Build creates and starts a DNS resolver that watches the name resolution of the target.
+// Build creates and starts a DNS resolver that watches the name resolution of
+// the target.
 func (b *dnsBuilder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) {
 	host, port, err := parseTarget(target.Endpoint(), defaultPort)
 	if err != nil {
@@ -140,13 +125,9 @@ func (b *dnsBuilder) Build(target resolver.Target, cc resolver.ClientConn, opts
 		disableServiceConfig: opts.DisableServiceConfig,
 	}
 
-	if target.URL.Host == "" {
-		d.resolver = defaultResolver
-	} else {
-		d.resolver, err = customAuthorityResolver(target.URL.Host)
-		if err != nil {
-			return nil, err
-		}
+	d.resolver, err = internal.NewNetResolver(target.URL.Host)
+	if err != nil {
+		return nil, err
 	}
 
 	d.wg.Add(1)
@@ -159,12 +140,6 @@ func (b *dnsBuilder) Scheme() string {
 	return "dns"
 }
 
-type netResolver interface {
-	LookupHost(ctx context.Context, host string) (addrs []string, err error)
-	LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error)
-	LookupTXT(ctx context.Context, name string) (txts []string, err error)
-}
-
 // deadResolver is a resolver that does nothing.
 type deadResolver struct{}
 
@@ -176,23 +151,26 @@ func (deadResolver) Close() {}
 type dnsResolver struct {
 	host     string
 	port     string
-	resolver netResolver
+	resolver internal.NetResolver
 	ctx      context.Context
 	cancel   context.CancelFunc
 	cc       resolver.ClientConn
-	// rn channel is used by ResolveNow() to force an immediate resolution of the target.
+	// rn channel is used by ResolveNow() to force an immediate resolution of the
+	// target.
 	rn chan struct{}
-	// wg is used to enforce Close() to return after the watcher() goroutine has finished.
-	// Otherwise, data race will be possible. [Race Example] in dns_resolver_test we
-	// replace the real lookup functions with mocked ones to facilitate testing.
-	// If Close() doesn't wait for watcher() goroutine finishes, race detector sometimes
-	// will warns lookup (READ the lookup function pointers) inside watcher() goroutine
-	// has data race with replaceNetFunc (WRITE the lookup function pointers).
+	// wg is used to enforce Close() to return after the watcher() goroutine has
+	// finished. Otherwise, data race will be possible. [Race Example] in
+	// dns_resolver_test we replace the real lookup functions with mocked ones to
+	// facilitate testing. If Close() doesn't wait for watcher() goroutine
+	// finishes, race detector sometimes will warns lookup (READ the lookup
+	// function pointers) inside watcher() goroutine has data race with
+	// replaceNetFunc (WRITE the lookup function pointers).
 	wg                   sync.WaitGroup
 	disableServiceConfig bool
 }
 
-// ResolveNow invoke an immediate resolution of the target that this dnsResolver watches.
+// ResolveNow invoke an immediate resolution of the target that this
+// dnsResolver watches.
 func (d *dnsResolver) ResolveNow(resolver.ResolveNowOptions) {
 	select {
 	case d.rn <- struct{}{}:
@@ -218,28 +196,27 @@ func (d *dnsResolver) watcher() {
 			err = d.cc.UpdateState(*state)
 		}
 
-		var timer *time.Timer
+		var waitTime time.Duration
 		if err == nil {
-			// Success resolving, wait for the next ResolveNow. However, also wait 30 seconds at the very least
-			// to prevent constantly re-resolving.
+			// Success resolving, wait for the next ResolveNow. However, also wait 30
+			// seconds at the very least to prevent constantly re-resolving.
 			backoffIndex = 1
-			timer = newTimerDNSResRate(minDNSResRate)
+			waitTime = internal.MinResolutionRate
 			select {
 			case <-d.ctx.Done():
-				timer.Stop()
 				return
 			case <-d.rn:
 			}
 		} else {
-			// Poll on an error found in DNS Resolver or an error received from ClientConn.
-			timer = newTimer(backoff.DefaultExponential.Backoff(backoffIndex))
+			// Poll on an error found in DNS Resolver or an error received from
+			// ClientConn.
+			waitTime = backoff.DefaultExponential.Backoff(backoffIndex)
 			backoffIndex++
 		}
 		select {
 		case <-d.ctx.Done():
-			timer.Stop()
 			return
-		case <-timer.C:
+		case <-internal.TimeAfterFunc(waitTime):
 		}
 	}
 }
@@ -278,7 +255,8 @@ func (d *dnsResolver) lookupSRV() ([]resolver.Address, error) {
 }
 
 func handleDNSError(err error, lookupType string) error {
-	if dnsErr, ok := err.(*net.DNSError); ok && !dnsErr.IsTimeout && !dnsErr.IsTemporary {
+	dnsErr, ok := err.(*net.DNSError)
+	if ok && !dnsErr.IsTimeout && !dnsErr.IsTemporary {
 		// Timeouts and temporary errors should be communicated to gRPC to
 		// attempt another DNS query (with backoff).  Other errors should be
 		// suppressed (they may represent the absence of a TXT record).
@@ -307,10 +285,12 @@ func (d *dnsResolver) lookupTXT() *serviceconfig.ParseResult {
 		res += s
 	}
 
-	// TXT record must have "grpc_config=" attribute in order to be used as service config.
+	// TXT record must have "grpc_config=" attribute in order to be used as
+	// service config.
 	if !strings.HasPrefix(res, txtAttribute) {
 		logger.Warningf("dns: TXT record %v missing %v attribute", res, txtAttribute)
-		// This is not an error; it is the equivalent of not having a service config.
+		// This is not an error; it is the equivalent of not having a service
+		// config.
 		return nil
 	}
 	sc := canaryingSC(strings.TrimPrefix(res, txtAttribute))
@@ -352,9 +332,10 @@ func (d *dnsResolver) lookup() (*resolver.State, error) {
 	return &state, nil
 }
 
-// formatIP returns ok = false if addr is not a valid textual representation of an IP address.
-// If addr is an IPv4 address, return the addr and ok = true.
-// If addr is an IPv6 address, return the addr enclosed in square brackets and ok = true.
+// formatIP returns ok = false if addr is not a valid textual representation of
+// an IP address. If addr is an IPv4 address, return the addr and ok = true.
+// If addr is an IPv6 address, return the addr enclosed in square brackets and
+// ok = true.
 func formatIP(addr string) (addrIP string, ok bool) {
 	ip := net.ParseIP(addr)
 	if ip == nil {
@@ -366,10 +347,10 @@ func formatIP(addr string) (addrIP string, ok bool) {
 	return "[" + addr + "]", true
 }
 
-// parseTarget takes the user input target string and default port, returns formatted host and port info.
-// If target doesn't specify a port, set the port to be the defaultPort.
-// If target is in IPv6 format and host-name is enclosed in square brackets, brackets
-// are stripped when setting the host.
+// parseTarget takes the user input target string and default port, returns
+// formatted host and port info. If target doesn't specify a port, set the port
+// to be the defaultPort. If target is in IPv6 format and host-name is enclosed
+// in square brackets, brackets are stripped when setting the host.
 // examples:
 // target: "www.google.com" defaultPort: "443" returns host: "www.google.com", port: "443"
 // target: "ipv4-host:80" defaultPort: "443" returns host: "ipv4-host", port: "80"
@@ -377,7 +358,7 @@ func formatIP(addr string) (addrIP string, ok bool) {
 // target: ":80" defaultPort: "443" returns host: "localhost", port: "80"
 func parseTarget(target, defaultPort string) (host, port string, err error) {
 	if target == "" {
-		return "", "", errMissingAddr
+		return "", "", internal.ErrMissingAddr
 	}
 	if ip := net.ParseIP(target); ip != nil {
 		// target is an IPv4 or IPv6(without brackets) address
@@ -385,12 +366,14 @@ func parseTarget(target, defaultPort string) (host, port string, err error) {
 	}
 	if host, port, err = net.SplitHostPort(target); err == nil {
 		if port == "" {
-			// If the port field is empty (target ends with colon), e.g. "[::1]:", this is an error.
-			return "", "", errEndsWithColon
+			// If the port field is empty (target ends with colon), e.g. "[::1]:",
+			// this is an error.
+			return "", "", internal.ErrEndsWithColon
 		}
 		// target has port, i.e ipv4-host:port, [ipv6-host]:port, host-name:port
 		if host == "" {
-			// Keep consistent with net.Dial(): If the host is empty, as in ":80", the local system is assumed.
+			// Keep consistent with net.Dial(): If the host is empty, as in ":80",
+			// the local system is assumed.
 			host = "localhost"
 		}
 		return host, port, nil
diff --git a/vendor/google.golang.org/grpc/internal/resolver/dns/internal/internal.go b/vendor/google.golang.org/grpc/internal/resolver/dns/internal/internal.go
new file mode 100644
index 00000000000..c7fc557d00c
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/resolver/dns/internal/internal.go
@@ -0,0 +1,70 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// Package internal contains functionality internal to the dns resolver package.
+package internal
+
+import (
+	"context"
+	"errors"
+	"net"
+	"time"
+)
+
+// NetResolver groups the methods on net.Resolver that are used by the DNS
+// resolver implementation. This allows the default net.Resolver instance to be
+// overidden from tests.
+type NetResolver interface {
+	LookupHost(ctx context.Context, host string) (addrs []string, err error)
+	LookupSRV(ctx context.Context, service, proto, name string) (cname string, addrs []*net.SRV, err error)
+	LookupTXT(ctx context.Context, name string) (txts []string, err error)
+}
+
+var (
+	// ErrMissingAddr is the error returned when building a DNS resolver when
+	// the provided target name is empty.
+	ErrMissingAddr = errors.New("dns resolver: missing address")
+
+	// ErrEndsWithColon is the error returned when building a DNS resolver when
+	// the provided target name ends with a colon that is supposed to be the
+	// separator between host and port.  E.g. "::" is a valid address as it is
+	// an IPv6 address (host only) and "[::]:" is invalid as it ends with a
+	// colon as the host and port separator
+	ErrEndsWithColon = errors.New("dns resolver: missing port after port-separator colon")
+)
+
+// The following vars are overridden from tests.
+var (
+	// MinResolutionRate is the minimum rate at which re-resolutions are
+	// allowed. This helps to prevent excessive re-resolution.
+	MinResolutionRate = 30 * time.Second
+
+	// TimeAfterFunc is used by the DNS resolver to wait for the given duration
+	// to elapse. In non-test code, this is implemented by time.After.  In test
+	// code, this can be used to control the amount of time the resolver is
+	// blocked waiting for the duration to elapse.
+	TimeAfterFunc func(time.Duration) <-chan time.Time
+
+	// NewNetResolver returns the net.Resolver instance for the given target.
+	NewNetResolver func(string) (NetResolver, error)
+
+	// AddressDialer is the dialer used to dial the DNS server. It accepts the
+	// Host portion of the URL corresponding to the user's dial target and
+	// returns a dial function.
+	AddressDialer func(address string) func(context.Context, string, string) (net.Conn, error)
+)
diff --git a/vendor/google.golang.org/grpc/internal/resolver/unix/unix.go b/vendor/google.golang.org/grpc/internal/resolver/unix/unix.go
index 16091168773..27cd81af9e5 100644
--- a/vendor/google.golang.org/grpc/internal/resolver/unix/unix.go
+++ b/vendor/google.golang.org/grpc/internal/resolver/unix/unix.go
@@ -61,6 +61,10 @@ func (b *builder) Scheme() string {
 	return b.scheme
 }
 
+func (b *builder) OverrideAuthority(resolver.Target) string {
+	return "localhost"
+}
+
 type nopResolver struct {
 }
 
diff --git a/vendor/google.golang.org/grpc/internal/status/status.go b/vendor/google.golang.org/grpc/internal/status/status.go
index b0ead4f54f8..03ef2fedd5c 100644
--- a/vendor/google.golang.org/grpc/internal/status/status.go
+++ b/vendor/google.golang.org/grpc/internal/status/status.go
@@ -43,13 +43,41 @@ type Status struct {
 	s *spb.Status
 }
 
+// NewWithProto returns a new status including details from statusProto.  This
+// is meant to be used by the gRPC library only.
+func NewWithProto(code codes.Code, message string, statusProto []string) *Status {
+	if len(statusProto) != 1 {
+		// No grpc-status-details bin header, or multiple; just ignore.
+		return &Status{s: &spb.Status{Code: int32(code), Message: message}}
+	}
+	st := &spb.Status{}
+	if err := proto.Unmarshal([]byte(statusProto[0]), st); err != nil {
+		// Probably not a google.rpc.Status proto; do not provide details.
+		return &Status{s: &spb.Status{Code: int32(code), Message: message}}
+	}
+	if st.Code == int32(code) {
+		// The codes match between the grpc-status header and the
+		// grpc-status-details-bin header; use the full details proto.
+		return &Status{s: st}
+	}
+	return &Status{
+		s: &spb.Status{
+			Code: int32(codes.Internal),
+			Message: fmt.Sprintf(
+				"grpc-status-details-bin mismatch: grpc-status=%v, grpc-message=%q, grpc-status-details-bin=%+v",
+				code, message, st,
+			),
+		},
+	}
+}
+
 // New returns a Status representing c and msg.
 func New(c codes.Code, msg string) *Status {
 	return &Status{s: &spb.Status{Code: int32(c), Message: msg}}
 }
 
 // Newf returns New(c, fmt.Sprintf(format, a...)).
-func Newf(c codes.Code, format string, a ...interface{}) *Status {
+func Newf(c codes.Code, format string, a ...any) *Status {
 	return New(c, fmt.Sprintf(format, a...))
 }
 
@@ -64,7 +92,7 @@ func Err(c codes.Code, msg string) error {
 }
 
 // Errorf returns Error(c, fmt.Sprintf(format, a...)).
-func Errorf(c codes.Code, format string, a ...interface{}) error {
+func Errorf(c codes.Code, format string, a ...any) error {
 	return Err(c, fmt.Sprintf(format, a...))
 }
 
@@ -120,11 +148,11 @@ func (s *Status) WithDetails(details ...proto.Message) (*Status, error) {
 
 // Details returns a slice of details messages attached to the status.
 // If a detail cannot be decoded, the error is returned in place of the detail.
-func (s *Status) Details() []interface{} {
+func (s *Status) Details() []any {
 	if s == nil || s.s == nil {
 		return nil
 	}
-	details := make([]interface{}, 0, len(s.s.Details))
+	details := make([]any, 0, len(s.s.Details))
 	for _, any := range s.s.Details {
 		detail := &ptypes.DynamicAny{}
 		if err := ptypes.UnmarshalAny(any, detail); err != nil {
diff --git a/vendor/google.golang.org/grpc/internal/tcp_keepalive_others.go b/vendor/google.golang.org/grpc/internal/tcp_keepalive_others.go
new file mode 100644
index 00000000000..4f347edd423
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/tcp_keepalive_others.go
@@ -0,0 +1,29 @@
+//go:build !unix && !windows
+
+/*
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package internal
+
+import (
+	"net"
+)
+
+// NetDialerWithTCPKeepalive returns a vanilla net.Dialer on non-unix platforms.
+func NetDialerWithTCPKeepalive() *net.Dialer {
+	return &net.Dialer{}
+}
diff --git a/vendor/google.golang.org/grpc/internal/tcp_keepalive_unix.go b/vendor/google.golang.org/grpc/internal/tcp_keepalive_unix.go
new file mode 100644
index 00000000000..078137b7fd7
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/tcp_keepalive_unix.go
@@ -0,0 +1,54 @@
+//go:build unix
+
+/*
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package internal
+
+import (
+	"net"
+	"syscall"
+	"time"
+
+	"golang.org/x/sys/unix"
+)
+
+// NetDialerWithTCPKeepalive returns a net.Dialer that enables TCP keepalives on
+// the underlying connection with OS default values for keepalive parameters.
+//
+// TODO: Once https://github.com/golang/go/issues/62254 lands, and the
+// appropriate Go version becomes less than our least supported Go version, we
+// should look into using the new API to make things more straightforward.
+func NetDialerWithTCPKeepalive() *net.Dialer {
+	return &net.Dialer{
+		// Setting a negative value here prevents the Go stdlib from overriding
+		// the values of TCP keepalive time and interval. It also prevents the
+		// Go stdlib from enabling TCP keepalives by default.
+		KeepAlive: time.Duration(-1),
+		// This method is called after the underlying network socket is created,
+		// but before dialing the socket (or calling its connect() method). The
+		// combination of unconditionally enabling TCP keepalives here, and
+		// disabling the overriding of TCP keepalive parameters by setting the
+		// KeepAlive field to a negative value above, results in OS defaults for
+		// the TCP keealive interval and time parameters.
+		Control: func(_, _ string, c syscall.RawConn) error {
+			return c.Control(func(fd uintptr) {
+				unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_KEEPALIVE, 1)
+			})
+		},
+	}
+}
diff --git a/vendor/google.golang.org/grpc/internal/tcp_keepalive_windows.go b/vendor/google.golang.org/grpc/internal/tcp_keepalive_windows.go
new file mode 100644
index 00000000000..fd7d43a8907
--- /dev/null
+++ b/vendor/google.golang.org/grpc/internal/tcp_keepalive_windows.go
@@ -0,0 +1,54 @@
+//go:build windows
+
+/*
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package internal
+
+import (
+	"net"
+	"syscall"
+	"time"
+
+	"golang.org/x/sys/windows"
+)
+
+// NetDialerWithTCPKeepalive returns a net.Dialer that enables TCP keepalives on
+// the underlying connection with OS default values for keepalive parameters.
+//
+// TODO: Once https://github.com/golang/go/issues/62254 lands, and the
+// appropriate Go version becomes less than our least supported Go version, we
+// should look into using the new API to make things more straightforward.
+func NetDialerWithTCPKeepalive() *net.Dialer {
+	return &net.Dialer{
+		// Setting a negative value here prevents the Go stdlib from overriding
+		// the values of TCP keepalive time and interval. It also prevents the
+		// Go stdlib from enabling TCP keepalives by default.
+		KeepAlive: time.Duration(-1),
+		// This method is called after the underlying network socket is created,
+		// but before dialing the socket (or calling its connect() method). The
+		// combination of unconditionally enabling TCP keepalives here, and
+		// disabling the overriding of TCP keepalive parameters by setting the
+		// KeepAlive field to a negative value above, results in OS defaults for
+		// the TCP keealive interval and time parameters.
+		Control: func(_, _ string, c syscall.RawConn) error {
+			return c.Control(func(fd uintptr) {
+				windows.SetsockoptInt(windows.Handle(fd), windows.SOL_SOCKET, windows.SO_KEEPALIVE, 1)
+			})
+		},
+	}
+}
diff --git a/vendor/google.golang.org/grpc/internal/transport/controlbuf.go b/vendor/google.golang.org/grpc/internal/transport/controlbuf.go
index be5a9c81eb9..b330ccedc8a 100644
--- a/vendor/google.golang.org/grpc/internal/transport/controlbuf.go
+++ b/vendor/google.golang.org/grpc/internal/transport/controlbuf.go
@@ -40,7 +40,7 @@ var updateHeaderTblSize = func(e *hpack.Encoder, v uint32) {
 }
 
 type itemNode struct {
-	it   interface{}
+	it   any
 	next *itemNode
 }
 
@@ -49,7 +49,7 @@ type itemList struct {
 	tail *itemNode
 }
 
-func (il *itemList) enqueue(i interface{}) {
+func (il *itemList) enqueue(i any) {
 	n := &itemNode{it: i}
 	if il.tail == nil {
 		il.head, il.tail = n, n
@@ -61,11 +61,11 @@ func (il *itemList) enqueue(i interface{}) {
 
 // peek returns the first item in the list without removing it from the
 // list.
-func (il *itemList) peek() interface{} {
+func (il *itemList) peek() any {
 	return il.head.it
 }
 
-func (il *itemList) dequeue() interface{} {
+func (il *itemList) dequeue() any {
 	if il.head == nil {
 		return nil
 	}
@@ -336,7 +336,7 @@ func (c *controlBuffer) put(it cbItem) error {
 	return err
 }
 
-func (c *controlBuffer) executeAndPut(f func(it interface{}) bool, it cbItem) (bool, error) {
+func (c *controlBuffer) executeAndPut(f func(it any) bool, it cbItem) (bool, error) {
 	var wakeUp bool
 	c.mu.Lock()
 	if c.err != nil {
@@ -373,7 +373,7 @@ func (c *controlBuffer) executeAndPut(f func(it interface{}) bool, it cbItem) (b
 }
 
 // Note argument f should never be nil.
-func (c *controlBuffer) execute(f func(it interface{}) bool, it interface{}) (bool, error) {
+func (c *controlBuffer) execute(f func(it any) bool, it any) (bool, error) {
 	c.mu.Lock()
 	if c.err != nil {
 		c.mu.Unlock()
@@ -387,7 +387,7 @@ func (c *controlBuffer) execute(f func(it interface{}) bool, it interface{}) (bo
 	return true, nil
 }
 
-func (c *controlBuffer) get(block bool) (interface{}, error) {
+func (c *controlBuffer) get(block bool) (any, error) {
 	for {
 		c.mu.Lock()
 		if c.err != nil {
@@ -830,7 +830,7 @@ func (l *loopyWriter) goAwayHandler(g *goAway) error {
 	return nil
 }
 
-func (l *loopyWriter) handle(i interface{}) error {
+func (l *loopyWriter) handle(i any) error {
 	switch i := i.(type) {
 	case *incomingWindowUpdate:
 		l.incomingWindowUpdateHandler(i)
diff --git a/vendor/google.golang.org/grpc/internal/transport/handler_server.go b/vendor/google.golang.org/grpc/internal/transport/handler_server.go
index 98f80e3fa00..a9d70e2a16c 100644
--- a/vendor/google.golang.org/grpc/internal/transport/handler_server.go
+++ b/vendor/google.golang.org/grpc/internal/transport/handler_server.go
@@ -75,11 +75,25 @@ func NewServerHandlerTransport(w http.ResponseWriter, r *http.Request, stats []s
 		return nil, errors.New(msg)
 	}
 
+	var localAddr net.Addr
+	if la := r.Context().Value(http.LocalAddrContextKey); la != nil {
+		localAddr, _ = la.(net.Addr)
+	}
+	var authInfo credentials.AuthInfo
+	if r.TLS != nil {
+		authInfo = credentials.TLSInfo{State: *r.TLS, CommonAuthInfo: credentials.CommonAuthInfo{SecurityLevel: credentials.PrivacyAndIntegrity}}
+	}
+	p := peer.Peer{
+		Addr:      strAddr(r.RemoteAddr),
+		LocalAddr: localAddr,
+		AuthInfo:  authInfo,
+	}
 	st := &serverHandlerTransport{
 		rw:             w,
 		req:            r,
 		closedCh:       make(chan struct{}),
 		writes:         make(chan func()),
+		peer:           p,
 		contentType:    contentType,
 		contentSubtype: contentSubtype,
 		stats:          stats,
@@ -134,6 +148,8 @@ type serverHandlerTransport struct {
 
 	headerMD metadata.MD
 
+	peer peer.Peer
+
 	closeOnce sync.Once
 	closedCh  chan struct{} // closed on Close
 
@@ -165,7 +181,13 @@ func (ht *serverHandlerTransport) Close(err error) {
 	})
 }
 
-func (ht *serverHandlerTransport) RemoteAddr() net.Addr { return strAddr(ht.req.RemoteAddr) }
+func (ht *serverHandlerTransport) Peer() *peer.Peer {
+	return &peer.Peer{
+		Addr:      ht.peer.Addr,
+		LocalAddr: ht.peer.LocalAddr,
+		AuthInfo:  ht.peer.AuthInfo,
+	}
+}
 
 // strAddr is a net.Addr backed by either a TCP "ip:port" string, or
 // the empty string if unknown.
@@ -220,18 +242,20 @@ func (ht *serverHandlerTransport) WriteStatus(s *Stream, st *status.Status) erro
 			h.Set("Grpc-Message", encodeGrpcMessage(m))
 		}
 
+		s.hdrMu.Lock()
 		if p := st.Proto(); p != nil && len(p.Details) > 0 {
+			delete(s.trailer, grpcStatusDetailsBinHeader)
 			stBytes, err := proto.Marshal(p)
 			if err != nil {
 				// TODO: return error instead, when callers are able to handle it.
 				panic(err)
 			}
 
-			h.Set("Grpc-Status-Details-Bin", encodeBinHeader(stBytes))
+			h.Set(grpcStatusDetailsBinHeader, encodeBinHeader(stBytes))
 		}
 
-		if md := s.Trailer(); len(md) > 0 {
-			for k, vv := range md {
+		if len(s.trailer) > 0 {
+			for k, vv := range s.trailer {
 				// Clients don't tolerate reading restricted headers after some non restricted ones were sent.
 				if isReservedHeader(k) {
 					continue
@@ -243,6 +267,7 @@ func (ht *serverHandlerTransport) WriteStatus(s *Stream, st *status.Status) erro
 				}
 			}
 		}
+		s.hdrMu.Unlock()
 	})
 
 	if err == nil { // transport has not been closed
@@ -287,7 +312,7 @@ func (ht *serverHandlerTransport) writeCommonHeaders(s *Stream) {
 }
 
 // writeCustomHeaders sets custom headers set on the stream via SetHeader
-// on the first write call (Write, WriteHeader, or WriteStatus).
+// on the first write call (Write, WriteHeader, or WriteStatus)
 func (ht *serverHandlerTransport) writeCustomHeaders(s *Stream) {
 	h := ht.rw.Header()
 
@@ -344,10 +369,8 @@ func (ht *serverHandlerTransport) WriteHeader(s *Stream, md metadata.MD) error {
 	return err
 }
 
-func (ht *serverHandlerTransport) HandleStreams(startStream func(*Stream), traceCtx func(context.Context, string) context.Context) {
+func (ht *serverHandlerTransport) HandleStreams(ctx context.Context, startStream func(*Stream)) {
 	// With this transport type there will be exactly 1 stream: this HTTP request.
-
-	ctx := ht.req.Context()
 	var cancel context.CancelFunc
 	if ht.timeoutSet {
 		ctx, cancel = context.WithTimeout(ctx, ht.timeout)
@@ -367,34 +390,19 @@ func (ht *serverHandlerTransport) HandleStreams(startStream func(*Stream), trace
 		ht.Close(errors.New("request is done processing"))
 	}()
 
+	ctx = metadata.NewIncomingContext(ctx, ht.headerMD)
 	req := ht.req
-
 	s := &Stream{
-		id:             0, // irrelevant
-		requestRead:    func(int) {},
-		cancel:         cancel,
-		buf:            newRecvBuffer(),
-		st:             ht,
-		method:         req.URL.Path,
-		recvCompress:   req.Header.Get("grpc-encoding"),
-		contentSubtype: ht.contentSubtype,
-	}
-	pr := &peer.Peer{
-		Addr: ht.RemoteAddr(),
-	}
-	if req.TLS != nil {
-		pr.AuthInfo = credentials.TLSInfo{State: *req.TLS, CommonAuthInfo: credentials.CommonAuthInfo{SecurityLevel: credentials.PrivacyAndIntegrity}}
-	}
-	ctx = metadata.NewIncomingContext(ctx, ht.headerMD)
-	s.ctx = peer.NewContext(ctx, pr)
-	for _, sh := range ht.stats {
-		s.ctx = sh.TagRPC(s.ctx, &stats.RPCTagInfo{FullMethodName: s.method})
-		inHeader := &stats.InHeader{
-			FullMethod:  s.method,
-			RemoteAddr:  ht.RemoteAddr(),
-			Compression: s.recvCompress,
-		}
-		sh.HandleRPC(s.ctx, inHeader)
+		id:               0, // irrelevant
+		ctx:              ctx,
+		requestRead:      func(int) {},
+		cancel:           cancel,
+		buf:              newRecvBuffer(),
+		st:               ht,
+		method:           req.URL.Path,
+		recvCompress:     req.Header.Get("grpc-encoding"),
+		contentSubtype:   ht.contentSubtype,
+		headerWireLength: 0, // won't have access to header wire length until golang/go#18997.
 	}
 	s.trReader = &transportReader{
 		reader:        &recvBufferReader{ctx: s.ctx, ctxDone: s.ctx.Done(), recv: s.buf, freeBuffer: func(*bytes.Buffer) {}},
diff --git a/vendor/google.golang.org/grpc/internal/transport/http2_client.go b/vendor/google.golang.org/grpc/internal/transport/http2_client.go
index 326bf084800..c33ac5961b2 100644
--- a/vendor/google.golang.org/grpc/internal/transport/http2_client.go
+++ b/vendor/google.golang.org/grpc/internal/transport/http2_client.go
@@ -36,6 +36,7 @@ import (
 	"golang.org/x/net/http2/hpack"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/internal"
 	"google.golang.org/grpc/internal/channelz"
 	icredentials "google.golang.org/grpc/internal/credentials"
 	"google.golang.org/grpc/internal/grpclog"
@@ -43,7 +44,7 @@ import (
 	"google.golang.org/grpc/internal/grpcutil"
 	imetadata "google.golang.org/grpc/internal/metadata"
 	istatus "google.golang.org/grpc/internal/status"
-	"google.golang.org/grpc/internal/syscall"
+	isyscall "google.golang.org/grpc/internal/syscall"
 	"google.golang.org/grpc/internal/transport/networktype"
 	"google.golang.org/grpc/keepalive"
 	"google.golang.org/grpc/metadata"
@@ -58,6 +59,8 @@ import (
 // atomically.
 var clientConnectionCounter uint64
 
+var metadataFromOutgoingContextRaw = internal.FromOutgoingContextRaw.(func(context.Context) (metadata.MD, [][]string, bool))
+
 // http2Client implements the ClientTransport interface with HTTP2.
 type http2Client struct {
 	lastRead  int64 // Keep this field 64-bit aligned. Accessed atomically.
@@ -176,7 +179,7 @@ func dial(ctx context.Context, fn func(context.Context, string) (net.Conn, error
 	if networkType == "tcp" && useProxy {
 		return proxyDial(ctx, address, grpcUA)
 	}
-	return (&net.Dialer{}).DialContext(ctx, networkType, address)
+	return internal.NetDialerWithTCPKeepalive().DialContext(ctx, networkType, address)
 }
 
 func isTemporary(err error) bool {
@@ -262,7 +265,7 @@ func newHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts
 	}
 	keepaliveEnabled := false
 	if kp.Time != infinity {
-		if err = syscall.SetTCPUserTimeout(conn, kp.Timeout); err != nil {
+		if err = isyscall.SetTCPUserTimeout(conn, kp.Timeout); err != nil {
 			return nil, connectionErrorf(false, err, "transport: failed to set TCP_USER_TIMEOUT: %v", err)
 		}
 		keepaliveEnabled = true
@@ -330,7 +333,7 @@ func newHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts
 		readerDone:            make(chan struct{}),
 		writerDone:            make(chan struct{}),
 		goAway:                make(chan struct{}),
-		framer:                newFramer(conn, writeBufSize, readBufSize, maxHeaderListSize),
+		framer:                newFramer(conn, writeBufSize, readBufSize, opts.SharedWriteBuffer, maxHeaderListSize),
 		fc:                    &trInFlow{limit: uint32(icwz)},
 		scheme:                scheme,
 		activeStreams:         make(map[uint32]*Stream),
@@ -493,8 +496,9 @@ func (t *http2Client) newStream(ctx context.Context, callHdr *CallHdr) *Stream {
 
 func (t *http2Client) getPeer() *peer.Peer {
 	return &peer.Peer{
-		Addr:     t.remoteAddr,
-		AuthInfo: t.authInfo, // Can be nil
+		Addr:      t.remoteAddr,
+		AuthInfo:  t.authInfo, // Can be nil
+		LocalAddr: t.localAddr,
 	}
 }
 
@@ -566,7 +570,7 @@ func (t *http2Client) createHeaderFields(ctx context.Context, callHdr *CallHdr)
 		headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-trace-bin", Value: encodeBinHeader(b)})
 	}
 
-	if md, added, ok := metadata.FromOutgoingContextRaw(ctx); ok {
+	if md, added, ok := metadataFromOutgoingContextRaw(ctx); ok {
 		var k string
 		for k, vv := range md {
 			// HTTP doesn't allow you to set pseudoheaders after non pseudoheaders were set.
@@ -762,7 +766,7 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (*Stream,
 	firstTry := true
 	var ch chan struct{}
 	transportDrainRequired := false
-	checkForStreamQuota := func(it interface{}) bool {
+	checkForStreamQuota := func(it any) bool {
 		if t.streamQuota <= 0 { // Can go negative if server decreases it.
 			if firstTry {
 				t.waitingStreams++
@@ -800,7 +804,7 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (*Stream,
 		return true
 	}
 	var hdrListSizeErr error
-	checkForHeaderListSize := func(it interface{}) bool {
+	checkForHeaderListSize := func(it any) bool {
 		if t.maxSendHeaderListSize == nil {
 			return true
 		}
@@ -815,7 +819,7 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (*Stream,
 		return true
 	}
 	for {
-		success, err := t.controlBuf.executeAndPut(func(it interface{}) bool {
+		success, err := t.controlBuf.executeAndPut(func(it any) bool {
 			return checkForHeaderListSize(it) && checkForStreamQuota(it)
 		}, hdr)
 		if err != nil {
@@ -927,7 +931,7 @@ func (t *http2Client) closeStream(s *Stream, err error, rst bool, rstCode http2.
 		rst:     rst,
 		rstCode: rstCode,
 	}
-	addBackStreamQuota := func(interface{}) bool {
+	addBackStreamQuota := func(any) bool {
 		t.streamQuota++
 		if t.streamQuota > 0 && t.waitingStreams > 0 {
 			select {
@@ -1080,7 +1084,7 @@ func (t *http2Client) updateWindow(s *Stream, n uint32) {
 // for the transport and the stream based on the current bdp
 // estimation.
 func (t *http2Client) updateFlowControl(n uint32) {
-	updateIWS := func(interface{}) bool {
+	updateIWS := func(any) bool {
 		t.initialWindowSize = int32(n)
 		t.mu.Lock()
 		for _, s := range t.activeStreams {
@@ -1233,7 +1237,7 @@ func (t *http2Client) handleSettings(f *http2.SettingsFrame, isFirst bool) {
 		}
 		updateFuncs = append(updateFuncs, updateStreamQuota)
 	}
-	t.controlBuf.executeAndPut(func(interface{}) bool {
+	t.controlBuf.executeAndPut(func(any) bool {
 		for _, f := range updateFuncs {
 			f()
 		}
@@ -1321,10 +1325,8 @@ func (t *http2Client) handleGoAway(f *http2.GoAwayFrame) {
 	for streamID, stream := range t.activeStreams {
 		if streamID > id && streamID <= upperLimit {
 			// The stream was unprocessed by the server.
-			if streamID > id && streamID <= upperLimit {
-				atomic.StoreUint32(&stream.unprocessed, 1)
-				streamsToClose = append(streamsToClose, stream)
-			}
+			atomic.StoreUint32(&stream.unprocessed, 1)
+			streamsToClose = append(streamsToClose, stream)
 		}
 	}
 	t.mu.Unlock()
@@ -1399,7 +1401,6 @@ func (t *http2Client) operateHeaders(frame *http2.MetaHeadersFrame) {
 		mdata          = make(map[string][]string)
 		contentTypeErr = "malformed header: missing HTTP content-type"
 		grpcMessage    string
-		statusGen      *status.Status
 		recvCompress   string
 		httpStatusCode *int
 		httpStatusErr  string
@@ -1434,12 +1435,6 @@ func (t *http2Client) operateHeaders(frame *http2.MetaHeadersFrame) {
 			rawStatusCode = codes.Code(uint32(code))
 		case "grpc-message":
 			grpcMessage = decodeGrpcMessage(hf.Value)
-		case "grpc-status-details-bin":
-			var err error
-			statusGen, err = decodeGRPCStatusDetails(hf.Value)
-			if err != nil {
-				headerError = fmt.Sprintf("transport: malformed grpc-status-details-bin: %v", err)
-			}
 		case ":status":
 			if hf.Value == "200" {
 				httpStatusErr = ""
@@ -1505,14 +1500,15 @@ func (t *http2Client) operateHeaders(frame *http2.MetaHeadersFrame) {
 		return
 	}
 
-	isHeader := false
-
-	// If headerChan hasn't been closed yet
-	if atomic.CompareAndSwapUint32(&s.headerChanClosed, 0, 1) {
-		s.headerValid = true
-		if !endStream {
-			// HEADERS frame block carries a Response-Headers.
-			isHeader = true
+	// For headers, set them in s.header and close headerChan.  For trailers or
+	// trailers-only, closeStream will set the trailers and close headerChan as
+	// needed.
+	if !endStream {
+		// If headerChan hasn't been closed yet (expected, given we checked it
+		// above, but something else could have potentially closed the whole
+		// stream).
+		if atomic.CompareAndSwapUint32(&s.headerChanClosed, 0, 1) {
+			s.headerValid = true
 			// These values can be set without any synchronization because
 			// stream goroutine will read it only after seeing a closed
 			// headerChan which we'll close after setting this.
@@ -1520,15 +1516,12 @@ func (t *http2Client) operateHeaders(frame *http2.MetaHeadersFrame) {
 			if len(mdata) > 0 {
 				s.header = mdata
 			}
-		} else {
-			// HEADERS frame block carries a Trailers-Only.
-			s.noHeaders = true
+			close(s.headerChan)
 		}
-		close(s.headerChan)
 	}
 
 	for _, sh := range t.statsHandlers {
-		if isHeader {
+		if !endStream {
 			inHeader := &stats.InHeader{
 				Client:      true,
 				WireLength:  int(frame.Header().Length),
@@ -1550,13 +1543,12 @@ func (t *http2Client) operateHeaders(frame *http2.MetaHeadersFrame) {
 		return
 	}
 
-	if statusGen == nil {
-		statusGen = status.New(rawStatusCode, grpcMessage)
-	}
+	status := istatus.NewWithProto(rawStatusCode, grpcMessage, mdata[grpcStatusDetailsBinHeader])
 
-	// if client received END_STREAM from server while stream was still active, send RST_STREAM
-	rst := s.getState() == streamActive
-	t.closeStream(s, io.EOF, rst, http2.ErrCodeNo, statusGen, mdata, true)
+	// If client received END_STREAM from server while stream was still active,
+	// send RST_STREAM.
+	rstStream := s.getState() == streamActive
+	t.closeStream(s, io.EOF, rstStream, http2.ErrCodeNo, status, mdata, true)
 }
 
 // readServerPreface reads and handles the initial settings frame from the
diff --git a/vendor/google.golang.org/grpc/internal/transport/http2_server.go b/vendor/google.golang.org/grpc/internal/transport/http2_server.go
index ec4eef21342..f6bac0e8a00 100644
--- a/vendor/google.golang.org/grpc/internal/transport/http2_server.go
+++ b/vendor/google.golang.org/grpc/internal/transport/http2_server.go
@@ -68,18 +68,15 @@ var serverConnectionCounter uint64
 
 // http2Server implements the ServerTransport interface with HTTP2.
 type http2Server struct {
-	lastRead    int64 // Keep this field 64-bit aligned. Accessed atomically.
-	ctx         context.Context
-	done        chan struct{}
-	conn        net.Conn
-	loopy       *loopyWriter
-	readerDone  chan struct{} // sync point to enable testing.
-	writerDone  chan struct{} // sync point to enable testing.
-	remoteAddr  net.Addr
-	localAddr   net.Addr
-	authInfo    credentials.AuthInfo // auth info about the connection
-	inTapHandle tap.ServerInHandle
-	framer      *framer
+	lastRead        int64 // Keep this field 64-bit aligned. Accessed atomically.
+	done            chan struct{}
+	conn            net.Conn
+	loopy           *loopyWriter
+	readerDone      chan struct{} // sync point to enable testing.
+	loopyWriterDone chan struct{}
+	peer            peer.Peer
+	inTapHandle     tap.ServerInHandle
+	framer          *framer
 	// The max number of concurrent streams.
 	maxStreams uint32
 	// controlBuf delivers all the control related tasks (e.g., window
@@ -165,7 +162,7 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 	if config.MaxHeaderListSize != nil {
 		maxHeaderListSize = *config.MaxHeaderListSize
 	}
-	framer := newFramer(conn, writeBufSize, readBufSize, maxHeaderListSize)
+	framer := newFramer(conn, writeBufSize, readBufSize, config.SharedWriteBuffer, maxHeaderListSize)
 	// Send initial settings as connection preface to client.
 	isettings := []http2.Setting{{
 		ID:  http2.SettingMaxFrameSize,
@@ -233,7 +230,7 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 		kp.Timeout = defaultServerKeepaliveTimeout
 	}
 	if kp.Time != infinity {
-		if err = syscall.SetTCPUserTimeout(conn, kp.Timeout); err != nil {
+		if err = syscall.SetTCPUserTimeout(rawConn, kp.Timeout); err != nil {
 			return nil, connectionErrorf(false, err, "transport: failed to set TCP_USER_TIMEOUT: %v", err)
 		}
 	}
@@ -243,16 +240,18 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 	}
 
 	done := make(chan struct{})
+	peer := peer.Peer{
+		Addr:      conn.RemoteAddr(),
+		LocalAddr: conn.LocalAddr(),
+		AuthInfo:  authInfo,
+	}
 	t := &http2Server{
-		ctx:               setConnection(context.Background(), rawConn),
 		done:              done,
 		conn:              conn,
-		remoteAddr:        conn.RemoteAddr(),
-		localAddr:         conn.LocalAddr(),
-		authInfo:          authInfo,
+		peer:              peer,
 		framer:            framer,
 		readerDone:        make(chan struct{}),
-		writerDone:        make(chan struct{}),
+		loopyWriterDone:   make(chan struct{}),
 		maxStreams:        config.MaxStreams,
 		inTapHandle:       config.InTapHandle,
 		fc:                &trInFlow{limit: uint32(icwz)},
@@ -267,8 +266,6 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 		bufferPool:        newBufferPool(),
 	}
 	t.logger = prefixLoggerForServerTransport(t)
-	// Add peer information to the http2server context.
-	t.ctx = peer.NewContext(t.ctx, t.getPeer())
 
 	t.controlBuf = newControlBuffer(t.done)
 	if dynamicWindow {
@@ -277,15 +274,7 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 			updateFlowControl: t.updateFlowControl,
 		}
 	}
-	for _, sh := range t.stats {
-		t.ctx = sh.TagConn(t.ctx, &stats.ConnTagInfo{
-			RemoteAddr: t.remoteAddr,
-			LocalAddr:  t.localAddr,
-		})
-		connBegin := &stats.ConnBegin{}
-		sh.HandleConn(t.ctx, connBegin)
-	}
-	t.channelzID, err = channelz.RegisterNormalSocket(t, config.ChannelzParentID, fmt.Sprintf("%s -> %s", t.remoteAddr, t.localAddr))
+	t.channelzID, err = channelz.RegisterNormalSocket(t, config.ChannelzParentID, fmt.Sprintf("%s -> %s", t.peer.Addr, t.peer.LocalAddr))
 	if err != nil {
 		return nil, err
 	}
@@ -334,7 +323,7 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 		t.loopy = newLoopyWriter(serverSide, t.framer, t.controlBuf, t.bdpEst, t.conn, t.logger)
 		t.loopy.ssGoAwayHandler = t.outgoingGoAwayHandler
 		t.loopy.run()
-		close(t.writerDone)
+		close(t.loopyWriterDone)
 	}()
 	go t.keepalive()
 	return t, nil
@@ -342,7 +331,7 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport,
 
 // operateHeaders takes action on the decoded headers. Returns an error if fatal
 // error encountered and transport needs to close, otherwise returns nil.
-func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(*Stream), traceCtx func(context.Context, string) context.Context) error {
+func (t *http2Server) operateHeaders(ctx context.Context, frame *http2.MetaHeadersFrame, handle func(*Stream)) error {
 	// Acquire max stream ID lock for entire duration
 	t.maxStreamMu.Lock()
 	defer t.maxStreamMu.Unlock()
@@ -369,10 +358,11 @@ func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(
 
 	buf := newRecvBuffer()
 	s := &Stream{
-		id:  streamID,
-		st:  t,
-		buf: buf,
-		fc:  &inFlow{limit: uint32(t.initialWindowSize)},
+		id:               streamID,
+		st:               t,
+		buf:              buf,
+		fc:               &inFlow{limit: uint32(t.initialWindowSize)},
+		headerWireLength: int(frame.Header().Length),
 	}
 	var (
 		// if false, content-type was missing or invalid
@@ -511,9 +501,9 @@ func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(
 		s.state = streamReadDone
 	}
 	if timeoutSet {
-		s.ctx, s.cancel = context.WithTimeout(t.ctx, timeout)
+		s.ctx, s.cancel = context.WithTimeout(ctx, timeout)
 	} else {
-		s.ctx, s.cancel = context.WithCancel(t.ctx)
+		s.ctx, s.cancel = context.WithCancel(ctx)
 	}
 
 	// Attach the received metadata to the context.
@@ -561,7 +551,7 @@ func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(
 	}
 	if t.inTapHandle != nil {
 		var err error
-		if s.ctx, err = t.inTapHandle(s.ctx, &tap.Info{FullMethodName: s.method}); err != nil {
+		if s.ctx, err = t.inTapHandle(s.ctx, &tap.Info{FullMethodName: s.method, Header: mdata}); err != nil {
 			t.mu.Unlock()
 			if t.logger.V(logLevel) {
 				t.logger.Infof("Aborting the stream early due to InTapHandle failure: %v", err)
@@ -592,19 +582,6 @@ func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(
 	s.requestRead = func(n int) {
 		t.adjustWindow(s, uint32(n))
 	}
-	s.ctx = traceCtx(s.ctx, s.method)
-	for _, sh := range t.stats {
-		s.ctx = sh.TagRPC(s.ctx, &stats.RPCTagInfo{FullMethodName: s.method})
-		inHeader := &stats.InHeader{
-			FullMethod:  s.method,
-			RemoteAddr:  t.remoteAddr,
-			LocalAddr:   t.localAddr,
-			Compression: s.recvCompress,
-			WireLength:  int(frame.Header().Length),
-			Header:      mdata.Copy(),
-		}
-		sh.HandleRPC(s.ctx, inHeader)
-	}
 	s.ctxDone = s.ctx.Done()
 	s.wq = newWriteQuota(defaultWriteQuota, s.ctxDone)
 	s.trReader = &transportReader{
@@ -630,8 +607,11 @@ func (t *http2Server) operateHeaders(frame *http2.MetaHeadersFrame, handle func(
 // HandleStreams receives incoming streams using the given handler. This is
 // typically run in a separate goroutine.
 // traceCtx attaches trace to ctx and returns the new context.
-func (t *http2Server) HandleStreams(handle func(*Stream), traceCtx func(context.Context, string) context.Context) {
-	defer close(t.readerDone)
+func (t *http2Server) HandleStreams(ctx context.Context, handle func(*Stream)) {
+	defer func() {
+		<-t.loopyWriterDone
+		close(t.readerDone)
+	}()
 	for {
 		t.controlBuf.throttle()
 		frame, err := t.framer.fr.ReadFrame()
@@ -665,7 +645,7 @@ func (t *http2Server) HandleStreams(handle func(*Stream), traceCtx func(context.
 		}
 		switch frame := frame.(type) {
 		case *http2.MetaHeadersFrame:
-			if err := t.operateHeaders(frame, handle, traceCtx); err != nil {
+			if err := t.operateHeaders(ctx, frame, handle); err != nil {
 				t.Close(err)
 				break
 			}
@@ -850,7 +830,7 @@ func (t *http2Server) handleSettings(f *http2.SettingsFrame) {
 		}
 		return nil
 	})
-	t.controlBuf.executeAndPut(func(interface{}) bool {
+	t.controlBuf.executeAndPut(func(any) bool {
 		for _, f := range updateFuncs {
 			f()
 		}
@@ -934,7 +914,7 @@ func appendHeaderFieldsFromMD(headerFields []hpack.HeaderField, md metadata.MD)
 	return headerFields
 }
 
-func (t *http2Server) checkForHeaderListSize(it interface{}) bool {
+func (t *http2Server) checkForHeaderListSize(it any) bool {
 	if t.maxSendHeaderListSize == nil {
 		return true
 	}
@@ -980,7 +960,12 @@ func (t *http2Server) WriteHeader(s *Stream, md metadata.MD) error {
 		}
 	}
 	if err := t.writeHeaderLocked(s); err != nil {
-		return status.Convert(err).Err()
+		switch e := err.(type) {
+		case ConnectionError:
+			return status.Error(codes.Unavailable, e.Desc)
+		default:
+			return status.Convert(err).Err()
+		}
 	}
 	return nil
 }
@@ -1053,12 +1038,15 @@ func (t *http2Server) WriteStatus(s *Stream, st *status.Status) error {
 	headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-message", Value: encodeGrpcMessage(st.Message())})
 
 	if p := st.Proto(); p != nil && len(p.Details) > 0 {
+		// Do not use the user's grpc-status-details-bin (if present) if we are
+		// even attempting to set our own.
+		delete(s.trailer, grpcStatusDetailsBinHeader)
 		stBytes, err := proto.Marshal(p)
 		if err != nil {
 			// TODO: return error instead, when callers are able to handle it.
 			t.logger.Errorf("Failed to marshal rpc status: %s, error: %v", pretty.ToJSON(p), err)
 		} else {
-			headerFields = append(headerFields, hpack.HeaderField{Name: "grpc-status-details-bin", Value: encodeBinHeader(stBytes)})
+			headerFields = append(headerFields, hpack.HeaderField{Name: grpcStatusDetailsBinHeader, Value: encodeBinHeader(stBytes)})
 		}
 	}
 
@@ -1240,10 +1228,6 @@ func (t *http2Server) Close(err error) {
 	for _, s := range streams {
 		s.cancel()
 	}
-	for _, sh := range t.stats {
-		connEnd := &stats.ConnEnd{}
-		sh.HandleConn(t.ctx, connEnd)
-	}
 }
 
 // deleteStream deletes the stream s from transport's active streams.
@@ -1309,10 +1293,6 @@ func (t *http2Server) closeStream(s *Stream, rst bool, rstCode http2.ErrCode, eo
 	})
 }
 
-func (t *http2Server) RemoteAddr() net.Addr {
-	return t.remoteAddr
-}
-
 func (t *http2Server) Drain(debugData string) {
 	t.mu.Lock()
 	defer t.mu.Unlock()
@@ -1395,11 +1375,11 @@ func (t *http2Server) ChannelzMetric() *channelz.SocketInternalMetric {
 		LastMessageReceivedTimestamp:     time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgRecvTime)),
 		LocalFlowControlWindow:           int64(t.fc.getSize()),
 		SocketOptions:                    channelz.GetSocketOption(t.conn),
-		LocalAddr:                        t.localAddr,
-		RemoteAddr:                       t.remoteAddr,
+		LocalAddr:                        t.peer.LocalAddr,
+		RemoteAddr:                       t.peer.Addr,
 		// RemoteName :
 	}
-	if au, ok := t.authInfo.(credentials.ChannelzSecurityInfo); ok {
+	if au, ok := t.peer.AuthInfo.(credentials.ChannelzSecurityInfo); ok {
 		s.Security = au.GetSecurityValue()
 	}
 	s.RemoteFlowControlWindow = t.getOutFlowWindow()
@@ -1431,10 +1411,12 @@ func (t *http2Server) getOutFlowWindow() int64 {
 	}
 }
 
-func (t *http2Server) getPeer() *peer.Peer {
+// Peer returns the peer of the transport.
+func (t *http2Server) Peer() *peer.Peer {
 	return &peer.Peer{
-		Addr:     t.remoteAddr,
-		AuthInfo: t.authInfo, // Can be nil
+		Addr:      t.peer.Addr,
+		LocalAddr: t.peer.LocalAddr,
+		AuthInfo:  t.peer.AuthInfo, // Can be nil
 	}
 }
 
@@ -1459,6 +1441,6 @@ func GetConnection(ctx context.Context) net.Conn {
 // SetConnection adds the connection to the context to be able to get
 // information about the destination ip and port for an incoming RPC. This also
 // allows any unary or streaming interceptors to see the connection.
-func setConnection(ctx context.Context, conn net.Conn) context.Context {
+func SetConnection(ctx context.Context, conn net.Conn) context.Context {
 	return context.WithValue(ctx, connectionKey{}, conn)
 }
diff --git a/vendor/google.golang.org/grpc/internal/transport/http_util.go b/vendor/google.golang.org/grpc/internal/transport/http_util.go
index 19cbb18f5ab..dc29d590e91 100644
--- a/vendor/google.golang.org/grpc/internal/transport/http_util.go
+++ b/vendor/google.golang.org/grpc/internal/transport/http_util.go
@@ -30,15 +30,13 @@ import (
 	"net/url"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 	"unicode/utf8"
 
-	"github.com/golang/protobuf/proto"
 	"golang.org/x/net/http2"
 	"golang.org/x/net/http2/hpack"
-	spb "google.golang.org/genproto/googleapis/rpc/status"
 	"google.golang.org/grpc/codes"
-	"google.golang.org/grpc/status"
 )
 
 const (
@@ -87,6 +85,8 @@ var (
 	}
 )
 
+var grpcStatusDetailsBinHeader = "grpc-status-details-bin"
+
 // isReservedHeader checks whether hdr belongs to HTTP2 headers
 // reserved by gRPC protocol. Any other headers are classified as the
 // user-specified metadata.
@@ -102,7 +102,6 @@ func isReservedHeader(hdr string) bool {
 		"grpc-message",
 		"grpc-status",
 		"grpc-timeout",
-		"grpc-status-details-bin",
 		// Intentionally exclude grpc-previous-rpc-attempts and
 		// grpc-retry-pushback-ms, which are "reserved", but their API
 		// intentionally works via metadata.
@@ -153,18 +152,6 @@ func decodeMetadataHeader(k, v string) (string, error) {
 	return v, nil
 }
 
-func decodeGRPCStatusDetails(rawDetails string) (*status.Status, error) {
-	v, err := decodeBinHeader(rawDetails)
-	if err != nil {
-		return nil, err
-	}
-	st := &spb.Status{}
-	if err = proto.Unmarshal(v, st); err != nil {
-		return nil, err
-	}
-	return status.FromProto(st), nil
-}
-
 type timeoutUnit uint8
 
 const (
@@ -309,6 +296,7 @@ func decodeGrpcMessageUnchecked(msg string) string {
 }
 
 type bufWriter struct {
+	pool      *sync.Pool
 	buf       []byte
 	offset    int
 	batchSize int
@@ -316,12 +304,17 @@ type bufWriter struct {
 	err       error
 }
 
-func newBufWriter(conn net.Conn, batchSize int) *bufWriter {
-	return &bufWriter{
-		buf:       make([]byte, batchSize*2),
+func newBufWriter(conn net.Conn, batchSize int, pool *sync.Pool) *bufWriter {
+	w := &bufWriter{
 		batchSize: batchSize,
 		conn:      conn,
+		pool:      pool,
+	}
+	// this indicates that we should use non shared buf
+	if pool == nil {
+		w.buf = make([]byte, batchSize)
 	}
+	return w
 }
 
 func (w *bufWriter) Write(b []byte) (n int, err error) {
@@ -332,19 +325,34 @@ func (w *bufWriter) Write(b []byte) (n int, err error) {
 		n, err = w.conn.Write(b)
 		return n, toIOError(err)
 	}
+	if w.buf == nil {
+		b := w.pool.Get().(*[]byte)
+		w.buf = *b
+	}
 	for len(b) > 0 {
 		nn := copy(w.buf[w.offset:], b)
 		b = b[nn:]
 		w.offset += nn
 		n += nn
 		if w.offset >= w.batchSize {
-			err = w.Flush()
+			err = w.flushKeepBuffer()
 		}
 	}
 	return n, err
 }
 
 func (w *bufWriter) Flush() error {
+	err := w.flushKeepBuffer()
+	// Only release the buffer if we are in a "shared" mode
+	if w.buf != nil && w.pool != nil {
+		b := w.buf
+		w.pool.Put(&b)
+		w.buf = nil
+	}
+	return err
+}
+
+func (w *bufWriter) flushKeepBuffer() error {
 	if w.err != nil {
 		return w.err
 	}
@@ -381,7 +389,10 @@ type framer struct {
 	fr     *http2.Framer
 }
 
-func newFramer(conn net.Conn, writeBufferSize, readBufferSize int, maxHeaderListSize uint32) *framer {
+var writeBufferPoolMap map[int]*sync.Pool = make(map[int]*sync.Pool)
+var writeBufferMutex sync.Mutex
+
+func newFramer(conn net.Conn, writeBufferSize, readBufferSize int, sharedWriteBuffer bool, maxHeaderListSize uint32) *framer {
 	if writeBufferSize < 0 {
 		writeBufferSize = 0
 	}
@@ -389,7 +400,11 @@ func newFramer(conn net.Conn, writeBufferSize, readBufferSize int, maxHeaderList
 	if readBufferSize > 0 {
 		r = bufio.NewReaderSize(r, readBufferSize)
 	}
-	w := newBufWriter(conn, writeBufferSize)
+	var pool *sync.Pool
+	if sharedWriteBuffer {
+		pool = getWriteBufferPool(writeBufferSize)
+	}
+	w := newBufWriter(conn, writeBufferSize, pool)
 	f := &framer{
 		writer: w,
 		fr:     http2.NewFramer(w, r),
@@ -403,6 +418,24 @@ func newFramer(conn net.Conn, writeBufferSize, readBufferSize int, maxHeaderList
 	return f
 }
 
+func getWriteBufferPool(writeBufferSize int) *sync.Pool {
+	writeBufferMutex.Lock()
+	defer writeBufferMutex.Unlock()
+	size := writeBufferSize * 2
+	pool, ok := writeBufferPoolMap[size]
+	if ok {
+		return pool
+	}
+	pool = &sync.Pool{
+		New: func() any {
+			b := make([]byte, size)
+			return &b
+		},
+	}
+	writeBufferPoolMap[size] = pool
+	return pool
+}
+
 // parseDialTarget returns the network and address to pass to dialer.
 func parseDialTarget(target string) (string, string) {
 	net := "tcp"
diff --git a/vendor/google.golang.org/grpc/internal/transport/proxy.go b/vendor/google.golang.org/grpc/internal/transport/proxy.go
index 41596198787..24fa1032574 100644
--- a/vendor/google.golang.org/grpc/internal/transport/proxy.go
+++ b/vendor/google.golang.org/grpc/internal/transport/proxy.go
@@ -28,6 +28,8 @@ import (
 	"net/http"
 	"net/http/httputil"
 	"net/url"
+
+	"google.golang.org/grpc/internal"
 )
 
 const proxyAuthHeaderKey = "Proxy-Authorization"
@@ -112,7 +114,7 @@ func doHTTPConnectHandshake(ctx context.Context, conn net.Conn, backendAddr stri
 // proxyDial dials, connecting to a proxy first if necessary. Checks if a proxy
 // is necessary, dials, does the HTTP CONNECT handshake, and returns the
 // connection.
-func proxyDial(ctx context.Context, addr string, grpcUA string) (conn net.Conn, err error) {
+func proxyDial(ctx context.Context, addr string, grpcUA string) (net.Conn, error) {
 	newAddr := addr
 	proxyURL, err := mapAddress(addr)
 	if err != nil {
@@ -122,15 +124,15 @@ func proxyDial(ctx context.Context, addr string, grpcUA string) (conn net.Conn,
 		newAddr = proxyURL.Host
 	}
 
-	conn, err = (&net.Dialer{}).DialContext(ctx, "tcp", newAddr)
+	conn, err := internal.NetDialerWithTCPKeepalive().DialContext(ctx, "tcp", newAddr)
 	if err != nil {
-		return
+		return nil, err
 	}
-	if proxyURL != nil {
+	if proxyURL == nil {
 		// proxy is disabled if proxyURL is nil.
-		conn, err = doHTTPConnectHandshake(ctx, conn, addr, proxyURL, grpcUA)
+		return conn, err
 	}
-	return
+	return doHTTPConnectHandshake(ctx, conn, addr, proxyURL, grpcUA)
 }
 
 func sendHTTPRequest(ctx context.Context, req *http.Request, conn net.Conn) error {
diff --git a/vendor/google.golang.org/grpc/internal/transport/transport.go b/vendor/google.golang.org/grpc/internal/transport/transport.go
index aa1c896595d..b7b8fec1804 100644
--- a/vendor/google.golang.org/grpc/internal/transport/transport.go
+++ b/vendor/google.golang.org/grpc/internal/transport/transport.go
@@ -37,16 +37,13 @@ import (
 	"google.golang.org/grpc/internal/channelz"
 	"google.golang.org/grpc/keepalive"
 	"google.golang.org/grpc/metadata"
+	"google.golang.org/grpc/peer"
 	"google.golang.org/grpc/resolver"
 	"google.golang.org/grpc/stats"
 	"google.golang.org/grpc/status"
 	"google.golang.org/grpc/tap"
 )
 
-// ErrNoHeaders is used as a signal that a trailers only response was received,
-// and is not a real error.
-var ErrNoHeaders = errors.New("stream has no headers")
-
 const logLevel = 2
 
 type bufferPool struct {
@@ -56,7 +53,7 @@ type bufferPool struct {
 func newBufferPool() *bufferPool {
 	return &bufferPool{
 		pool: sync.Pool{
-			New: func() interface{} {
+			New: func() any {
 				return new(bytes.Buffer)
 			},
 		},
@@ -269,7 +266,8 @@ type Stream struct {
 	// headerValid indicates whether a valid header was received.  Only
 	// meaningful after headerChan is closed (always call waitOnHeader() before
 	// reading its value).  Not valid on server side.
-	headerValid bool
+	headerValid      bool
+	headerWireLength int // Only set on server side.
 
 	// hdrMu protects header and trailer metadata on the server-side.
 	hdrMu sync.Mutex
@@ -390,14 +388,10 @@ func (s *Stream) Header() (metadata.MD, error) {
 	}
 	s.waitOnHeader()
 
-	if !s.headerValid {
+	if !s.headerValid || s.noHeaders {
 		return nil, s.status.Err()
 	}
 
-	if s.noHeaders {
-		return nil, ErrNoHeaders
-	}
-
 	return s.header.Copy(), nil
 }
 
@@ -433,6 +427,12 @@ func (s *Stream) Context() context.Context {
 	return s.ctx
 }
 
+// SetContext sets the context of the stream. This will be deleted once the
+// stats handler callouts all move to gRPC layer.
+func (s *Stream) SetContext(ctx context.Context) {
+	s.ctx = ctx
+}
+
 // Method returns the method for the stream.
 func (s *Stream) Method() string {
 	return s.method
@@ -445,6 +445,12 @@ func (s *Stream) Status() *status.Status {
 	return s.status
 }
 
+// HeaderWireLength returns the size of the headers of the stream as received
+// from the wire. Valid only on the server.
+func (s *Stream) HeaderWireLength() int {
+	return s.headerWireLength
+}
+
 // SetHeader sets the header metadata. This can be called multiple times.
 // Server side only.
 // This should not be called in parallel to other data writes.
@@ -559,6 +565,7 @@ type ServerConfig struct {
 	InitialConnWindowSize int32
 	WriteBufferSize       int
 	ReadBufferSize        int
+	SharedWriteBuffer     bool
 	ChannelzParentID      *channelz.Identifier
 	MaxHeaderListSize     *uint32
 	HeaderTableSize       *uint32
@@ -592,6 +599,8 @@ type ConnectOptions struct {
 	WriteBufferSize int
 	// ReadBufferSize sets the size of read buffer, which in turn determines how much data can be read at most for one read syscall.
 	ReadBufferSize int
+	// SharedWriteBuffer indicates whether connections should reuse write buffer
+	SharedWriteBuffer bool
 	// ChannelzParentID sets the addrConn id which initiate the creation of this client transport.
 	ChannelzParentID *channelz.Identifier
 	// MaxHeaderListSize sets the max (uncompressed) size of header list that is prepared to be received.
@@ -703,7 +712,7 @@ type ClientTransport interface {
 // Write methods for a given Stream will be called serially.
 type ServerTransport interface {
 	// HandleStreams receives incoming streams using the given handler.
-	HandleStreams(func(*Stream), func(context.Context, string) context.Context)
+	HandleStreams(context.Context, func(*Stream))
 
 	// WriteHeader sends the header metadata for the given stream.
 	// WriteHeader may not be called on all streams.
@@ -722,8 +731,8 @@ type ServerTransport interface {
 	// handlers will be terminated asynchronously.
 	Close(err error)
 
-	// RemoteAddr returns the remote network address.
-	RemoteAddr() net.Addr
+	// Peer returns the peer of the server transport.
+	Peer() *peer.Peer
 
 	// Drain notifies the client this ServerTransport stops accepting new RPCs.
 	Drain(debugData string)
@@ -736,7 +745,7 @@ type ServerTransport interface {
 }
 
 // connectionErrorf creates an ConnectionError with the specified error description.
-func connectionErrorf(temp bool, e error, format string, a ...interface{}) ConnectionError {
+func connectionErrorf(temp bool, e error, format string, a ...any) ConnectionError {
 	return ConnectionError{
 		Desc: fmt.Sprintf(format, a...),
 		temp: temp,
diff --git a/vendor/google.golang.org/grpc/metadata/metadata.go b/vendor/google.golang.org/grpc/metadata/metadata.go
index a2cdcaf12a8..1e9485fd6e2 100644
--- a/vendor/google.golang.org/grpc/metadata/metadata.go
+++ b/vendor/google.golang.org/grpc/metadata/metadata.go
@@ -25,8 +25,14 @@ import (
 	"context"
 	"fmt"
 	"strings"
+
+	"google.golang.org/grpc/internal"
 )
 
+func init() {
+	internal.FromOutgoingContextRaw = fromOutgoingContextRaw
+}
+
 // DecodeKeyValue returns k, v, nil.
 //
 // Deprecated: use k and v directly instead.
@@ -153,14 +159,16 @@ func Join(mds ...MD) MD {
 type mdIncomingKey struct{}
 type mdOutgoingKey struct{}
 
-// NewIncomingContext creates a new context with incoming md attached.
+// NewIncomingContext creates a new context with incoming md attached. md must
+// not be modified after calling this function.
 func NewIncomingContext(ctx context.Context, md MD) context.Context {
 	return context.WithValue(ctx, mdIncomingKey{}, md)
 }
 
 // NewOutgoingContext creates a new context with outgoing md attached. If used
 // in conjunction with AppendToOutgoingContext, NewOutgoingContext will
-// overwrite any previously-appended metadata.
+// overwrite any previously-appended metadata. md must not be modified after
+// calling this function.
 func NewOutgoingContext(ctx context.Context, md MD) context.Context {
 	return context.WithValue(ctx, mdOutgoingKey{}, rawMD{md: md})
 }
@@ -203,7 +211,8 @@ func FromIncomingContext(ctx context.Context) (MD, bool) {
 }
 
 // ValueFromIncomingContext returns the metadata value corresponding to the metadata
-// key from the incoming metadata if it exists. Key must be lower-case.
+// key from the incoming metadata if it exists. Keys are matched in a case insensitive
+// manner.
 //
 // # Experimental
 //
@@ -219,33 +228,29 @@ func ValueFromIncomingContext(ctx context.Context, key string) []string {
 		return copyOf(v)
 	}
 	for k, v := range md {
-		// We need to manually convert all keys to lower case, because MD is a
-		// map, and there's no guarantee that the MD attached to the context is
-		// created using our helper functions.
-		if strings.ToLower(k) == key {
+		// Case insenitive comparison: MD is a map, and there's no guarantee
+		// that the MD attached to the context is created using our helper
+		// functions.
+		if strings.EqualFold(k, key) {
 			return copyOf(v)
 		}
 	}
 	return nil
 }
 
-// the returned slice must not be modified in place
 func copyOf(v []string) []string {
 	vals := make([]string, len(v))
 	copy(vals, v)
 	return vals
 }
 
-// FromOutgoingContextRaw returns the un-merged, intermediary contents of rawMD.
+// fromOutgoingContextRaw returns the un-merged, intermediary contents of rawMD.
 //
 // Remember to perform strings.ToLower on the keys, for both the returned MD (MD
 // is a map, there's no guarantee it's created using our helper functions) and
 // the extra kv pairs (AppendToOutgoingContext doesn't turn them into
 // lowercase).
-//
-// This is intended for gRPC-internal use ONLY. Users should use
-// FromOutgoingContext instead.
-func FromOutgoingContextRaw(ctx context.Context) (MD, [][]string, bool) {
+func fromOutgoingContextRaw(ctx context.Context) (MD, [][]string, bool) {
 	raw, ok := ctx.Value(mdOutgoingKey{}).(rawMD)
 	if !ok {
 		return nil, nil, false
diff --git a/vendor/google.golang.org/grpc/peer/peer.go b/vendor/google.golang.org/grpc/peer/peer.go
index e01d219ffbc..a821ff9b2b7 100644
--- a/vendor/google.golang.org/grpc/peer/peer.go
+++ b/vendor/google.golang.org/grpc/peer/peer.go
@@ -32,6 +32,8 @@ import (
 type Peer struct {
 	// Addr is the peer address.
 	Addr net.Addr
+	// LocalAddr is the local address.
+	LocalAddr net.Addr
 	// AuthInfo is the authentication information of the transport.
 	// It is nil if there is no transport security being used.
 	AuthInfo credentials.AuthInfo
diff --git a/vendor/google.golang.org/grpc/picker_wrapper.go b/vendor/google.golang.org/grpc/picker_wrapper.go
index 02f97595124..bf56faa76d3 100644
--- a/vendor/google.golang.org/grpc/picker_wrapper.go
+++ b/vendor/google.golang.org/grpc/picker_wrapper.go
@@ -28,31 +28,31 @@ import (
 	"google.golang.org/grpc/internal/channelz"
 	istatus "google.golang.org/grpc/internal/status"
 	"google.golang.org/grpc/internal/transport"
+	"google.golang.org/grpc/stats"
 	"google.golang.org/grpc/status"
 )
 
 // pickerWrapper is a wrapper of balancer.Picker. It blocks on certain pick
 // actions and unblock when there's a picker update.
 type pickerWrapper struct {
-	mu         sync.Mutex
-	done       bool
-	idle       bool
-	blockingCh chan struct{}
-	picker     balancer.Picker
+	mu            sync.Mutex
+	done          bool
+	blockingCh    chan struct{}
+	picker        balancer.Picker
+	statsHandlers []stats.Handler // to record blocking picker calls
 }
 
-func newPickerWrapper() *pickerWrapper {
-	return &pickerWrapper{blockingCh: make(chan struct{})}
+func newPickerWrapper(statsHandlers []stats.Handler) *pickerWrapper {
+	return &pickerWrapper{
+		blockingCh:    make(chan struct{}),
+		statsHandlers: statsHandlers,
+	}
 }
 
 // updatePicker is called by UpdateBalancerState. It unblocks all blocked pick.
 func (pw *pickerWrapper) updatePicker(p balancer.Picker) {
 	pw.mu.Lock()
-	if pw.done || pw.idle {
-		// There is a small window where a picker update from the LB policy can
-		// race with the channel going to idle mode. If the picker is idle here,
-		// it is because the channel asked it to do so, and therefore it is sage
-		// to ignore the update from the LB policy.
+	if pw.done {
 		pw.mu.Unlock()
 		return
 	}
@@ -95,6 +95,7 @@ func (pw *pickerWrapper) pick(ctx context.Context, failfast bool, info balancer.
 	var ch chan struct{}
 
 	var lastPickErr error
+
 	for {
 		pw.mu.Lock()
 		if pw.done {
@@ -129,6 +130,20 @@ func (pw *pickerWrapper) pick(ctx context.Context, failfast bool, info balancer.
 			continue
 		}
 
+		// If the channel is set, it means that the pick call had to wait for a
+		// new picker at some point. Either it's the first iteration and this
+		// function received the first picker, or a picker errored with
+		// ErrNoSubConnAvailable or errored with failfast set to false, which
+		// will trigger a continue to the next iteration. In the first case this
+		// conditional will hit if this call had to block (the channel is set).
+		// In the second case, the only way it will get to this conditional is
+		// if there is a new picker.
+		if ch != nil {
+			for _, sh := range pw.statsHandlers {
+				sh.HandleRPC(ctx, &stats.PickerUpdated{})
+			}
+		}
+
 		ch = pw.blockingCh
 		p := pw.picker
 		pw.mu.Unlock()
@@ -190,23 +205,15 @@ func (pw *pickerWrapper) close() {
 	close(pw.blockingCh)
 }
 
-func (pw *pickerWrapper) enterIdleMode() {
-	pw.mu.Lock()
-	defer pw.mu.Unlock()
-	if pw.done {
-		return
-	}
-	pw.idle = true
-}
-
-func (pw *pickerWrapper) exitIdleMode() {
+// reset clears the pickerWrapper and prepares it for being used again when idle
+// mode is exited.
+func (pw *pickerWrapper) reset() {
 	pw.mu.Lock()
 	defer pw.mu.Unlock()
 	if pw.done {
 		return
 	}
 	pw.blockingCh = make(chan struct{})
-	pw.idle = false
 }
 
 // dropError is a wrapper error that indicates the LB policy wishes to drop the
diff --git a/vendor/google.golang.org/grpc/pickfirst.go b/vendor/google.golang.org/grpc/pickfirst.go
index abe266b021d..5128f9364dd 100644
--- a/vendor/google.golang.org/grpc/pickfirst.go
+++ b/vendor/google.golang.org/grpc/pickfirst.go
@@ -25,13 +25,18 @@ import (
 
 	"google.golang.org/grpc/balancer"
 	"google.golang.org/grpc/connectivity"
-	"google.golang.org/grpc/internal/envconfig"
+	internalgrpclog "google.golang.org/grpc/internal/grpclog"
 	"google.golang.org/grpc/internal/grpcrand"
+	"google.golang.org/grpc/internal/pretty"
+	"google.golang.org/grpc/resolver"
 	"google.golang.org/grpc/serviceconfig"
 )
 
-// PickFirstBalancerName is the name of the pick_first balancer.
-const PickFirstBalancerName = "pick_first"
+const (
+	// PickFirstBalancerName is the name of the pick_first balancer.
+	PickFirstBalancerName = "pick_first"
+	logPrefix             = "[pick-first-lb %p] "
+)
 
 func newPickfirstBuilder() balancer.Builder {
 	return &pickfirstBuilder{}
@@ -40,7 +45,9 @@ func newPickfirstBuilder() balancer.Builder {
 type pickfirstBuilder struct{}
 
 func (*pickfirstBuilder) Build(cc balancer.ClientConn, opt balancer.BuildOptions) balancer.Balancer {
-	return &pickfirstBalancer{cc: cc}
+	b := &pickfirstBalancer{cc: cc}
+	b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(logPrefix, b))
+	return b
 }
 
 func (*pickfirstBuilder) Name() string {
@@ -57,23 +64,23 @@ type pfConfig struct {
 }
 
 func (*pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
-	cfg := &pfConfig{}
-	if err := json.Unmarshal(js, cfg); err != nil {
+	var cfg pfConfig
+	if err := json.Unmarshal(js, &cfg); err != nil {
 		return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err)
 	}
 	return cfg, nil
 }
 
 type pickfirstBalancer struct {
+	logger  *internalgrpclog.PrefixLogger
 	state   connectivity.State
 	cc      balancer.ClientConn
 	subConn balancer.SubConn
-	cfg     *pfConfig
 }
 
 func (b *pickfirstBalancer) ResolverError(err error) {
-	if logger.V(2) {
-		logger.Infof("pickfirstBalancer: ResolverError called with error: %v", err)
+	if b.logger.V(2) {
+		b.logger.Infof("Received error from the name resolver: %v", err)
 	}
 	if b.subConn == nil {
 		b.state = connectivity.TransientFailure
@@ -96,35 +103,44 @@ func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState
 		// The resolver reported an empty address list. Treat it like an error by
 		// calling b.ResolverError.
 		if b.subConn != nil {
-			// Remove the old subConn. All addresses were removed, so it is no longer
-			// valid.
-			b.cc.RemoveSubConn(b.subConn)
+			// Shut down the old subConn. All addresses were removed, so it is
+			// no longer valid.
+			b.subConn.Shutdown()
 			b.subConn = nil
 		}
 		b.ResolverError(errors.New("produced zero addresses"))
 		return balancer.ErrBadResolverState
 	}
 
-	if state.BalancerConfig != nil {
-		cfg, ok := state.BalancerConfig.(*pfConfig)
-		if !ok {
-			return fmt.Errorf("pickfirstBalancer: received nil or illegal BalancerConfig (type %T): %v", state.BalancerConfig, state.BalancerConfig)
-		}
-		b.cfg = cfg
+	// We don't have to guard this block with the env var because ParseConfig
+	// already does so.
+	cfg, ok := state.BalancerConfig.(pfConfig)
+	if state.BalancerConfig != nil && !ok {
+		return fmt.Errorf("pickfirst: received illegal BalancerConfig (type %T): %v", state.BalancerConfig, state.BalancerConfig)
 	}
-
-	if envconfig.PickFirstLBConfig && b.cfg != nil && b.cfg.ShuffleAddressList {
+	if cfg.ShuffleAddressList {
+		addrs = append([]resolver.Address{}, addrs...)
 		grpcrand.Shuffle(len(addrs), func(i, j int) { addrs[i], addrs[j] = addrs[j], addrs[i] })
 	}
+
+	if b.logger.V(2) {
+		b.logger.Infof("Received new config %s, resolver state %s", pretty.ToJSON(cfg), pretty.ToJSON(state.ResolverState))
+	}
+
 	if b.subConn != nil {
 		b.cc.UpdateAddresses(b.subConn, addrs)
 		return nil
 	}
 
-	subConn, err := b.cc.NewSubConn(addrs, balancer.NewSubConnOptions{})
+	var subConn balancer.SubConn
+	subConn, err := b.cc.NewSubConn(addrs, balancer.NewSubConnOptions{
+		StateListener: func(state balancer.SubConnState) {
+			b.updateSubConnState(subConn, state)
+		},
+	})
 	if err != nil {
-		if logger.V(2) {
-			logger.Errorf("pickfirstBalancer: failed to NewSubConn: %v", err)
+		if b.logger.V(2) {
+			b.logger.Infof("Failed to create new SubConn: %v", err)
 		}
 		b.state = connectivity.TransientFailure
 		b.cc.UpdateState(balancer.State{
@@ -143,13 +159,19 @@ func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState
 	return nil
 }
 
+// UpdateSubConnState is unused as a StateListener is always registered when
+// creating SubConns.
 func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) {
-	if logger.V(2) {
-		logger.Infof("pickfirstBalancer: UpdateSubConnState: %p, %v", subConn, state)
+	b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", subConn, state)
+}
+
+func (b *pickfirstBalancer) updateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) {
+	if b.logger.V(2) {
+		b.logger.Infof("Received SubConn state update: %p, %+v", subConn, state)
 	}
 	if b.subConn != subConn {
-		if logger.V(2) {
-			logger.Infof("pickfirstBalancer: ignored state change because subConn is not recognized")
+		if b.logger.V(2) {
+			b.logger.Infof("Ignored state change because subConn is not recognized")
 		}
 		return
 	}
diff --git a/vendor/google.golang.org/grpc/preloader.go b/vendor/google.golang.org/grpc/preloader.go
index cd45547854f..73bd6336433 100644
--- a/vendor/google.golang.org/grpc/preloader.go
+++ b/vendor/google.golang.org/grpc/preloader.go
@@ -37,7 +37,7 @@ type PreparedMsg struct {
 }
 
 // Encode marshalls and compresses the message using the codec and compressor for the stream.
-func (p *PreparedMsg) Encode(s Stream, msg interface{}) error {
+func (p *PreparedMsg) Encode(s Stream, msg any) error {
 	ctx := s.Context()
 	rpcInfo, ok := rpcInfoFromContext(ctx)
 	if !ok {
diff --git a/vendor/google.golang.org/grpc/resolver/dns/dns_resolver.go b/vendor/google.golang.org/grpc/resolver/dns/dns_resolver.go
new file mode 100644
index 00000000000..14aa6f20ae0
--- /dev/null
+++ b/vendor/google.golang.org/grpc/resolver/dns/dns_resolver.go
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright 2018 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+// Package dns implements a dns resolver to be installed as the default resolver
+// in grpc.
+//
+// Deprecated: this package is imported by grpc and should not need to be
+// imported directly by users.
+package dns
+
+import (
+	"google.golang.org/grpc/internal/resolver/dns"
+	"google.golang.org/grpc/resolver"
+)
+
+// NewBuilder creates a dnsBuilder which is used to factory DNS resolvers.
+//
+// Deprecated: import grpc and use resolver.Get("dns") instead.
+func NewBuilder() resolver.Builder {
+	return dns.NewBuilder()
+}
diff --git a/vendor/google.golang.org/grpc/resolver/map.go b/vendor/google.golang.org/grpc/resolver/map.go
index efcb7f3efd8..ada5b9bb79b 100644
--- a/vendor/google.golang.org/grpc/resolver/map.go
+++ b/vendor/google.golang.org/grpc/resolver/map.go
@@ -20,7 +20,7 @@ package resolver
 
 type addressMapEntry struct {
 	addr  Address
-	value interface{}
+	value any
 }
 
 // AddressMap is a map of addresses to arbitrary values taking into account
@@ -69,7 +69,7 @@ func (l addressMapEntryList) find(addr Address) int {
 }
 
 // Get returns the value for the address in the map, if present.
-func (a *AddressMap) Get(addr Address) (value interface{}, ok bool) {
+func (a *AddressMap) Get(addr Address) (value any, ok bool) {
 	addrKey := toMapKey(&addr)
 	entryList := a.m[addrKey]
 	if entry := entryList.find(addr); entry != -1 {
@@ -79,7 +79,7 @@ func (a *AddressMap) Get(addr Address) (value interface{}, ok bool) {
 }
 
 // Set updates or adds the value to the address in the map.
-func (a *AddressMap) Set(addr Address, value interface{}) {
+func (a *AddressMap) Set(addr Address, value any) {
 	addrKey := toMapKey(&addr)
 	entryList := a.m[addrKey]
 	if entry := entryList.find(addr); entry != -1 {
@@ -127,8 +127,8 @@ func (a *AddressMap) Keys() []Address {
 }
 
 // Values returns a slice of all current map values.
-func (a *AddressMap) Values() []interface{} {
-	ret := make([]interface{}, 0, a.Len())
+func (a *AddressMap) Values() []any {
+	ret := make([]any, 0, a.Len())
 	for _, entryList := range a.m {
 		for _, entry := range entryList {
 			ret = append(ret, entry.value)
@@ -136,3 +136,116 @@ func (a *AddressMap) Values() []interface{} {
 	}
 	return ret
 }
+
+type endpointNode struct {
+	addrs map[string]struct{}
+}
+
+// Equal returns whether the unordered set of addrs are the same between the
+// endpoint nodes.
+func (en *endpointNode) Equal(en2 *endpointNode) bool {
+	if len(en.addrs) != len(en2.addrs) {
+		return false
+	}
+	for addr := range en.addrs {
+		if _, ok := en2.addrs[addr]; !ok {
+			return false
+		}
+	}
+	return true
+}
+
+func toEndpointNode(endpoint Endpoint) endpointNode {
+	en := make(map[string]struct{})
+	for _, addr := range endpoint.Addresses {
+		en[addr.Addr] = struct{}{}
+	}
+	return endpointNode{
+		addrs: en,
+	}
+}
+
+// EndpointMap is a map of endpoints to arbitrary values keyed on only the
+// unordered set of address strings within an endpoint. This map is not thread
+// safe, thus it is unsafe to access concurrently. Must be created via
+// NewEndpointMap; do not construct directly.
+type EndpointMap struct {
+	endpoints map[*endpointNode]any
+}
+
+// NewEndpointMap creates a new EndpointMap.
+func NewEndpointMap() *EndpointMap {
+	return &EndpointMap{
+		endpoints: make(map[*endpointNode]any),
+	}
+}
+
+// Get returns the value for the address in the map, if present.
+func (em *EndpointMap) Get(e Endpoint) (value any, ok bool) {
+	en := toEndpointNode(e)
+	if endpoint := em.find(en); endpoint != nil {
+		return em.endpoints[endpoint], true
+	}
+	return nil, false
+}
+
+// Set updates or adds the value to the address in the map.
+func (em *EndpointMap) Set(e Endpoint, value any) {
+	en := toEndpointNode(e)
+	if endpoint := em.find(en); endpoint != nil {
+		em.endpoints[endpoint] = value
+		return
+	}
+	em.endpoints[&en] = value
+}
+
+// Len returns the number of entries in the map.
+func (em *EndpointMap) Len() int {
+	return len(em.endpoints)
+}
+
+// Keys returns a slice of all current map keys, as endpoints specifying the
+// addresses present in the endpoint keys, in which uniqueness is determined by
+// the unordered set of addresses. Thus, endpoint information returned is not
+// the full endpoint data (drops duplicated addresses and attributes) but can be
+// used for EndpointMap accesses.
+func (em *EndpointMap) Keys() []Endpoint {
+	ret := make([]Endpoint, 0, len(em.endpoints))
+	for en := range em.endpoints {
+		var endpoint Endpoint
+		for addr := range en.addrs {
+			endpoint.Addresses = append(endpoint.Addresses, Address{Addr: addr})
+		}
+		ret = append(ret, endpoint)
+	}
+	return ret
+}
+
+// Values returns a slice of all current map values.
+func (em *EndpointMap) Values() []any {
+	ret := make([]any, 0, len(em.endpoints))
+	for _, val := range em.endpoints {
+		ret = append(ret, val)
+	}
+	return ret
+}
+
+// find returns a pointer to the endpoint node in em if the endpoint node is
+// already present. If not found, nil is returned. The comparisons are done on
+// the unordered set of addresses within an endpoint.
+func (em EndpointMap) find(e endpointNode) *endpointNode {
+	for endpoint := range em.endpoints {
+		if e.Equal(endpoint) {
+			return endpoint
+		}
+	}
+	return nil
+}
+
+// Delete removes the specified endpoint from the map.
+func (em *EndpointMap) Delete(e Endpoint) {
+	en := toEndpointNode(e)
+	if entry := em.find(en); entry != nil {
+		delete(em.endpoints, entry)
+	}
+}
diff --git a/vendor/google.golang.org/grpc/resolver/resolver.go b/vendor/google.golang.org/grpc/resolver/resolver.go
index 353c10b69a5..adf89dd9cfe 100644
--- a/vendor/google.golang.org/grpc/resolver/resolver.go
+++ b/vendor/google.golang.org/grpc/resolver/resolver.go
@@ -77,25 +77,6 @@ func GetDefaultScheme() string {
 	return defaultScheme
 }
 
-// AddressType indicates the address type returned by name resolution.
-//
-// Deprecated: use Attributes in Address instead.
-type AddressType uint8
-
-const (
-	// Backend indicates the address is for a backend server.
-	//
-	// Deprecated: use Attributes in Address instead.
-	Backend AddressType = iota
-	// GRPCLB indicates the address is for a grpclb load balancer.
-	//
-	// Deprecated: to select the GRPCLB load balancing policy, use a service
-	// config with a corresponding loadBalancingConfig.  To supply balancer
-	// addresses to the GRPCLB load balancing policy, set State.Attributes
-	// using balancer/grpclb/state.Set.
-	GRPCLB
-)
-
 // Address represents a server the client connects to.
 //
 // # Experimental
@@ -111,9 +92,6 @@ type Address struct {
 	// the address, instead of the hostname from the Dial target string. In most cases,
 	// this should not be set.
 	//
-	// If Type is GRPCLB, ServerName should be the name of the remote load
-	// balancer, not the name of the backend.
-	//
 	// WARNING: ServerName must only be populated with trusted values. It
 	// is insecure to populate it with data from untrusted inputs since untrusted
 	// values could be used to bypass the authority checks performed by TLS.
@@ -126,27 +104,29 @@ type Address struct {
 	// BalancerAttributes contains arbitrary data about this address intended
 	// for consumption by the LB policy.  These attributes do not affect SubConn
 	// creation, connection establishment, handshaking, etc.
-	BalancerAttributes *attributes.Attributes
-
-	// Type is the type of this address.
 	//
-	// Deprecated: use Attributes instead.
-	Type AddressType
+	// Deprecated: when an Address is inside an Endpoint, this field should not
+	// be used, and it will eventually be removed entirely.
+	BalancerAttributes *attributes.Attributes
 
 	// Metadata is the information associated with Addr, which may be used
 	// to make load balancing decision.
 	//
 	// Deprecated: use Attributes instead.
-	Metadata interface{}
+	Metadata any
 }
 
 // Equal returns whether a and o are identical.  Metadata is compared directly,
 // not with any recursive introspection.
+//
+// This method compares all fields of the address. When used to tell apart
+// addresses during subchannel creation or connection establishment, it might be
+// more appropriate for the caller to implement custom equality logic.
 func (a Address) Equal(o Address) bool {
 	return a.Addr == o.Addr && a.ServerName == o.ServerName &&
 		a.Attributes.Equal(o.Attributes) &&
 		a.BalancerAttributes.Equal(o.BalancerAttributes) &&
-		a.Type == o.Type && a.Metadata == o.Metadata
+		a.Metadata == o.Metadata
 }
 
 // String returns JSON formatted string representation of the address.
@@ -190,11 +170,37 @@ type BuildOptions struct {
 	Dialer func(context.Context, string) (net.Conn, error)
 }
 
+// An Endpoint is one network endpoint, or server, which may have multiple
+// addresses with which it can be accessed.
+type Endpoint struct {
+	// Addresses contains a list of addresses used to access this endpoint.
+	Addresses []Address
+
+	// Attributes contains arbitrary data about this endpoint intended for
+	// consumption by the LB policy.
+	Attributes *attributes.Attributes
+}
+
 // State contains the current Resolver state relevant to the ClientConn.
 type State struct {
 	// Addresses is the latest set of resolved addresses for the target.
+	//
+	// If a resolver sets Addresses but does not set Endpoints, one Endpoint
+	// will be created for each Address before the State is passed to the LB
+	// policy.  The BalancerAttributes of each entry in Addresses will be set
+	// in Endpoints.Attributes, and be cleared in the Endpoint's Address's
+	// BalancerAttributes.
+	//
+	// Soon, Addresses will be deprecated and replaced fully by Endpoints.
 	Addresses []Address
 
+	// Endpoints is the latest set of resolved endpoints for the target.
+	//
+	// If a resolver produces a State containing Endpoints but not Addresses,
+	// it must take care to ensure the LB policies it selects will support
+	// Endpoints.
+	Endpoints []Endpoint
+
 	// ServiceConfig contains the result from parsing the latest service
 	// config.  If it is nil, it indicates no service config is present or the
 	// resolver does not provide service configs.
@@ -234,11 +240,6 @@ type ClientConn interface {
 	//
 	// Deprecated: Use UpdateState instead.
 	NewAddress(addresses []Address)
-	// NewServiceConfig is called by resolver to notify ClientConn a new
-	// service config. The service config should be provided as a json string.
-	//
-	// Deprecated: Use UpdateState instead.
-	NewServiceConfig(serviceConfig string)
 	// ParseServiceConfig parses the provided service config and returns an
 	// object that provides the parsed config.
 	ParseServiceConfig(serviceConfigJSON string) *serviceconfig.ParseResult
@@ -254,20 +255,7 @@ type ClientConn interface {
 // target does not contain a scheme or if the parsed scheme is not registered
 // (i.e. no corresponding resolver available to resolve the endpoint), we will
 // apply the default scheme, and will attempt to reparse it.
-//
-// Examples:
-//
-//   - "dns://some_authority/foo.bar"
-//     Target{Scheme: "dns", Authority: "some_authority", Endpoint: "foo.bar"}
-//   - "foo.bar"
-//     Target{Scheme: resolver.GetDefaultScheme(), Endpoint: "foo.bar"}
-//   - "unknown_scheme://authority/endpoint"
-//     Target{Scheme: resolver.GetDefaultScheme(), Endpoint: "unknown_scheme://authority/endpoint"}
 type Target struct {
-	// Deprecated: use URL.Scheme instead.
-	Scheme string
-	// Deprecated: use URL.Host instead.
-	Authority string
 	// URL contains the parsed dial target with an optional default scheme added
 	// to it if the original dial target contained no scheme or contained an
 	// unregistered scheme. Any query params specified in the original dial
@@ -293,6 +281,11 @@ func (t Target) Endpoint() string {
 	return strings.TrimPrefix(endpoint, "/")
 }
 
+// String returns a string representation of Target.
+func (t Target) String() string {
+	return t.URL.String()
+}
+
 // Builder creates a resolver that will be used to watch name resolution updates.
 type Builder interface {
 	// Build creates a new resolver for the given target.
@@ -322,9 +315,12 @@ type Resolver interface {
 	Close()
 }
 
-// UnregisterForTesting removes the resolver builder with the given scheme from the
-// resolver map.
-// This function is for testing only.
-func UnregisterForTesting(scheme string) {
-	delete(m, scheme)
+// AuthorityOverrider is implemented by Builders that wish to override the
+// default authority for the ClientConn.
+// By default, the authority used is target.Endpoint().
+type AuthorityOverrider interface {
+	// OverrideAuthority returns the authority to use for a ClientConn with the
+	// given target. The implementation must generate it without blocking,
+	// typically in line, and must keep it unchanged.
+	OverrideAuthority(Target) string
 }
diff --git a/vendor/google.golang.org/grpc/resolver_conn_wrapper.go b/vendor/google.golang.org/grpc/resolver_conn_wrapper.go
deleted file mode 100644
index b408b3688f2..00000000000
--- a/vendor/google.golang.org/grpc/resolver_conn_wrapper.go
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- *
- * Copyright 2017 gRPC authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-package grpc
-
-import (
-	"context"
-	"strings"
-	"sync"
-
-	"google.golang.org/grpc/balancer"
-	"google.golang.org/grpc/internal/channelz"
-	"google.golang.org/grpc/internal/grpcsync"
-	"google.golang.org/grpc/internal/pretty"
-	"google.golang.org/grpc/resolver"
-	"google.golang.org/grpc/serviceconfig"
-)
-
-// resolverStateUpdater wraps the single method used by ccResolverWrapper to
-// report a state update from the actual resolver implementation.
-type resolverStateUpdater interface {
-	updateResolverState(s resolver.State, err error) error
-}
-
-// ccResolverWrapper is a wrapper on top of cc for resolvers.
-// It implements resolver.ClientConn interface.
-type ccResolverWrapper struct {
-	// The following fields are initialized when the wrapper is created and are
-	// read-only afterwards, and therefore can be accessed without a mutex.
-	cc                  resolverStateUpdater
-	channelzID          *channelz.Identifier
-	ignoreServiceConfig bool
-	opts                ccResolverWrapperOpts
-	serializer          *grpcsync.CallbackSerializer // To serialize all incoming calls.
-	serializerCancel    context.CancelFunc           // To close the serializer, accessed only from close().
-
-	// All incoming (resolver --> gRPC) calls are guaranteed to execute in a
-	// mutually exclusive manner as they are scheduled on the serializer.
-	// Fields accessed *only* in these serializer callbacks, can therefore be
-	// accessed without a mutex.
-	curState resolver.State
-
-	// mu guards access to the below fields.
-	mu       sync.Mutex
-	closed   bool
-	resolver resolver.Resolver // Accessed only from outgoing calls.
-}
-
-// ccResolverWrapperOpts wraps the arguments to be passed when creating a new
-// ccResolverWrapper.
-type ccResolverWrapperOpts struct {
-	target     resolver.Target       // User specified dial target to resolve.
-	builder    resolver.Builder      // Resolver builder to use.
-	bOpts      resolver.BuildOptions // Resolver build options to use.
-	channelzID *channelz.Identifier  // Channelz identifier for the channel.
-}
-
-// newCCResolverWrapper uses the resolver.Builder to build a Resolver and
-// returns a ccResolverWrapper object which wraps the newly built resolver.
-func newCCResolverWrapper(cc resolverStateUpdater, opts ccResolverWrapperOpts) (*ccResolverWrapper, error) {
-	ctx, cancel := context.WithCancel(context.Background())
-	ccr := &ccResolverWrapper{
-		cc:                  cc,
-		channelzID:          opts.channelzID,
-		ignoreServiceConfig: opts.bOpts.DisableServiceConfig,
-		opts:                opts,
-		serializer:          grpcsync.NewCallbackSerializer(ctx),
-		serializerCancel:    cancel,
-	}
-
-	// Cannot hold the lock at build time because the resolver can send an
-	// update or error inline and these incoming calls grab the lock to schedule
-	// a callback in the serializer.
-	r, err := opts.builder.Build(opts.target, ccr, opts.bOpts)
-	if err != nil {
-		cancel()
-		return nil, err
-	}
-
-	// Any error reported by the resolver at build time that leads to a
-	// re-resolution request from the balancer is dropped by grpc until we
-	// return from this function. So, we don't have to handle pending resolveNow
-	// requests here.
-	ccr.mu.Lock()
-	ccr.resolver = r
-	ccr.mu.Unlock()
-
-	return ccr, nil
-}
-
-func (ccr *ccResolverWrapper) resolveNow(o resolver.ResolveNowOptions) {
-	ccr.mu.Lock()
-	defer ccr.mu.Unlock()
-
-	// ccr.resolver field is set only after the call to Build() returns. But in
-	// the process of building, the resolver may send an error update which when
-	// propagated to the balancer may result in a re-resolution request.
-	if ccr.closed || ccr.resolver == nil {
-		return
-	}
-	ccr.resolver.ResolveNow(o)
-}
-
-func (ccr *ccResolverWrapper) close() {
-	ccr.mu.Lock()
-	if ccr.closed {
-		ccr.mu.Unlock()
-		return
-	}
-
-	channelz.Info(logger, ccr.channelzID, "Closing the name resolver")
-
-	// Close the serializer to ensure that no more calls from the resolver are
-	// handled, before actually closing the resolver.
-	ccr.serializerCancel()
-	ccr.closed = true
-	r := ccr.resolver
-	ccr.mu.Unlock()
-
-	// Give enqueued callbacks a chance to finish.
-	<-ccr.serializer.Done
-
-	// Spawn a goroutine to close the resolver (since it may block trying to
-	// cleanup all allocated resources) and return early.
-	go r.Close()
-}
-
-// serializerScheduleLocked is a convenience method to schedule a function to be
-// run on the serializer while holding ccr.mu.
-func (ccr *ccResolverWrapper) serializerScheduleLocked(f func(context.Context)) {
-	ccr.mu.Lock()
-	ccr.serializer.Schedule(f)
-	ccr.mu.Unlock()
-}
-
-// UpdateState is called by resolver implementations to report new state to gRPC
-// which includes addresses and service config.
-func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error {
-	errCh := make(chan error, 1)
-	ok := ccr.serializer.Schedule(func(context.Context) {
-		ccr.addChannelzTraceEvent(s)
-		ccr.curState = s
-		if err := ccr.cc.updateResolverState(ccr.curState, nil); err == balancer.ErrBadResolverState {
-			errCh <- balancer.ErrBadResolverState
-			return
-		}
-		errCh <- nil
-	})
-	if !ok {
-		// The only time when Schedule() fail to add the callback to the
-		// serializer is when the serializer is closed, and this happens only
-		// when the resolver wrapper is closed.
-		return nil
-	}
-	return <-errCh
-}
-
-// ReportError is called by resolver implementations to report errors
-// encountered during name resolution to gRPC.
-func (ccr *ccResolverWrapper) ReportError(err error) {
-	ccr.serializerScheduleLocked(func(_ context.Context) {
-		channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: reporting error to cc: %v", err)
-		ccr.cc.updateResolverState(resolver.State{}, err)
-	})
-}
-
-// NewAddress is called by the resolver implementation to send addresses to
-// gRPC.
-func (ccr *ccResolverWrapper) NewAddress(addrs []resolver.Address) {
-	ccr.serializerScheduleLocked(func(_ context.Context) {
-		ccr.addChannelzTraceEvent(resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig})
-		ccr.curState.Addresses = addrs
-		ccr.cc.updateResolverState(ccr.curState, nil)
-	})
-}
-
-// NewServiceConfig is called by the resolver implementation to send service
-// configs to gRPC.
-func (ccr *ccResolverWrapper) NewServiceConfig(sc string) {
-	ccr.serializerScheduleLocked(func(_ context.Context) {
-		channelz.Infof(logger, ccr.channelzID, "ccResolverWrapper: got new service config: %s", sc)
-		if ccr.ignoreServiceConfig {
-			channelz.Info(logger, ccr.channelzID, "Service config lookups disabled; ignoring config")
-			return
-		}
-		scpr := parseServiceConfig(sc)
-		if scpr.Err != nil {
-			channelz.Warningf(logger, ccr.channelzID, "ccResolverWrapper: error parsing service config: %v", scpr.Err)
-			return
-		}
-		ccr.addChannelzTraceEvent(resolver.State{Addresses: ccr.curState.Addresses, ServiceConfig: scpr})
-		ccr.curState.ServiceConfig = scpr
-		ccr.cc.updateResolverState(ccr.curState, nil)
-	})
-}
-
-// ParseServiceConfig is called by resolver implementations to parse a JSON
-// representation of the service config.
-func (ccr *ccResolverWrapper) ParseServiceConfig(scJSON string) *serviceconfig.ParseResult {
-	return parseServiceConfig(scJSON)
-}
-
-// addChannelzTraceEvent adds a channelz trace event containing the new
-// state received from resolver implementations.
-func (ccr *ccResolverWrapper) addChannelzTraceEvent(s resolver.State) {
-	var updates []string
-	var oldSC, newSC *ServiceConfig
-	var oldOK, newOK bool
-	if ccr.curState.ServiceConfig != nil {
-		oldSC, oldOK = ccr.curState.ServiceConfig.Config.(*ServiceConfig)
-	}
-	if s.ServiceConfig != nil {
-		newSC, newOK = s.ServiceConfig.Config.(*ServiceConfig)
-	}
-	if oldOK != newOK || (oldOK && newOK && oldSC.rawJSONString != newSC.rawJSONString) {
-		updates = append(updates, "service config updated")
-	}
-	if len(ccr.curState.Addresses) > 0 && len(s.Addresses) == 0 {
-		updates = append(updates, "resolver returned an empty address list")
-	} else if len(ccr.curState.Addresses) == 0 && len(s.Addresses) > 0 {
-		updates = append(updates, "resolver returned new addresses")
-	}
-	channelz.Infof(logger, ccr.channelzID, "Resolver state updated: %s (%v)", pretty.ToJSON(s), strings.Join(updates, "; "))
-}
diff --git a/vendor/google.golang.org/grpc/resolver_wrapper.go b/vendor/google.golang.org/grpc/resolver_wrapper.go
new file mode 100644
index 00000000000..c79bab12149
--- /dev/null
+++ b/vendor/google.golang.org/grpc/resolver_wrapper.go
@@ -0,0 +1,197 @@
+/*
+ *
+ * Copyright 2017 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package grpc
+
+import (
+	"context"
+	"strings"
+	"sync"
+
+	"google.golang.org/grpc/internal/channelz"
+	"google.golang.org/grpc/internal/grpcsync"
+	"google.golang.org/grpc/internal/pretty"
+	"google.golang.org/grpc/resolver"
+	"google.golang.org/grpc/serviceconfig"
+)
+
+// ccResolverWrapper is a wrapper on top of cc for resolvers.
+// It implements resolver.ClientConn interface.
+type ccResolverWrapper struct {
+	// The following fields are initialized when the wrapper is created and are
+	// read-only afterwards, and therefore can be accessed without a mutex.
+	cc                  *ClientConn
+	ignoreServiceConfig bool
+	serializer          *grpcsync.CallbackSerializer
+	serializerCancel    context.CancelFunc
+
+	resolver resolver.Resolver // only accessed within the serializer
+
+	// The following fields are protected by mu.  Caller must take cc.mu before
+	// taking mu.
+	mu       sync.Mutex
+	curState resolver.State
+	closed   bool
+}
+
+// newCCResolverWrapper initializes the ccResolverWrapper.  It can only be used
+// after calling start, which builds the resolver.
+func newCCResolverWrapper(cc *ClientConn) *ccResolverWrapper {
+	ctx, cancel := context.WithCancel(cc.ctx)
+	return &ccResolverWrapper{
+		cc:                  cc,
+		ignoreServiceConfig: cc.dopts.disableServiceConfig,
+		serializer:          grpcsync.NewCallbackSerializer(ctx),
+		serializerCancel:    cancel,
+	}
+}
+
+// start builds the name resolver using the resolver.Builder in cc and returns
+// any error encountered.  It must always be the first operation performed on
+// any newly created ccResolverWrapper, except that close may be called instead.
+func (ccr *ccResolverWrapper) start() error {
+	errCh := make(chan error)
+	ccr.serializer.Schedule(func(ctx context.Context) {
+		if ctx.Err() != nil {
+			return
+		}
+		opts := resolver.BuildOptions{
+			DisableServiceConfig: ccr.cc.dopts.disableServiceConfig,
+			DialCreds:            ccr.cc.dopts.copts.TransportCredentials,
+			CredsBundle:          ccr.cc.dopts.copts.CredsBundle,
+			Dialer:               ccr.cc.dopts.copts.Dialer,
+		}
+		var err error
+		ccr.resolver, err = ccr.cc.resolverBuilder.Build(ccr.cc.parsedTarget, ccr, opts)
+		errCh <- err
+	})
+	return <-errCh
+}
+
+func (ccr *ccResolverWrapper) resolveNow(o resolver.ResolveNowOptions) {
+	ccr.serializer.Schedule(func(ctx context.Context) {
+		if ctx.Err() != nil || ccr.resolver == nil {
+			return
+		}
+		ccr.resolver.ResolveNow(o)
+	})
+}
+
+// close initiates async shutdown of the wrapper.  To determine the wrapper has
+// finished shutting down, the channel should block on ccr.serializer.Done()
+// without cc.mu held.
+func (ccr *ccResolverWrapper) close() {
+	channelz.Info(logger, ccr.cc.channelzID, "Closing the name resolver")
+	ccr.mu.Lock()
+	ccr.closed = true
+	ccr.mu.Unlock()
+
+	ccr.serializer.Schedule(func(context.Context) {
+		if ccr.resolver == nil {
+			return
+		}
+		ccr.resolver.Close()
+		ccr.resolver = nil
+	})
+	ccr.serializerCancel()
+}
+
+// UpdateState is called by resolver implementations to report new state to gRPC
+// which includes addresses and service config.
+func (ccr *ccResolverWrapper) UpdateState(s resolver.State) error {
+	ccr.cc.mu.Lock()
+	ccr.mu.Lock()
+	if ccr.closed {
+		ccr.mu.Unlock()
+		ccr.cc.mu.Unlock()
+		return nil
+	}
+	if s.Endpoints == nil {
+		s.Endpoints = make([]resolver.Endpoint, 0, len(s.Addresses))
+		for _, a := range s.Addresses {
+			ep := resolver.Endpoint{Addresses: []resolver.Address{a}, Attributes: a.BalancerAttributes}
+			ep.Addresses[0].BalancerAttributes = nil
+			s.Endpoints = append(s.Endpoints, ep)
+		}
+	}
+	ccr.addChannelzTraceEvent(s)
+	ccr.curState = s
+	ccr.mu.Unlock()
+	return ccr.cc.updateResolverStateAndUnlock(s, nil)
+}
+
+// ReportError is called by resolver implementations to report errors
+// encountered during name resolution to gRPC.
+func (ccr *ccResolverWrapper) ReportError(err error) {
+	ccr.cc.mu.Lock()
+	ccr.mu.Lock()
+	if ccr.closed {
+		ccr.mu.Unlock()
+		ccr.cc.mu.Unlock()
+		return
+	}
+	ccr.mu.Unlock()
+	channelz.Warningf(logger, ccr.cc.channelzID, "ccResolverWrapper: reporting error to cc: %v", err)
+	ccr.cc.updateResolverStateAndUnlock(resolver.State{}, err)
+}
+
+// NewAddress is called by the resolver implementation to send addresses to
+// gRPC.
+func (ccr *ccResolverWrapper) NewAddress(addrs []resolver.Address) {
+	ccr.cc.mu.Lock()
+	ccr.mu.Lock()
+	if ccr.closed {
+		ccr.mu.Unlock()
+		ccr.cc.mu.Unlock()
+		return
+	}
+	s := resolver.State{Addresses: addrs, ServiceConfig: ccr.curState.ServiceConfig}
+	ccr.addChannelzTraceEvent(s)
+	ccr.curState = s
+	ccr.mu.Unlock()
+	ccr.cc.updateResolverStateAndUnlock(s, nil)
+}
+
+// ParseServiceConfig is called by resolver implementations to parse a JSON
+// representation of the service config.
+func (ccr *ccResolverWrapper) ParseServiceConfig(scJSON string) *serviceconfig.ParseResult {
+	return parseServiceConfig(scJSON)
+}
+
+// addChannelzTraceEvent adds a channelz trace event containing the new
+// state received from resolver implementations.
+func (ccr *ccResolverWrapper) addChannelzTraceEvent(s resolver.State) {
+	var updates []string
+	var oldSC, newSC *ServiceConfig
+	var oldOK, newOK bool
+	if ccr.curState.ServiceConfig != nil {
+		oldSC, oldOK = ccr.curState.ServiceConfig.Config.(*ServiceConfig)
+	}
+	if s.ServiceConfig != nil {
+		newSC, newOK = s.ServiceConfig.Config.(*ServiceConfig)
+	}
+	if oldOK != newOK || (oldOK && newOK && oldSC.rawJSONString != newSC.rawJSONString) {
+		updates = append(updates, "service config updated")
+	}
+	if len(ccr.curState.Addresses) > 0 && len(s.Addresses) == 0 {
+		updates = append(updates, "resolver returned an empty address list")
+	} else if len(ccr.curState.Addresses) == 0 && len(s.Addresses) > 0 {
+		updates = append(updates, "resolver returned new addresses")
+	}
+	channelz.Infof(logger, ccr.cc.channelzID, "Resolver state updated: %s (%v)", pretty.ToJSON(s), strings.Join(updates, "; "))
+}
diff --git a/vendor/google.golang.org/grpc/rpc_util.go b/vendor/google.golang.org/grpc/rpc_util.go
index 2030736a306..a4b6bc6873c 100644
--- a/vendor/google.golang.org/grpc/rpc_util.go
+++ b/vendor/google.golang.org/grpc/rpc_util.go
@@ -75,7 +75,7 @@ func NewGZIPCompressorWithLevel(level int) (Compressor, error) {
 	}
 	return &gzipCompressor{
 		pool: sync.Pool{
-			New: func() interface{} {
+			New: func() any {
 				w, err := gzip.NewWriterLevel(io.Discard, level)
 				if err != nil {
 					panic(err)
@@ -577,6 +577,9 @@ type parser struct {
 	// The header of a gRPC message. Find more detail at
 	// https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md
 	header [5]byte
+
+	// recvBufferPool is the pool of shared receive buffers.
+	recvBufferPool SharedBufferPool
 }
 
 // recvMsg reads a complete gRPC message from the stream.
@@ -610,9 +613,7 @@ func (p *parser) recvMsg(maxReceiveMessageSize int) (pf payloadFormat, msg []byt
 	if int(length) > maxReceiveMessageSize {
 		return 0, nil, status.Errorf(codes.ResourceExhausted, "grpc: received message larger than max (%d vs. %d)", length, maxReceiveMessageSize)
 	}
-	// TODO(bradfitz,zhaoq): garbage. reuse buffer after proto decoding instead
-	// of making it for each message:
-	msg = make([]byte, int(length))
+	msg = p.recvBufferPool.Get(int(length))
 	if _, err := p.r.Read(msg); err != nil {
 		if err == io.EOF {
 			err = io.ErrUnexpectedEOF
@@ -625,7 +626,7 @@ func (p *parser) recvMsg(maxReceiveMessageSize int) (pf payloadFormat, msg []byt
 // encode serializes msg and returns a buffer containing the message, or an
 // error if it is too large to be transmitted by grpc.  If msg is nil, it
 // generates an empty message.
-func encode(c baseCodec, msg interface{}) ([]byte, error) {
+func encode(c baseCodec, msg any) ([]byte, error) {
 	if msg == nil { // NOTE: typed nils will not be caught by this check
 		return nil, nil
 	}
@@ -639,14 +640,18 @@ func encode(c baseCodec, msg interface{}) ([]byte, error) {
 	return b, nil
 }
 
-// compress returns the input bytes compressed by compressor or cp.  If both
-// compressors are nil, returns nil.
+// compress returns the input bytes compressed by compressor or cp.
+// If both compressors are nil, or if the message has zero length, returns nil,
+// indicating no compression was done.
 //
 // TODO(dfawley): eliminate cp parameter by wrapping Compressor in an encoding.Compressor.
 func compress(in []byte, cp Compressor, compressor encoding.Compressor) ([]byte, error) {
 	if compressor == nil && cp == nil {
 		return nil, nil
 	}
+	if len(in) == 0 {
+		return nil, nil
+	}
 	wrapErr := func(err error) error {
 		return status.Errorf(codes.Internal, "grpc: error while compressing: %v", err.Error())
 	}
@@ -692,7 +697,7 @@ func msgHeader(data, compData []byte) (hdr []byte, payload []byte) {
 	return hdr, data
 }
 
-func outPayload(client bool, msg interface{}, data, payload []byte, t time.Time) *stats.OutPayload {
+func outPayload(client bool, msg any, data, payload []byte, t time.Time) *stats.OutPayload {
 	return &stats.OutPayload{
 		Client:           client,
 		Payload:          msg,
@@ -726,12 +731,12 @@ type payloadInfo struct {
 }
 
 func recvAndDecompress(p *parser, s *transport.Stream, dc Decompressor, maxReceiveMessageSize int, payInfo *payloadInfo, compressor encoding.Compressor) ([]byte, error) {
-	pf, d, err := p.recvMsg(maxReceiveMessageSize)
+	pf, buf, err := p.recvMsg(maxReceiveMessageSize)
 	if err != nil {
 		return nil, err
 	}
 	if payInfo != nil {
-		payInfo.compressedLength = len(d)
+		payInfo.compressedLength = len(buf)
 	}
 
 	if st := checkRecvPayload(pf, s.RecvCompress(), compressor != nil || dc != nil); st != nil {
@@ -743,10 +748,10 @@ func recvAndDecompress(p *parser, s *transport.Stream, dc Decompressor, maxRecei
 		// To match legacy behavior, if the decompressor is set by WithDecompressor or RPCDecompressor,
 		// use this decompressor as the default.
 		if dc != nil {
-			d, err = dc.Do(bytes.NewReader(d))
-			size = len(d)
+			buf, err = dc.Do(bytes.NewReader(buf))
+			size = len(buf)
 		} else {
-			d, size, err = decompress(compressor, d, maxReceiveMessageSize)
+			buf, size, err = decompress(compressor, buf, maxReceiveMessageSize)
 		}
 		if err != nil {
 			return nil, status.Errorf(codes.Internal, "grpc: failed to decompress the received message: %v", err)
@@ -757,7 +762,7 @@ func recvAndDecompress(p *parser, s *transport.Stream, dc Decompressor, maxRecei
 			return nil, status.Errorf(codes.ResourceExhausted, "grpc: received message after decompression larger than max (%d vs. %d)", size, maxReceiveMessageSize)
 		}
 	}
-	return d, nil
+	return buf, nil
 }
 
 // Using compressor, decompress d, returning data and size.
@@ -791,16 +796,18 @@ func decompress(compressor encoding.Compressor, d []byte, maxReceiveMessageSize
 // For the two compressor parameters, both should not be set, but if they are,
 // dc takes precedence over compressor.
 // TODO(dfawley): wrap the old compressor/decompressor using the new API?
-func recv(p *parser, c baseCodec, s *transport.Stream, dc Decompressor, m interface{}, maxReceiveMessageSize int, payInfo *payloadInfo, compressor encoding.Compressor) error {
-	d, err := recvAndDecompress(p, s, dc, maxReceiveMessageSize, payInfo, compressor)
+func recv(p *parser, c baseCodec, s *transport.Stream, dc Decompressor, m any, maxReceiveMessageSize int, payInfo *payloadInfo, compressor encoding.Compressor) error {
+	buf, err := recvAndDecompress(p, s, dc, maxReceiveMessageSize, payInfo, compressor)
 	if err != nil {
 		return err
 	}
-	if err := c.Unmarshal(d, m); err != nil {
+	if err := c.Unmarshal(buf, m); err != nil {
 		return status.Errorf(codes.Internal, "grpc: failed to unmarshal the received message: %v", err)
 	}
 	if payInfo != nil {
-		payInfo.uncompressedBytes = d
+		payInfo.uncompressedBytes = buf
+	} else {
+		p.recvBufferPool.Put(&buf)
 	}
 	return nil
 }
@@ -860,19 +867,22 @@ func ErrorDesc(err error) string {
 // Errorf returns nil if c is OK.
 //
 // Deprecated: use status.Errorf instead.
-func Errorf(c codes.Code, format string, a ...interface{}) error {
+func Errorf(c codes.Code, format string, a ...any) error {
 	return status.Errorf(c, format, a...)
 }
 
+var errContextCanceled = status.Error(codes.Canceled, context.Canceled.Error())
+var errContextDeadline = status.Error(codes.DeadlineExceeded, context.DeadlineExceeded.Error())
+
 // toRPCErr converts an error into an error from the status package.
 func toRPCErr(err error) error {
 	switch err {
 	case nil, io.EOF:
 		return err
 	case context.DeadlineExceeded:
-		return status.Error(codes.DeadlineExceeded, err.Error())
+		return errContextDeadline
 	case context.Canceled:
-		return status.Error(codes.Canceled, err.Error())
+		return errContextCanceled
 	case io.ErrUnexpectedEOF:
 		return status.Error(codes.Internal, err.Error())
 	}
diff --git a/vendor/google.golang.org/grpc/server.go b/vendor/google.golang.org/grpc/server.go
index 8869cc906f2..e89c5ac6136 100644
--- a/vendor/google.golang.org/grpc/server.go
+++ b/vendor/google.golang.org/grpc/server.go
@@ -70,9 +70,10 @@ func init() {
 	internal.GetServerCredentials = func(srv *Server) credentials.TransportCredentials {
 		return srv.opts.creds
 	}
-	internal.DrainServerTransports = func(srv *Server, addr string) {
-		srv.drainServerTransports(addr)
+	internal.IsRegisteredMethod = func(srv *Server, method string) bool {
+		return srv.isRegisteredMethod(method)
 	}
+	internal.ServerFromContext = serverFromContext
 	internal.AddGlobalServerOptions = func(opt ...ServerOption) {
 		globalServerOptions = append(globalServerOptions, opt...)
 	}
@@ -81,12 +82,13 @@ func init() {
 	}
 	internal.BinaryLogger = binaryLogger
 	internal.JoinServerOptions = newJoinServerOption
+	internal.RecvBufferPool = recvBufferPool
 }
 
 var statusOK = status.New(codes.OK, "")
 var logger = grpclog.Component("core")
 
-type methodHandler func(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor UnaryServerInterceptor) (interface{}, error)
+type methodHandler func(srv any, ctx context.Context, dec func(any) error, interceptor UnaryServerInterceptor) (any, error)
 
 // MethodDesc represents an RPC service's method specification.
 type MethodDesc struct {
@@ -99,20 +101,20 @@ type ServiceDesc struct {
 	ServiceName string
 	// The pointer to the service interface. Used to check whether the user
 	// provided implementation satisfies the interface requirements.
-	HandlerType interface{}
+	HandlerType any
 	Methods     []MethodDesc
 	Streams     []StreamDesc
-	Metadata    interface{}
+	Metadata    any
 }
 
 // serviceInfo wraps information about a service. It is very similar to
 // ServiceDesc and is constructed from it for internal purposes.
 type serviceInfo struct {
 	// Contains the implementation for the methods in this service.
-	serviceImpl interface{}
+	serviceImpl any
 	methods     map[string]*MethodDesc
 	streams     map[string]*StreamDesc
-	mdata       interface{}
+	mdata       any
 }
 
 // Server is a gRPC server to serve RPC requests.
@@ -134,12 +136,14 @@ type Server struct {
 	quit               *grpcsync.Event
 	done               *grpcsync.Event
 	channelzRemoveOnce sync.Once
-	serveWG            sync.WaitGroup // counts active Serve goroutines for GracefulStop
+	serveWG            sync.WaitGroup // counts active Serve goroutines for Stop/GracefulStop
+	handlersWG         sync.WaitGroup // counts active method handler goroutines
 
 	channelzID *channelz.Identifier
 	czData     *channelzData
 
-	serverWorkerChannel chan func()
+	serverWorkerChannel      chan func()
+	serverWorkerChannelClose func()
 }
 
 type serverOptions struct {
@@ -164,10 +168,13 @@ type serverOptions struct {
 	initialConnWindowSize int32
 	writeBufferSize       int
 	readBufferSize        int
+	sharedWriteBuffer     bool
 	connectionTimeout     time.Duration
 	maxHeaderListSize     *uint32
 	headerTableSize       *uint32
 	numServerWorkers      uint32
+	recvBufferPool        SharedBufferPool
+	waitForHandlers       bool
 }
 
 var defaultServerOptions = serverOptions{
@@ -177,6 +184,7 @@ var defaultServerOptions = serverOptions{
 	connectionTimeout:     120 * time.Second,
 	writeBufferSize:       defaultWriteBufSize,
 	readBufferSize:        defaultReadBufSize,
+	recvBufferPool:        nopBufferPool{},
 }
 var globalServerOptions []ServerOption
 
@@ -228,6 +236,20 @@ func newJoinServerOption(opts ...ServerOption) ServerOption {
 	return &joinServerOption{opts: opts}
 }
 
+// SharedWriteBuffer allows reusing per-connection transport write buffer.
+// If this option is set to true every connection will release the buffer after
+// flushing the data on the wire.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a
+// later release.
+func SharedWriteBuffer(val bool) ServerOption {
+	return newFuncServerOption(func(o *serverOptions) {
+		o.sharedWriteBuffer = val
+	})
+}
+
 // WriteBufferSize determines how much data can be batched before doing a write
 // on the wire. The corresponding memory allocation for this buffer will be
 // twice the size to keep syscalls low. The default value for this buffer is
@@ -268,9 +290,9 @@ func InitialConnWindowSize(s int32) ServerOption {
 
 // KeepaliveParams returns a ServerOption that sets keepalive and max-age parameters for the server.
 func KeepaliveParams(kp keepalive.ServerParameters) ServerOption {
-	if kp.Time > 0 && kp.Time < time.Second {
+	if kp.Time > 0 && kp.Time < internal.KeepaliveMinServerPingTime {
 		logger.Warning("Adjusting keepalive ping interval to minimum period of 1s")
-		kp.Time = time.Second
+		kp.Time = internal.KeepaliveMinServerPingTime
 	}
 
 	return newFuncServerOption(func(o *serverOptions) {
@@ -550,6 +572,44 @@ func NumStreamWorkers(numServerWorkers uint32) ServerOption {
 	})
 }
 
+// WaitForHandlers cause Stop to wait until all outstanding method handlers have
+// exited before returning.  If false, Stop will return as soon as all
+// connections have closed, but method handlers may still be running. By
+// default, Stop does not wait for method handlers to return.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a
+// later release.
+func WaitForHandlers(w bool) ServerOption {
+	return newFuncServerOption(func(o *serverOptions) {
+		o.waitForHandlers = w
+	})
+}
+
+// RecvBufferPool returns a ServerOption that configures the server
+// to use the provided shared buffer pool for parsing incoming messages. Depending
+// on the application's workload, this could result in reduced memory allocation.
+//
+// If you are unsure about how to implement a memory pool but want to utilize one,
+// begin with grpc.NewSharedBufferPool.
+//
+// Note: The shared buffer pool feature will not be active if any of the following
+// options are used: StatsHandler, EnableTracing, or binary logging. In such
+// cases, the shared buffer pool will be ignored.
+//
+// Deprecated: use experimental.WithRecvBufferPool instead.  Will be deleted in
+// v1.60.0 or later.
+func RecvBufferPool(bufferPool SharedBufferPool) ServerOption {
+	return recvBufferPool(bufferPool)
+}
+
+func recvBufferPool(bufferPool SharedBufferPool) ServerOption {
+	return newFuncServerOption(func(o *serverOptions) {
+		o.recvBufferPool = bufferPool
+	})
+}
+
 // serverWorkerResetThreshold defines how often the stack must be reset. Every
 // N requests, by spawning a new goroutine in its place, a worker can reset its
 // stack so that large stacks don't live in memory forever. 2^16 should allow
@@ -578,15 +638,14 @@ func (s *Server) serverWorker() {
 // connections to reduce the time spent overall on runtime.morestack.
 func (s *Server) initServerWorkers() {
 	s.serverWorkerChannel = make(chan func())
+	s.serverWorkerChannelClose = grpcsync.OnceFunc(func() {
+		close(s.serverWorkerChannel)
+	})
 	for i := uint32(0); i < s.opts.numServerWorkers; i++ {
 		go s.serverWorker()
 	}
 }
 
-func (s *Server) stopServerWorkers() {
-	close(s.serverWorkerChannel)
-}
-
 // NewServer creates a gRPC server which has no service registered and has not
 // started to accept requests yet.
 func NewServer(opt ...ServerOption) *Server {
@@ -625,7 +684,7 @@ func NewServer(opt ...ServerOption) *Server {
 
 // printf records an event in s's event log, unless s has been stopped.
 // REQUIRES s.mu is held.
-func (s *Server) printf(format string, a ...interface{}) {
+func (s *Server) printf(format string, a ...any) {
 	if s.events != nil {
 		s.events.Printf(format, a...)
 	}
@@ -633,7 +692,7 @@ func (s *Server) printf(format string, a ...interface{}) {
 
 // errorf records an error in s's event log, unless s has been stopped.
 // REQUIRES s.mu is held.
-func (s *Server) errorf(format string, a ...interface{}) {
+func (s *Server) errorf(format string, a ...any) {
 	if s.events != nil {
 		s.events.Errorf(format, a...)
 	}
@@ -648,14 +707,14 @@ type ServiceRegistrar interface {
 	// once the server has started serving.
 	// desc describes the service and its methods and handlers. impl is the
 	// service implementation which is passed to the method handlers.
-	RegisterService(desc *ServiceDesc, impl interface{})
+	RegisterService(desc *ServiceDesc, impl any)
 }
 
 // RegisterService registers a service and its implementation to the gRPC
 // server. It is called from the IDL generated code. This must be called before
 // invoking Serve. If ss is non-nil (for legacy code), its type is checked to
 // ensure it implements sd.HandlerType.
-func (s *Server) RegisterService(sd *ServiceDesc, ss interface{}) {
+func (s *Server) RegisterService(sd *ServiceDesc, ss any) {
 	if ss != nil {
 		ht := reflect.TypeOf(sd.HandlerType).Elem()
 		st := reflect.TypeOf(ss)
@@ -666,7 +725,7 @@ func (s *Server) RegisterService(sd *ServiceDesc, ss interface{}) {
 	s.register(sd, ss)
 }
 
-func (s *Server) register(sd *ServiceDesc, ss interface{}) {
+func (s *Server) register(sd *ServiceDesc, ss any) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	s.printf("RegisterService(%q)", sd.ServiceName)
@@ -707,7 +766,7 @@ type MethodInfo struct {
 type ServiceInfo struct {
 	Methods []MethodInfo
 	// Metadata is the metadata specified in ServiceDesc when registering service.
-	Metadata interface{}
+	Metadata any
 }
 
 // GetServiceInfo returns a map from service names to ServiceInfo.
@@ -768,6 +827,18 @@ func (l *listenSocket) Close() error {
 // Serve returns when lis.Accept fails with fatal errors.  lis will be closed when
 // this method returns.
 // Serve will return a non-nil error unless Stop or GracefulStop is called.
+//
+// Note: All supported releases of Go (as of December 2023) override the OS
+// defaults for TCP keepalive time and interval to 15s. To enable TCP keepalive
+// with OS defaults for keepalive time and interval, callers need to do the
+// following two things:
+//   - pass a net.Listener created by calling the Listen method on a
+//     net.ListenConfig with the `KeepAlive` field set to a negative value. This
+//     will result in the Go standard library not overriding OS defaults for TCP
+//     keepalive interval and time. But this will also result in the Go standard
+//     library not enabling TCP keepalives by default.
+//   - override the Accept method on the passed in net.Listener and set the
+//     SO_KEEPALIVE socket option to enable TCP keepalives, with OS defaults.
 func (s *Server) Serve(lis net.Listener) error {
 	s.mu.Lock()
 	s.printf("serving")
@@ -875,24 +946,21 @@ func (s *Server) handleRawConn(lisAddr string, rawConn net.Conn) {
 		return
 	}
 
+	if cc, ok := rawConn.(interface {
+		PassServerTransport(transport.ServerTransport)
+	}); ok {
+		cc.PassServerTransport(st)
+	}
+
 	if !s.addConn(lisAddr, st) {
 		return
 	}
 	go func() {
-		s.serveStreams(st)
+		s.serveStreams(context.Background(), st, rawConn)
 		s.removeConn(lisAddr, st)
 	}()
 }
 
-func (s *Server) drainServerTransports(addr string) {
-	s.mu.Lock()
-	conns := s.conns[addr]
-	for st := range conns {
-		st.Drain("")
-	}
-	s.mu.Unlock()
-}
-
 // newHTTP2Transport sets up a http/2 transport (using the
 // gRPC http2 server transport in transport/http2_server.go).
 func (s *Server) newHTTP2Transport(c net.Conn) transport.ServerTransport {
@@ -908,6 +976,7 @@ func (s *Server) newHTTP2Transport(c net.Conn) transport.ServerTransport {
 		InitialConnWindowSize: s.opts.initialConnWindowSize,
 		WriteBufferSize:       s.opts.writeBufferSize,
 		ReadBufferSize:        s.opts.readBufferSize,
+		SharedWriteBuffer:     s.opts.sharedWriteBuffer,
 		ChannelzParentID:      s.channelzID,
 		MaxHeaderListSize:     s.opts.maxHeaderListSize,
 		HeaderTableSize:       s.opts.headerTableSize,
@@ -932,19 +1001,32 @@ func (s *Server) newHTTP2Transport(c net.Conn) transport.ServerTransport {
 	return st
 }
 
-func (s *Server) serveStreams(st transport.ServerTransport) {
-	defer st.Close(errors.New("finished serving streams for the server transport"))
-	var wg sync.WaitGroup
+func (s *Server) serveStreams(ctx context.Context, st transport.ServerTransport, rawConn net.Conn) {
+	ctx = transport.SetConnection(ctx, rawConn)
+	ctx = peer.NewContext(ctx, st.Peer())
+	for _, sh := range s.opts.statsHandlers {
+		ctx = sh.TagConn(ctx, &stats.ConnTagInfo{
+			RemoteAddr: st.Peer().Addr,
+			LocalAddr:  st.Peer().LocalAddr,
+		})
+		sh.HandleConn(ctx, &stats.ConnBegin{})
+	}
 
-	streamQuota := newHandlerQuota(s.opts.maxConcurrentStreams)
-	st.HandleStreams(func(stream *transport.Stream) {
-		wg.Add(1)
+	defer func() {
+		st.Close(errors.New("finished serving streams for the server transport"))
+		for _, sh := range s.opts.statsHandlers {
+			sh.HandleConn(ctx, &stats.ConnEnd{})
+		}
+	}()
 
+	streamQuota := newHandlerQuota(s.opts.maxConcurrentStreams)
+	st.HandleStreams(ctx, func(stream *transport.Stream) {
+		s.handlersWG.Add(1)
 		streamQuota.acquire()
 		f := func() {
 			defer streamQuota.release()
-			defer wg.Done()
-			s.handleStream(st, stream, s.traceInfo(st, stream))
+			defer s.handlersWG.Done()
+			s.handleStream(st, stream)
 		}
 
 		if s.opts.numServerWorkers > 0 {
@@ -956,14 +1038,7 @@ func (s *Server) serveStreams(st transport.ServerTransport) {
 			}
 		}
 		go f()
-	}, func(ctx context.Context, method string) context.Context {
-		if !EnableTracing {
-			return ctx
-		}
-		tr := trace.New("grpc.Recv."+methodFamily(method), method)
-		return trace.NewContext(ctx, tr)
 	})
-	wg.Wait()
 }
 
 var _ http.Handler = (*Server)(nil)
@@ -1007,31 +1082,7 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	defer s.removeConn(listenerAddressForServeHTTP, st)
-	s.serveStreams(st)
-}
-
-// traceInfo returns a traceInfo and associates it with stream, if tracing is enabled.
-// If tracing is not enabled, it returns nil.
-func (s *Server) traceInfo(st transport.ServerTransport, stream *transport.Stream) (trInfo *traceInfo) {
-	if !EnableTracing {
-		return nil
-	}
-	tr, ok := trace.FromContext(stream.Context())
-	if !ok {
-		return nil
-	}
-
-	trInfo = &traceInfo{
-		tr: tr,
-		firstLine: firstLine{
-			client:     false,
-			remoteAddr: st.RemoteAddr(),
-		},
-	}
-	if dl, ok := stream.Context().Deadline(); ok {
-		trInfo.firstLine.deadline = time.Until(dl)
-	}
-	return trInfo
+	s.serveStreams(r.Context(), st, nil)
 }
 
 func (s *Server) addConn(addr string, st transport.ServerTransport) bool {
@@ -1094,7 +1145,7 @@ func (s *Server) incrCallsFailed() {
 	atomic.AddInt64(&s.czData.callsFailed, 1)
 }
 
-func (s *Server) sendResponse(t transport.ServerTransport, stream *transport.Stream, msg interface{}, cp Compressor, opts *transport.Options, comp encoding.Compressor) error {
+func (s *Server) sendResponse(ctx context.Context, t transport.ServerTransport, stream *transport.Stream, msg any, cp Compressor, opts *transport.Options, comp encoding.Compressor) error {
 	data, err := encode(s.getCodec(stream.ContentSubtype()), msg)
 	if err != nil {
 		channelz.Error(logger, s.channelzID, "grpc: server failed to encode response: ", err)
@@ -1113,7 +1164,7 @@ func (s *Server) sendResponse(t transport.ServerTransport, stream *transport.Str
 	err = t.Write(stream, hdr, payload, opts)
 	if err == nil {
 		for _, sh := range s.opts.statsHandlers {
-			sh.HandleRPC(stream.Context(), outPayload(false, msg, data, payload, time.Now()))
+			sh.HandleRPC(ctx, outPayload(false, msg, data, payload, time.Now()))
 		}
 	}
 	return err
@@ -1141,7 +1192,7 @@ func chainUnaryServerInterceptors(s *Server) {
 }
 
 func chainUnaryInterceptors(interceptors []UnaryServerInterceptor) UnaryServerInterceptor {
-	return func(ctx context.Context, req interface{}, info *UnaryServerInfo, handler UnaryHandler) (interface{}, error) {
+	return func(ctx context.Context, req any, info *UnaryServerInfo, handler UnaryHandler) (any, error) {
 		return interceptors[0](ctx, req, info, getChainUnaryHandler(interceptors, 0, info, handler))
 	}
 }
@@ -1150,12 +1201,12 @@ func getChainUnaryHandler(interceptors []UnaryServerInterceptor, curr int, info
 	if curr == len(interceptors)-1 {
 		return finalHandler
 	}
-	return func(ctx context.Context, req interface{}) (interface{}, error) {
+	return func(ctx context.Context, req any) (any, error) {
 		return interceptors[curr+1](ctx, req, info, getChainUnaryHandler(interceptors, curr+1, info, finalHandler))
 	}
 }
 
-func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.Stream, info *serviceInfo, md *MethodDesc, trInfo *traceInfo) (err error) {
+func (s *Server) processUnaryRPC(ctx context.Context, t transport.ServerTransport, stream *transport.Stream, info *serviceInfo, md *MethodDesc, trInfo *traceInfo) (err error) {
 	shs := s.opts.statsHandlers
 	if len(shs) != 0 || trInfo != nil || channelz.IsOn() {
 		if channelz.IsOn() {
@@ -1169,7 +1220,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 				IsClientStream: false,
 				IsServerStream: false,
 			}
-			sh.HandleRPC(stream.Context(), statsBegin)
+			sh.HandleRPC(ctx, statsBegin)
 		}
 		if trInfo != nil {
 			trInfo.tr.LazyLog(&trInfo.firstLine, false)
@@ -1187,7 +1238,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 		defer func() {
 			if trInfo != nil {
 				if err != nil && err != io.EOF {
-					trInfo.tr.LazyLog(&fmtStringer{"%v", []interface{}{err}}, true)
+					trInfo.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true)
 					trInfo.tr.SetError()
 				}
 				trInfo.tr.Finish()
@@ -1201,7 +1252,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 				if err != nil && err != io.EOF {
 					end.Error = toRPCErr(err)
 				}
-				sh.HandleRPC(stream.Context(), end)
+				sh.HandleRPC(ctx, end)
 			}
 
 			if channelz.IsOn() {
@@ -1223,7 +1274,6 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 		}
 	}
 	if len(binlogs) != 0 {
-		ctx := stream.Context()
 		md, _ := metadata.FromIncomingContext(ctx)
 		logEntry := &binarylog.ClientHeader{
 			Header:     md,
@@ -1294,7 +1344,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 	if len(shs) != 0 || len(binlogs) != 0 {
 		payInfo = &payloadInfo{}
 	}
-	d, err := recvAndDecompress(&parser{r: stream}, stream, dc, s.opts.maxReceiveMessageSize, payInfo, decomp)
+	d, err := recvAndDecompress(&parser{r: stream, recvBufferPool: s.opts.recvBufferPool}, stream, dc, s.opts.maxReceiveMessageSize, payInfo, decomp)
 	if err != nil {
 		if e := t.WriteStatus(stream, status.Convert(err)); e != nil {
 			channelz.Warningf(logger, s.channelzID, "grpc: Server.processUnaryRPC failed to write status: %v", e)
@@ -1304,12 +1354,12 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 	if channelz.IsOn() {
 		t.IncrMsgRecv()
 	}
-	df := func(v interface{}) error {
+	df := func(v any) error {
 		if err := s.getCodec(stream.ContentSubtype()).Unmarshal(d, v); err != nil {
 			return status.Errorf(codes.Internal, "grpc: error unmarshalling request: %v", err)
 		}
 		for _, sh := range shs {
-			sh.HandleRPC(stream.Context(), &stats.InPayload{
+			sh.HandleRPC(ctx, &stats.InPayload{
 				RecvTime:         time.Now(),
 				Payload:          v,
 				Length:           len(d),
@@ -1323,7 +1373,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 				Message: d,
 			}
 			for _, binlog := range binlogs {
-				binlog.Log(stream.Context(), cm)
+				binlog.Log(ctx, cm)
 			}
 		}
 		if trInfo != nil {
@@ -1331,7 +1381,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 		}
 		return nil
 	}
-	ctx := NewContextWithServerTransportStream(stream.Context(), stream)
+	ctx = NewContextWithServerTransportStream(ctx, stream)
 	reply, appErr := md.Handler(info.serviceImpl, ctx, df, s.opts.unaryInt)
 	if appErr != nil {
 		appStatus, ok := status.FromError(appErr)
@@ -1356,7 +1406,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 					Header: h,
 				}
 				for _, binlog := range binlogs {
-					binlog.Log(stream.Context(), sh)
+					binlog.Log(ctx, sh)
 				}
 			}
 			st := &binarylog.ServerTrailer{
@@ -1364,7 +1414,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 				Err:     appErr,
 			}
 			for _, binlog := range binlogs {
-				binlog.Log(stream.Context(), st)
+				binlog.Log(ctx, st)
 			}
 		}
 		return appErr
@@ -1379,7 +1429,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 	if stream.SendCompress() != sendCompressorName {
 		comp = encoding.GetCompressor(stream.SendCompress())
 	}
-	if err := s.sendResponse(t, stream, reply, cp, opts, comp); err != nil {
+	if err := s.sendResponse(ctx, t, stream, reply, cp, opts, comp); err != nil {
 		if err == io.EOF {
 			// The entire stream is done (for unary RPC only).
 			return err
@@ -1406,8 +1456,8 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 				Err:     appErr,
 			}
 			for _, binlog := range binlogs {
-				binlog.Log(stream.Context(), sh)
-				binlog.Log(stream.Context(), st)
+				binlog.Log(ctx, sh)
+				binlog.Log(ctx, st)
 			}
 		}
 		return err
@@ -1421,8 +1471,8 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 			Message: reply,
 		}
 		for _, binlog := range binlogs {
-			binlog.Log(stream.Context(), sh)
-			binlog.Log(stream.Context(), sm)
+			binlog.Log(ctx, sh)
+			binlog.Log(ctx, sm)
 		}
 	}
 	if channelz.IsOn() {
@@ -1440,7 +1490,7 @@ func (s *Server) processUnaryRPC(t transport.ServerTransport, stream *transport.
 			Err:     appErr,
 		}
 		for _, binlog := range binlogs {
-			binlog.Log(stream.Context(), st)
+			binlog.Log(ctx, st)
 		}
 	}
 	return t.WriteStatus(stream, statusOK)
@@ -1468,7 +1518,7 @@ func chainStreamServerInterceptors(s *Server) {
 }
 
 func chainStreamInterceptors(interceptors []StreamServerInterceptor) StreamServerInterceptor {
-	return func(srv interface{}, ss ServerStream, info *StreamServerInfo, handler StreamHandler) error {
+	return func(srv any, ss ServerStream, info *StreamServerInfo, handler StreamHandler) error {
 		return interceptors[0](srv, ss, info, getChainStreamHandler(interceptors, 0, info, handler))
 	}
 }
@@ -1477,12 +1527,12 @@ func getChainStreamHandler(interceptors []StreamServerInterceptor, curr int, inf
 	if curr == len(interceptors)-1 {
 		return finalHandler
 	}
-	return func(srv interface{}, stream ServerStream) error {
+	return func(srv any, stream ServerStream) error {
 		return interceptors[curr+1](srv, stream, info, getChainStreamHandler(interceptors, curr+1, info, finalHandler))
 	}
 }
 
-func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transport.Stream, info *serviceInfo, sd *StreamDesc, trInfo *traceInfo) (err error) {
+func (s *Server) processStreamingRPC(ctx context.Context, t transport.ServerTransport, stream *transport.Stream, info *serviceInfo, sd *StreamDesc, trInfo *traceInfo) (err error) {
 	if channelz.IsOn() {
 		s.incrCallsStarted()
 	}
@@ -1496,15 +1546,15 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 			IsServerStream: sd.ServerStreams,
 		}
 		for _, sh := range shs {
-			sh.HandleRPC(stream.Context(), statsBegin)
+			sh.HandleRPC(ctx, statsBegin)
 		}
 	}
-	ctx := NewContextWithServerTransportStream(stream.Context(), stream)
+	ctx = NewContextWithServerTransportStream(ctx, stream)
 	ss := &serverStream{
 		ctx:                   ctx,
 		t:                     t,
 		s:                     stream,
-		p:                     &parser{r: stream},
+		p:                     &parser{r: stream, recvBufferPool: s.opts.recvBufferPool},
 		codec:                 s.getCodec(stream.ContentSubtype()),
 		maxReceiveMessageSize: s.opts.maxReceiveMessageSize,
 		maxSendMessageSize:    s.opts.maxSendMessageSize,
@@ -1518,7 +1568,7 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 			if trInfo != nil {
 				ss.mu.Lock()
 				if err != nil && err != io.EOF {
-					ss.trInfo.tr.LazyLog(&fmtStringer{"%v", []interface{}{err}}, true)
+					ss.trInfo.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true)
 					ss.trInfo.tr.SetError()
 				}
 				ss.trInfo.tr.Finish()
@@ -1535,7 +1585,7 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 					end.Error = toRPCErr(err)
 				}
 				for _, sh := range shs {
-					sh.HandleRPC(stream.Context(), end)
+					sh.HandleRPC(ctx, end)
 				}
 			}
 
@@ -1577,7 +1627,7 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 			logEntry.PeerAddr = peer.Addr
 		}
 		for _, binlog := range ss.binlogs {
-			binlog.Log(stream.Context(), logEntry)
+			binlog.Log(ctx, logEntry)
 		}
 	}
 
@@ -1621,7 +1671,7 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 		trInfo.tr.LazyLog(&trInfo.firstLine, false)
 	}
 	var appErr error
-	var server interface{}
+	var server any
 	if info != nil {
 		server = info.serviceImpl
 	}
@@ -1655,7 +1705,7 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 				Err:     appErr,
 			}
 			for _, binlog := range ss.binlogs {
-				binlog.Log(stream.Context(), st)
+				binlog.Log(ctx, st)
 			}
 		}
 		t.WriteStatus(ss.s, appStatus)
@@ -1673,53 +1723,87 @@ func (s *Server) processStreamingRPC(t transport.ServerTransport, stream *transp
 			Err:     appErr,
 		}
 		for _, binlog := range ss.binlogs {
-			binlog.Log(stream.Context(), st)
+			binlog.Log(ctx, st)
 		}
 	}
 	return t.WriteStatus(ss.s, statusOK)
 }
 
-func (s *Server) handleStream(t transport.ServerTransport, stream *transport.Stream, trInfo *traceInfo) {
+func (s *Server) handleStream(t transport.ServerTransport, stream *transport.Stream) {
+	ctx := stream.Context()
+	ctx = contextWithServer(ctx, s)
+	var ti *traceInfo
+	if EnableTracing {
+		tr := trace.New("grpc.Recv."+methodFamily(stream.Method()), stream.Method())
+		ctx = trace.NewContext(ctx, tr)
+		ti = &traceInfo{
+			tr: tr,
+			firstLine: firstLine{
+				client:     false,
+				remoteAddr: t.Peer().Addr,
+			},
+		}
+		if dl, ok := ctx.Deadline(); ok {
+			ti.firstLine.deadline = time.Until(dl)
+		}
+	}
+
 	sm := stream.Method()
 	if sm != "" && sm[0] == '/' {
 		sm = sm[1:]
 	}
 	pos := strings.LastIndex(sm, "/")
 	if pos == -1 {
-		if trInfo != nil {
-			trInfo.tr.LazyLog(&fmtStringer{"Malformed method name %q", []interface{}{sm}}, true)
-			trInfo.tr.SetError()
+		if ti != nil {
+			ti.tr.LazyLog(&fmtStringer{"Malformed method name %q", []any{sm}}, true)
+			ti.tr.SetError()
 		}
 		errDesc := fmt.Sprintf("malformed method name: %q", stream.Method())
 		if err := t.WriteStatus(stream, status.New(codes.Unimplemented, errDesc)); err != nil {
-			if trInfo != nil {
-				trInfo.tr.LazyLog(&fmtStringer{"%v", []interface{}{err}}, true)
-				trInfo.tr.SetError()
+			if ti != nil {
+				ti.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true)
+				ti.tr.SetError()
 			}
 			channelz.Warningf(logger, s.channelzID, "grpc: Server.handleStream failed to write status: %v", err)
 		}
-		if trInfo != nil {
-			trInfo.tr.Finish()
+		if ti != nil {
+			ti.tr.Finish()
 		}
 		return
 	}
 	service := sm[:pos]
 	method := sm[pos+1:]
 
+	md, _ := metadata.FromIncomingContext(ctx)
+	for _, sh := range s.opts.statsHandlers {
+		ctx = sh.TagRPC(ctx, &stats.RPCTagInfo{FullMethodName: stream.Method()})
+		sh.HandleRPC(ctx, &stats.InHeader{
+			FullMethod:  stream.Method(),
+			RemoteAddr:  t.Peer().Addr,
+			LocalAddr:   t.Peer().LocalAddr,
+			Compression: stream.RecvCompress(),
+			WireLength:  stream.HeaderWireLength(),
+			Header:      md,
+		})
+	}
+	// To have calls in stream callouts work. Will delete once all stats handler
+	// calls come from the gRPC layer.
+	stream.SetContext(ctx)
+
 	srv, knownService := s.services[service]
 	if knownService {
 		if md, ok := srv.methods[method]; ok {
-			s.processUnaryRPC(t, stream, srv, md, trInfo)
+			s.processUnaryRPC(ctx, t, stream, srv, md, ti)
 			return
 		}
 		if sd, ok := srv.streams[method]; ok {
-			s.processStreamingRPC(t, stream, srv, sd, trInfo)
+			s.processStreamingRPC(ctx, t, stream, srv, sd, ti)
 			return
 		}
 	}
 	// Unknown service, or known server unknown method.
 	if unknownDesc := s.opts.unknownStreamDesc; unknownDesc != nil {
-		s.processStreamingRPC(t, stream, nil, unknownDesc, trInfo)
+		s.processStreamingRPC(ctx, t, stream, nil, unknownDesc, ti)
 		return
 	}
 	var errDesc string
@@ -1728,19 +1812,19 @@ func (s *Server) handleStream(t transport.ServerTransport, stream *transport.Str
 	} else {
 		errDesc = fmt.Sprintf("unknown method %v for service %v", method, service)
 	}
-	if trInfo != nil {
-		trInfo.tr.LazyPrintf("%s", errDesc)
-		trInfo.tr.SetError()
+	if ti != nil {
+		ti.tr.LazyPrintf("%s", errDesc)
+		ti.tr.SetError()
 	}
 	if err := t.WriteStatus(stream, status.New(codes.Unimplemented, errDesc)); err != nil {
-		if trInfo != nil {
-			trInfo.tr.LazyLog(&fmtStringer{"%v", []interface{}{err}}, true)
-			trInfo.tr.SetError()
+		if ti != nil {
+			ti.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true)
+			ti.tr.SetError()
 		}
 		channelz.Warningf(logger, s.channelzID, "grpc: Server.handleStream failed to write status: %v", err)
 	}
-	if trInfo != nil {
-		trInfo.tr.Finish()
+	if ti != nil {
+		ti.tr.Finish()
 	}
 }
 
@@ -1795,62 +1879,72 @@ func ServerTransportStreamFromContext(ctx context.Context) ServerTransportStream
 // pending RPCs on the client side will get notified by connection
 // errors.
 func (s *Server) Stop() {
-	s.quit.Fire()
+	s.stop(false)
+}
 
-	defer func() {
-		s.serveWG.Wait()
-		s.done.Fire()
-	}()
+// GracefulStop stops the gRPC server gracefully. It stops the server from
+// accepting new connections and RPCs and blocks until all the pending RPCs are
+// finished.
+func (s *Server) GracefulStop() {
+	s.stop(true)
+}
+
+func (s *Server) stop(graceful bool) {
+	s.quit.Fire()
+	defer s.done.Fire()
 
 	s.channelzRemoveOnce.Do(func() { channelz.RemoveEntry(s.channelzID) })
 
 	s.mu.Lock()
-	listeners := s.lis
-	s.lis = nil
-	conns := s.conns
-	s.conns = nil
-	// interrupt GracefulStop if Stop and GracefulStop are called concurrently.
-	s.cv.Broadcast()
+	s.closeListenersLocked()
+	// Wait for serving threads to be ready to exit.  Only then can we be sure no
+	// new conns will be created.
 	s.mu.Unlock()
+	s.serveWG.Wait()
 
-	for lis := range listeners {
-		lis.Close()
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if graceful {
+		s.drainAllServerTransportsLocked()
+	} else {
+		s.closeServerTransportsLocked()
 	}
-	for _, cs := range conns {
-		for st := range cs {
-			st.Close(errors.New("Server.Stop called"))
-		}
+
+	for len(s.conns) != 0 {
+		s.cv.Wait()
 	}
+	s.conns = nil
+
 	if s.opts.numServerWorkers > 0 {
-		s.stopServerWorkers()
+		// Closing the channel (only once, via grpcsync.OnceFunc) after all the
+		// connections have been closed above ensures that there are no
+		// goroutines executing the callback passed to st.HandleStreams (where
+		// the channel is written to).
+		s.serverWorkerChannelClose()
+	}
+
+	if graceful || s.opts.waitForHandlers {
+		s.handlersWG.Wait()
 	}
 
-	s.mu.Lock()
 	if s.events != nil {
 		s.events.Finish()
 		s.events = nil
 	}
-	s.mu.Unlock()
 }
 
-// GracefulStop stops the gRPC server gracefully. It stops the server from
-// accepting new connections and RPCs and blocks until all the pending RPCs are
-// finished.
-func (s *Server) GracefulStop() {
-	s.quit.Fire()
-	defer s.done.Fire()
-
-	s.channelzRemoveOnce.Do(func() { channelz.RemoveEntry(s.channelzID) })
-	s.mu.Lock()
-	if s.conns == nil {
-		s.mu.Unlock()
-		return
+// s.mu must be held by the caller.
+func (s *Server) closeServerTransportsLocked() {
+	for _, conns := range s.conns {
+		for st := range conns {
+			st.Close(errors.New("Server.Stop called"))
+		}
 	}
+}
 
-	for lis := range s.lis {
-		lis.Close()
-	}
-	s.lis = nil
+// s.mu must be held by the caller.
+func (s *Server) drainAllServerTransportsLocked() {
 	if !s.drain {
 		for _, conns := range s.conns {
 			for st := range conns {
@@ -1859,22 +1953,14 @@ func (s *Server) GracefulStop() {
 		}
 		s.drain = true
 	}
+}
 
-	// Wait for serving threads to be ready to exit.  Only then can we be sure no
-	// new conns will be created.
-	s.mu.Unlock()
-	s.serveWG.Wait()
-	s.mu.Lock()
-
-	for len(s.conns) != 0 {
-		s.cv.Wait()
-	}
-	s.conns = nil
-	if s.events != nil {
-		s.events.Finish()
-		s.events = nil
+// s.mu must be held by the caller.
+func (s *Server) closeListenersLocked() {
+	for lis := range s.lis {
+		lis.Close()
 	}
-	s.mu.Unlock()
+	s.lis = nil
 }
 
 // contentSubtype must be lowercase
@@ -1888,11 +1974,50 @@ func (s *Server) getCodec(contentSubtype string) baseCodec {
 	}
 	codec := encoding.GetCodec(contentSubtype)
 	if codec == nil {
+		logger.Warningf("Unsupported codec %q. Defaulting to %q for now. This will start to fail in future releases.", contentSubtype, proto.Name)
 		return encoding.GetCodec(proto.Name)
 	}
 	return codec
 }
 
+type serverKey struct{}
+
+// serverFromContext gets the Server from the context.
+func serverFromContext(ctx context.Context) *Server {
+	s, _ := ctx.Value(serverKey{}).(*Server)
+	return s
+}
+
+// contextWithServer sets the Server in the context.
+func contextWithServer(ctx context.Context, server *Server) context.Context {
+	return context.WithValue(ctx, serverKey{}, server)
+}
+
+// isRegisteredMethod returns whether the passed in method is registered as a
+// method on the server. /service/method and service/method will match if the
+// service and method are registered on the server.
+func (s *Server) isRegisteredMethod(serviceMethod string) bool {
+	if serviceMethod != "" && serviceMethod[0] == '/' {
+		serviceMethod = serviceMethod[1:]
+	}
+	pos := strings.LastIndex(serviceMethod, "/")
+	if pos == -1 { // Invalid method name syntax.
+		return false
+	}
+	service := serviceMethod[:pos]
+	method := serviceMethod[pos+1:]
+	srv, knownService := s.services[service]
+	if knownService {
+		if _, ok := srv.methods[method]; ok {
+			return true
+		}
+		if _, ok := srv.streams[method]; ok {
+			return true
+		}
+	}
+	return false
+}
+
 // SetHeader sets the header metadata to be sent from the server to the client.
 // The context provided must be the context passed to the server's handler.
 //
@@ -2054,12 +2179,12 @@ func validateSendCompressor(name, clientCompressors string) error {
 // atomicSemaphore implements a blocking, counting semaphore. acquire should be
 // called synchronously; release may be called asynchronously.
 type atomicSemaphore struct {
-	n    int64
+	n    atomic.Int64
 	wait chan struct{}
 }
 
 func (q *atomicSemaphore) acquire() {
-	if atomic.AddInt64(&q.n, -1) < 0 {
+	if q.n.Add(-1) < 0 {
 		// We ran out of quota.  Block until a release happens.
 		<-q.wait
 	}
@@ -2070,12 +2195,14 @@ func (q *atomicSemaphore) release() {
 	// concurrent calls to acquire, but also note that with synchronous calls to
 	// acquire, as our system does, n will never be less than -1.  There are
 	// fairness issues (queuing) to consider if this was to be generalized.
-	if atomic.AddInt64(&q.n, 1) <= 0 {
+	if q.n.Add(1) <= 0 {
 		// An acquire was waiting on us.  Unblock it.
 		q.wait <- struct{}{}
 	}
 }
 
 func newHandlerQuota(n uint32) *atomicSemaphore {
-	return &atomicSemaphore{n: int64(n), wait: make(chan struct{}, 1)}
+	a := &atomicSemaphore{wait: make(chan struct{}, 1)}
+	a.n.Store(int64(n))
+	return a
 }
diff --git a/vendor/google.golang.org/grpc/shared_buffer_pool.go b/vendor/google.golang.org/grpc/shared_buffer_pool.go
new file mode 100644
index 00000000000..48a64cfe8e2
--- /dev/null
+++ b/vendor/google.golang.org/grpc/shared_buffer_pool.go
@@ -0,0 +1,154 @@
+/*
+ *
+ * Copyright 2023 gRPC authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package grpc
+
+import "sync"
+
+// SharedBufferPool is a pool of buffers that can be shared, resulting in
+// decreased memory allocation. Currently, in gRPC-go, it is only utilized
+// for parsing incoming messages.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a
+// later release.
+type SharedBufferPool interface {
+	// Get returns a buffer with specified length from the pool.
+	//
+	// The returned byte slice may be not zero initialized.
+	Get(length int) []byte
+
+	// Put returns a buffer to the pool.
+	Put(*[]byte)
+}
+
+// NewSharedBufferPool creates a simple SharedBufferPool with buckets
+// of different sizes to optimize memory usage. This prevents the pool from
+// wasting large amounts of memory, even when handling messages of varying sizes.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a
+// later release.
+func NewSharedBufferPool() SharedBufferPool {
+	return &simpleSharedBufferPool{
+		pools: [poolArraySize]simpleSharedBufferChildPool{
+			newBytesPool(level0PoolMaxSize),
+			newBytesPool(level1PoolMaxSize),
+			newBytesPool(level2PoolMaxSize),
+			newBytesPool(level3PoolMaxSize),
+			newBytesPool(level4PoolMaxSize),
+			newBytesPool(0),
+		},
+	}
+}
+
+// simpleSharedBufferPool is a simple implementation of SharedBufferPool.
+type simpleSharedBufferPool struct {
+	pools [poolArraySize]simpleSharedBufferChildPool
+}
+
+func (p *simpleSharedBufferPool) Get(size int) []byte {
+	return p.pools[p.poolIdx(size)].Get(size)
+}
+
+func (p *simpleSharedBufferPool) Put(bs *[]byte) {
+	p.pools[p.poolIdx(cap(*bs))].Put(bs)
+}
+
+func (p *simpleSharedBufferPool) poolIdx(size int) int {
+	switch {
+	case size <= level0PoolMaxSize:
+		return level0PoolIdx
+	case size <= level1PoolMaxSize:
+		return level1PoolIdx
+	case size <= level2PoolMaxSize:
+		return level2PoolIdx
+	case size <= level3PoolMaxSize:
+		return level3PoolIdx
+	case size <= level4PoolMaxSize:
+		return level4PoolIdx
+	default:
+		return levelMaxPoolIdx
+	}
+}
+
+const (
+	level0PoolMaxSize = 16                     //  16  B
+	level1PoolMaxSize = level0PoolMaxSize * 16 // 256  B
+	level2PoolMaxSize = level1PoolMaxSize * 16 //   4 KB
+	level3PoolMaxSize = level2PoolMaxSize * 16 //  64 KB
+	level4PoolMaxSize = level3PoolMaxSize * 16 //   1 MB
+)
+
+const (
+	level0PoolIdx = iota
+	level1PoolIdx
+	level2PoolIdx
+	level3PoolIdx
+	level4PoolIdx
+	levelMaxPoolIdx
+	poolArraySize
+)
+
+type simpleSharedBufferChildPool interface {
+	Get(size int) []byte
+	Put(any)
+}
+
+type bufferPool struct {
+	sync.Pool
+
+	defaultSize int
+}
+
+func (p *bufferPool) Get(size int) []byte {
+	bs := p.Pool.Get().(*[]byte)
+
+	if cap(*bs) < size {
+		p.Pool.Put(bs)
+
+		return make([]byte, size)
+	}
+
+	return (*bs)[:size]
+}
+
+func newBytesPool(size int) simpleSharedBufferChildPool {
+	return &bufferPool{
+		Pool: sync.Pool{
+			New: func() any {
+				bs := make([]byte, size)
+				return &bs
+			},
+		},
+		defaultSize: size,
+	}
+}
+
+// nopBufferPool is a buffer pool just makes new buffer without pooling.
+type nopBufferPool struct {
+}
+
+func (nopBufferPool) Get(length int) []byte {
+	return make([]byte, length)
+}
+
+func (nopBufferPool) Put(*[]byte) {
+}
diff --git a/vendor/google.golang.org/grpc/stats/stats.go b/vendor/google.golang.org/grpc/stats/stats.go
index 7a552a9b787..4ab70e2d462 100644
--- a/vendor/google.golang.org/grpc/stats/stats.go
+++ b/vendor/google.golang.org/grpc/stats/stats.go
@@ -59,12 +59,22 @@ func (s *Begin) IsClient() bool { return s.Client }
 
 func (s *Begin) isRPCStats() {}
 
+// PickerUpdated indicates that the LB policy provided a new picker while the
+// RPC was waiting for one.
+type PickerUpdated struct{}
+
+// IsClient indicates if the stats information is from client side. Only Client
+// Side interfaces with a Picker, thus always returns true.
+func (*PickerUpdated) IsClient() bool { return true }
+
+func (*PickerUpdated) isRPCStats() {}
+
 // InPayload contains the information for an incoming payload.
 type InPayload struct {
 	// Client is true if this InPayload is from client side.
 	Client bool
 	// Payload is the payload with original type.
-	Payload interface{}
+	Payload any
 	// Data is the serialized message payload.
 	Data []byte
 
@@ -134,7 +144,7 @@ type OutPayload struct {
 	// Client is true if this OutPayload is from client side.
 	Client bool
 	// Payload is the payload with original type.
-	Payload interface{}
+	Payload any
 	// Data is the serialized message payload.
 	Data []byte
 	// Length is the size of the uncompressed payload data. Does not include any
diff --git a/vendor/google.golang.org/grpc/status/status.go b/vendor/google.golang.org/grpc/status/status.go
index bcf2e4d81be..a93360efb84 100644
--- a/vendor/google.golang.org/grpc/status/status.go
+++ b/vendor/google.golang.org/grpc/status/status.go
@@ -50,7 +50,7 @@ func New(c codes.Code, msg string) *Status {
 }
 
 // Newf returns New(c, fmt.Sprintf(format, a...)).
-func Newf(c codes.Code, format string, a ...interface{}) *Status {
+func Newf(c codes.Code, format string, a ...any) *Status {
 	return New(c, fmt.Sprintf(format, a...))
 }
 
@@ -60,7 +60,7 @@ func Error(c codes.Code, msg string) error {
 }
 
 // Errorf returns Error(c, fmt.Sprintf(format, a...)).
-func Errorf(c codes.Code, format string, a ...interface{}) error {
+func Errorf(c codes.Code, format string, a ...any) error {
 	return Error(c, fmt.Sprintf(format, a...))
 }
 
@@ -99,25 +99,27 @@ func FromError(err error) (s *Status, ok bool) {
 	}
 	type grpcstatus interface{ GRPCStatus() *Status }
 	if gs, ok := err.(grpcstatus); ok {
-		if gs.GRPCStatus() == nil {
+		grpcStatus := gs.GRPCStatus()
+		if grpcStatus == nil {
 			// Error has status nil, which maps to codes.OK. There
 			// is no sensible behavior for this, so we turn it into
 			// an error with codes.Unknown and discard the existing
 			// status.
 			return New(codes.Unknown, err.Error()), false
 		}
-		return gs.GRPCStatus(), true
+		return grpcStatus, true
 	}
 	var gs grpcstatus
 	if errors.As(err, &gs) {
-		if gs.GRPCStatus() == nil {
+		grpcStatus := gs.GRPCStatus()
+		if grpcStatus == nil {
 			// Error wraps an error that has status nil, which maps
 			// to codes.OK.  There is no sensible behavior for this,
 			// so we turn it into an error with codes.Unknown and
 			// discard the existing status.
 			return New(codes.Unknown, err.Error()), false
 		}
-		p := gs.GRPCStatus().Proto()
+		p := grpcStatus.Proto()
 		p.Message = err.Error()
 		return status.FromProto(p), true
 	}
diff --git a/vendor/google.golang.org/grpc/stream.go b/vendor/google.golang.org/grpc/stream.go
index 10092685b22..d621f52b1ab 100644
--- a/vendor/google.golang.org/grpc/stream.go
+++ b/vendor/google.golang.org/grpc/stream.go
@@ -31,6 +31,7 @@ import (
 	"google.golang.org/grpc/balancer"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/encoding"
+	"google.golang.org/grpc/internal"
 	"google.golang.org/grpc/internal/balancerload"
 	"google.golang.org/grpc/internal/binarylog"
 	"google.golang.org/grpc/internal/channelz"
@@ -47,6 +48,8 @@ import (
 	"google.golang.org/grpc/status"
 )
 
+var metadataFromOutgoingContextRaw = internal.FromOutgoingContextRaw.(func(context.Context) (metadata.MD, [][]string, bool))
+
 // StreamHandler defines the handler called by gRPC server to complete the
 // execution of a streaming RPC.
 //
@@ -54,7 +57,7 @@ import (
 // status package, or be one of the context errors. Otherwise, gRPC will use
 // codes.Unknown as the status code and err.Error() as the status message of the
 // RPC.
-type StreamHandler func(srv interface{}, stream ServerStream) error
+type StreamHandler func(srv any, stream ServerStream) error
 
 // StreamDesc represents a streaming RPC service's method specification.  Used
 // on the server when registering services and on the client when initiating
@@ -79,9 +82,9 @@ type Stream interface {
 	// Deprecated: See ClientStream and ServerStream documentation instead.
 	Context() context.Context
 	// Deprecated: See ClientStream and ServerStream documentation instead.
-	SendMsg(m interface{}) error
+	SendMsg(m any) error
 	// Deprecated: See ClientStream and ServerStream documentation instead.
-	RecvMsg(m interface{}) error
+	RecvMsg(m any) error
 }
 
 // ClientStream defines the client-side behavior of a streaming RPC.
@@ -90,7 +93,9 @@ type Stream interface {
 // status package.
 type ClientStream interface {
 	// Header returns the header metadata received from the server if there
-	// is any. It blocks if the metadata is not ready to read.
+	// is any. It blocks if the metadata is not ready to read.  If the metadata
+	// is nil and the error is also nil, then the stream was terminated without
+	// headers, and the status can be discovered by calling RecvMsg.
 	Header() (metadata.MD, error)
 	// Trailer returns the trailer metadata from the server, if there is any.
 	// It must only be called after stream.CloseAndRecv has returned, or
@@ -126,7 +131,7 @@ type ClientStream interface {
 	//
 	// It is not safe to modify the message after calling SendMsg. Tracing
 	// libraries and stats handlers may use the message lazily.
-	SendMsg(m interface{}) error
+	SendMsg(m any) error
 	// RecvMsg blocks until it receives a message into m or the stream is
 	// done. It returns io.EOF when the stream completes successfully. On
 	// any other error, the stream is aborted and the error contains the RPC
@@ -135,7 +140,7 @@ type ClientStream interface {
 	// It is safe to have a goroutine calling SendMsg and another goroutine
 	// calling RecvMsg on the same stream at the same time, but it is not
 	// safe to call RecvMsg on the same stream in different goroutines.
-	RecvMsg(m interface{}) error
+	RecvMsg(m any) error
 }
 
 // NewStream creates a new Stream for the client side. This is typically
@@ -155,11 +160,6 @@ type ClientStream interface {
 // If none of the above happen, a goroutine and a context will be leaked, and grpc
 // will not call the optionally-configured stats handler with a stats.End message.
 func (cc *ClientConn) NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) {
-	if err := cc.idlenessMgr.onCallBegin(); err != nil {
-		return nil, err
-	}
-	defer cc.idlenessMgr.onCallEnd()
-
 	// allow interceptor to see all applicable call options, which means those
 	// configured as defaults from dial option as well as per-call options
 	opts = combine(cc.dopts.callOptions, opts)
@@ -176,7 +176,17 @@ func NewClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, meth
 }
 
 func newClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (_ ClientStream, err error) {
-	if md, added, ok := metadata.FromOutgoingContextRaw(ctx); ok {
+	// Start tracking the RPC for idleness purposes. This is where a stream is
+	// created for both streaming and unary RPCs, and hence is a good place to
+	// track active RPC count.
+	if err := cc.idlenessMgr.OnCallBegin(); err != nil {
+		return nil, err
+	}
+	// Add a calloption, to decrement the active call count, that gets executed
+	// when the RPC completes.
+	opts = append([]CallOption{OnFinish(func(error) { cc.idlenessMgr.OnCallEnd() })}, opts...)
+
+	if md, added, ok := metadataFromOutgoingContextRaw(ctx); ok {
 		// validate md
 		if err := imetadata.Validate(md); err != nil {
 			return nil, status.Error(codes.Internal, err.Error())
@@ -433,7 +443,7 @@ func (cs *clientStream) newAttemptLocked(isTransparent bool) (*csAttempt, error)
 		ctx = trace.NewContext(ctx, trInfo.tr)
 	}
 
-	if cs.cc.parsedTarget.URL.Scheme == "xds" {
+	if cs.cc.parsedTarget.URL.Scheme == internal.GRPCResolverSchemeExtraMetadata {
 		// Add extra metadata (metadata that will be added by transport) to context
 		// so the balancer can see them.
 		ctx = grpcutil.WithExtraMetadata(ctx, metadata.Pairs(
@@ -507,7 +517,7 @@ func (a *csAttempt) newStream() error {
 		return toRPCErr(nse.Err)
 	}
 	a.s = s
-	a.p = &parser{r: s}
+	a.p = &parser{r: s, recvBufferPool: a.cs.cc.dopts.recvBufferPool}
 	return nil
 }
 
@@ -788,23 +798,24 @@ func (cs *clientStream) withRetry(op func(a *csAttempt) error, onSuccess func())
 
 func (cs *clientStream) Header() (metadata.MD, error) {
 	var m metadata.MD
-	noHeader := false
 	err := cs.withRetry(func(a *csAttempt) error {
 		var err error
 		m, err = a.s.Header()
-		if err == transport.ErrNoHeaders {
-			noHeader = true
-			return nil
-		}
 		return toRPCErr(err)
 	}, cs.commitAttemptLocked)
 
+	if m == nil && err == nil {
+		// The stream ended with success.  Finish the clientStream.
+		err = io.EOF
+	}
+
 	if err != nil {
 		cs.finish(err)
-		return nil, err
+		// Do not return the error.  The user should get it by calling Recv().
+		return nil, nil
 	}
 
-	if len(cs.binlogs) != 0 && !cs.serverHeaderBinlogged && !noHeader {
+	if len(cs.binlogs) != 0 && !cs.serverHeaderBinlogged && m != nil {
 		// Only log if binary log is on and header has not been logged, and
 		// there is actually headers to log.
 		logEntry := &binarylog.ServerHeader{
@@ -820,6 +831,7 @@ func (cs *clientStream) Header() (metadata.MD, error) {
 			binlog.Log(cs.ctx, logEntry)
 		}
 	}
+
 	return m, nil
 }
 
@@ -860,7 +872,7 @@ func (cs *clientStream) bufferForRetryLocked(sz int, op func(a *csAttempt) error
 	cs.buffer = append(cs.buffer, op)
 }
 
-func (cs *clientStream) SendMsg(m interface{}) (err error) {
+func (cs *clientStream) SendMsg(m any) (err error) {
 	defer func() {
 		if err != nil && err != io.EOF {
 			// Call finish on the client stream for errors generated by this SendMsg
@@ -904,7 +916,7 @@ func (cs *clientStream) SendMsg(m interface{}) (err error) {
 	return err
 }
 
-func (cs *clientStream) RecvMsg(m interface{}) error {
+func (cs *clientStream) RecvMsg(m any) error {
 	if len(cs.binlogs) != 0 && !cs.serverHeaderBinlogged {
 		// Call Header() to binary log header if it's not already logged.
 		cs.Header()
@@ -928,24 +940,6 @@ func (cs *clientStream) RecvMsg(m interface{}) error {
 	if err != nil || !cs.desc.ServerStreams {
 		// err != nil or non-server-streaming indicates end of stream.
 		cs.finish(err)
-
-		if len(cs.binlogs) != 0 {
-			// finish will not log Trailer. Log Trailer here.
-			logEntry := &binarylog.ServerTrailer{
-				OnClientSide: true,
-				Trailer:      cs.Trailer(),
-				Err:          err,
-			}
-			if logEntry.Err == io.EOF {
-				logEntry.Err = nil
-			}
-			if peer, ok := peer.FromContext(cs.Context()); ok {
-				logEntry.PeerAddr = peer.Addr
-			}
-			for _, binlog := range cs.binlogs {
-				binlog.Log(cs.ctx, logEntry)
-			}
-		}
 	}
 	return err
 }
@@ -1001,18 +995,30 @@ func (cs *clientStream) finish(err error) {
 			}
 		}
 	}
+
 	cs.mu.Unlock()
-	// For binary logging. only log cancel in finish (could be caused by RPC ctx
-	// canceled or ClientConn closed). Trailer will be logged in RecvMsg.
-	//
-	// Only one of cancel or trailer needs to be logged. In the cases where
-	// users don't call RecvMsg, users must have already canceled the RPC.
-	if len(cs.binlogs) != 0 && status.Code(err) == codes.Canceled {
-		c := &binarylog.Cancel{
-			OnClientSide: true,
-		}
-		for _, binlog := range cs.binlogs {
-			binlog.Log(cs.ctx, c)
+	// Only one of cancel or trailer needs to be logged.
+	if len(cs.binlogs) != 0 {
+		switch err {
+		case errContextCanceled, errContextDeadline, ErrClientConnClosing:
+			c := &binarylog.Cancel{
+				OnClientSide: true,
+			}
+			for _, binlog := range cs.binlogs {
+				binlog.Log(cs.ctx, c)
+			}
+		default:
+			logEntry := &binarylog.ServerTrailer{
+				OnClientSide: true,
+				Trailer:      cs.Trailer(),
+				Err:          err,
+			}
+			if peer, ok := peer.FromContext(cs.Context()); ok {
+				logEntry.PeerAddr = peer.Addr
+			}
+			for _, binlog := range cs.binlogs {
+				binlog.Log(cs.ctx, logEntry)
+			}
 		}
 	}
 	if err == nil {
@@ -1028,7 +1034,7 @@ func (cs *clientStream) finish(err error) {
 	cs.cancel()
 }
 
-func (a *csAttempt) sendMsg(m interface{}, hdr, payld, data []byte) error {
+func (a *csAttempt) sendMsg(m any, hdr, payld, data []byte) error {
 	cs := a.cs
 	if a.trInfo != nil {
 		a.mu.Lock()
@@ -1055,7 +1061,7 @@ func (a *csAttempt) sendMsg(m interface{}, hdr, payld, data []byte) error {
 	return nil
 }
 
-func (a *csAttempt) recvMsg(m interface{}, payInfo *payloadInfo) (err error) {
+func (a *csAttempt) recvMsg(m any, payInfo *payloadInfo) (err error) {
 	cs := a.cs
 	if len(a.statsHandlers) != 0 && payInfo == nil {
 		payInfo = &payloadInfo{}
@@ -1270,7 +1276,7 @@ func newNonRetryClientStream(ctx context.Context, desc *StreamDesc, method strin
 		return nil, err
 	}
 	as.s = s
-	as.p = &parser{r: s}
+	as.p = &parser{r: s, recvBufferPool: ac.dopts.recvBufferPool}
 	ac.incrCallsStarted()
 	if desc != unaryStreamDesc {
 		// Listen on stream context to cleanup when the stream context is
@@ -1348,7 +1354,7 @@ func (as *addrConnStream) Context() context.Context {
 	return as.s.Context()
 }
 
-func (as *addrConnStream) SendMsg(m interface{}) (err error) {
+func (as *addrConnStream) SendMsg(m any) (err error) {
 	defer func() {
 		if err != nil && err != io.EOF {
 			// Call finish on the client stream for errors generated by this SendMsg
@@ -1393,7 +1399,7 @@ func (as *addrConnStream) SendMsg(m interface{}) (err error) {
 	return nil
 }
 
-func (as *addrConnStream) RecvMsg(m interface{}) (err error) {
+func (as *addrConnStream) RecvMsg(m any) (err error) {
 	defer func() {
 		if err != nil || !as.desc.ServerStreams {
 			// err != nil or non-server-streaming indicates end of stream.
@@ -1512,7 +1518,7 @@ type ServerStream interface {
 	//
 	// It is not safe to modify the message after calling SendMsg. Tracing
 	// libraries and stats handlers may use the message lazily.
-	SendMsg(m interface{}) error
+	SendMsg(m any) error
 	// RecvMsg blocks until it receives a message into m or the stream is
 	// done. It returns io.EOF when the client has performed a CloseSend. On
 	// any non-EOF error, the stream is aborted and the error contains the
@@ -1521,7 +1527,7 @@ type ServerStream interface {
 	// It is safe to have a goroutine calling SendMsg and another goroutine
 	// calling RecvMsg on the same stream at the same time, but it is not
 	// safe to call RecvMsg on the same stream in different goroutines.
-	RecvMsg(m interface{}) error
+	RecvMsg(m any) error
 }
 
 // serverStream implements a server side Stream.
@@ -1602,7 +1608,7 @@ func (ss *serverStream) SetTrailer(md metadata.MD) {
 	ss.s.SetTrailer(md)
 }
 
-func (ss *serverStream) SendMsg(m interface{}) (err error) {
+func (ss *serverStream) SendMsg(m any) (err error) {
 	defer func() {
 		if ss.trInfo != nil {
 			ss.mu.Lock()
@@ -1610,7 +1616,7 @@ func (ss *serverStream) SendMsg(m interface{}) (err error) {
 				if err == nil {
 					ss.trInfo.tr.LazyLog(&payload{sent: true, msg: m}, true)
 				} else {
-					ss.trInfo.tr.LazyLog(&fmtStringer{"%v", []interface{}{err}}, true)
+					ss.trInfo.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true)
 					ss.trInfo.tr.SetError()
 				}
 			}
@@ -1677,7 +1683,7 @@ func (ss *serverStream) SendMsg(m interface{}) (err error) {
 	return nil
 }
 
-func (ss *serverStream) RecvMsg(m interface{}) (err error) {
+func (ss *serverStream) RecvMsg(m any) (err error) {
 	defer func() {
 		if ss.trInfo != nil {
 			ss.mu.Lock()
@@ -1685,7 +1691,7 @@ func (ss *serverStream) RecvMsg(m interface{}) (err error) {
 				if err == nil {
 					ss.trInfo.tr.LazyLog(&payload{sent: false, msg: m}, true)
 				} else if err != io.EOF {
-					ss.trInfo.tr.LazyLog(&fmtStringer{"%v", []interface{}{err}}, true)
+					ss.trInfo.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true)
 					ss.trInfo.tr.SetError()
 				}
 			}
@@ -1757,7 +1763,7 @@ func MethodFromServerStream(stream ServerStream) (string, bool) {
 // prepareMsg returns the hdr, payload and data
 // using the compressors passed or using the
 // passed preparedmsg
-func prepareMsg(m interface{}, codec baseCodec, cp Compressor, comp encoding.Compressor) (hdr, payload, data []byte, err error) {
+func prepareMsg(m any, codec baseCodec, cp Compressor, comp encoding.Compressor) (hdr, payload, data []byte, err error) {
 	if preparedMsg, ok := m.(*PreparedMsg); ok {
 		return preparedMsg.hdr, preparedMsg.payload, preparedMsg.encodedData, nil
 	}
diff --git a/vendor/google.golang.org/grpc/tap/tap.go b/vendor/google.golang.org/grpc/tap/tap.go
index bfa5dfa40e4..07f01257688 100644
--- a/vendor/google.golang.org/grpc/tap/tap.go
+++ b/vendor/google.golang.org/grpc/tap/tap.go
@@ -27,6 +27,8 @@ package tap
 
 import (
 	"context"
+
+	"google.golang.org/grpc/metadata"
 )
 
 // Info defines the relevant information needed by the handles.
@@ -34,6 +36,10 @@ type Info struct {
 	// FullMethodName is the string of grpc method (in the format of
 	// /package.service/method).
 	FullMethodName string
+
+	// Header contains the header metadata received.
+	Header metadata.MD
+
 	// TODO: More to be added.
 }
 
diff --git a/vendor/google.golang.org/grpc/trace.go b/vendor/google.golang.org/grpc/trace.go
index 07a2d26b3e7..9ded79321ba 100644
--- a/vendor/google.golang.org/grpc/trace.go
+++ b/vendor/google.golang.org/grpc/trace.go
@@ -97,8 +97,8 @@ func truncate(x string, l int) string {
 
 // payload represents an RPC request or response payload.
 type payload struct {
-	sent bool        // whether this is an outgoing payload
-	msg  interface{} // e.g. a proto.Message
+	sent bool // whether this is an outgoing payload
+	msg  any  // e.g. a proto.Message
 	// TODO(dsymonds): add stringifying info to codec, and limit how much we hold here?
 }
 
@@ -111,7 +111,7 @@ func (p payload) String() string {
 
 type fmtStringer struct {
 	format string
-	a      []interface{}
+	a      []any
 }
 
 func (f *fmtStringer) String() string {
diff --git a/vendor/google.golang.org/grpc/version.go b/vendor/google.golang.org/grpc/version.go
index 3cc75406218..1ad1ba2ad68 100644
--- a/vendor/google.golang.org/grpc/version.go
+++ b/vendor/google.golang.org/grpc/version.go
@@ -19,4 +19,4 @@
 package grpc
 
 // Version is the current grpc version.
-const Version = "1.56.3"
+const Version = "1.61.0"
diff --git a/vendor/google.golang.org/grpc/vet.sh b/vendor/google.golang.org/grpc/vet.sh
index a8e4732b3d2..5da38a40996 100644
--- a/vendor/google.golang.org/grpc/vet.sh
+++ b/vendor/google.golang.org/grpc/vet.sh
@@ -35,7 +35,6 @@ if [[ "$1" = "-install" ]]; then
   # Install the pinned versions as defined in module tools.
   pushd ./test/tools
   go install \
-    golang.org/x/lint/golint \
     golang.org/x/tools/cmd/goimports \
     honnef.co/go/tools/cmd/staticcheck \
     github.com/client9/misspell/cmd/misspell
@@ -77,12 +76,19 @@ fi
 not grep 'func Test[^(]' *_test.go
 not grep 'func Test[^(]' test/*.go
 
+# - Check for typos in test function names
+git grep 'func (s) ' -- "*_test.go" | not grep -v 'func (s) Test'
+git grep 'func [A-Z]' -- "*_test.go" | not grep -v 'func Test\|Benchmark\|Example'
+
 # - Do not import x/net/context.
 not git grep -l 'x/net/context' -- "*.go"
 
 # - Do not import math/rand for real library code.  Use internal/grpcrand for
 #   thread safety.
-git grep -l '"math/rand"' -- "*.go" 2>&1 | not grep -v '^examples\|^stress\|grpcrand\|^benchmark\|wrr_test'
+git grep -l '"math/rand"' -- "*.go" 2>&1 | not grep -v '^examples\|^interop/stress\|grpcrand\|^benchmark\|wrr_test'
+
+# - Do not use "interface{}"; use "any" instead.
+git grep -l 'interface{}' -- "*.go" 2>&1 | not grep -v '\.pb\.go\|protoc-gen-go-grpc\|grpc_testing_not_regenerate'
 
 # - Do not call grpclog directly. Use grpclog.Component instead.
 git grep -l -e 'grpclog.I' --or -e 'grpclog.W' --or -e 'grpclog.E' --or -e 'grpclog.F' --or -e 'grpclog.V' -- "*.go" | not grep -v '^grpclog/component.go\|^internal/grpctest/tlogger_test.go'
@@ -90,13 +96,15 @@ git grep -l -e 'grpclog.I' --or -e 'grpclog.W' --or -e 'grpclog.E' --or -e 'grpc
 # - Ensure all ptypes proto packages are renamed when importing.
 not git grep "\(import \|^\s*\)\"github.com/golang/protobuf/ptypes/" -- "*.go"
 
+# - Ensure all usages of grpc_testing package are renamed when importing.
+not git grep "\(import \|^\s*\)\"google.golang.org/grpc/interop/grpc_testing" -- "*.go"
+
 # - Ensure all xds proto imports are renamed to *pb or *grpc.
 git grep '"github.com/envoyproxy/go-control-plane/envoy' -- '*.go' ':(exclude)*.pb.go' | not grep -v 'pb "\|grpc "'
 
 misspell -error .
 
-# - gofmt, goimports, golint (with exceptions for generated code), go vet,
-# go mod tidy.
+# - gofmt, goimports, go vet, go mod tidy.
 # Perform these checks on each module inside gRPC.
 for MOD_FILE in $(find . -name 'go.mod'); do
   MOD_DIR=$(dirname ${MOD_FILE})
@@ -104,105 +112,79 @@ for MOD_FILE in $(find . -name 'go.mod'); do
   go vet -all ./... | fail_on_output
   gofmt -s -d -l . 2>&1 | fail_on_output
   goimports -l . 2>&1 | not grep -vE "\.pb\.go"
-  golint ./... 2>&1 | not grep -vE "/grpc_testing_not_regenerate/.*\.pb\.go:"
 
-  go mod tidy -compat=1.17
+  go mod tidy -compat=1.19
   git status --porcelain 2>&1 | fail_on_output || \
     (git status; git --no-pager diff; exit 1)
   popd
 done
 
 # - Collection of static analysis checks
-#
-# TODO(dfawley): don't use deprecated functions in examples or first-party
-# plugins.
-# TODO(dfawley): enable ST1019 (duplicate imports) but allow for protobufs.
 SC_OUT="$(mktemp)"
-staticcheck -go 1.19 -checks 'inherit,-ST1015,-ST1019,-SA1019' ./... > "${SC_OUT}" || true
-# Error if anything other than deprecation warnings are printed.
-not grep -v "is deprecated:.*SA1019" "${SC_OUT}"
-# Only ignore the following deprecated types/fields/functions.
-not grep -Fv '.CredsBundle
-.HeaderMap
-.Metadata is deprecated: use Attributes
-.NewAddress
-.NewServiceConfig
-.Type is deprecated: use Attributes
-BuildVersion is deprecated
-balancer.ErrTransientFailure
-balancer.Picker
-extDesc.Filename is deprecated
-github.com/golang/protobuf/jsonpb is deprecated
-grpc.CallCustomCodec
-grpc.Code
-grpc.Compressor
-grpc.CustomCodec
-grpc.Decompressor
-grpc.MaxMsgSize
-grpc.MethodConfig
-grpc.NewGZIPCompressor
-grpc.NewGZIPDecompressor
-grpc.RPCCompressor
-grpc.RPCDecompressor
-grpc.ServiceConfig
-grpc.WithCompressor
-grpc.WithDecompressor
-grpc.WithDialer
-grpc.WithMaxMsgSize
-grpc.WithServiceConfig
-grpc.WithTimeout
-http.CloseNotifier
-info.SecurityVersion
-proto is deprecated
-proto.InternalMessageInfo is deprecated
-proto.EnumName is deprecated
-proto.ErrInternalBadWireType is deprecated
-proto.FileDescriptor is deprecated
-proto.Marshaler is deprecated
-proto.MessageType is deprecated
-proto.RegisterEnum is deprecated
-proto.RegisterFile is deprecated
-proto.RegisterType is deprecated
-proto.RegisterExtension is deprecated
-proto.RegisteredExtension is deprecated
-proto.RegisteredExtensions is deprecated
-proto.RegisterMapType is deprecated
-proto.Unmarshaler is deprecated
-resolver.Backend
-resolver.GRPCLB
+staticcheck -go 1.19 -checks 'all' ./... > "${SC_OUT}" || true
+
+# Error for anything other than checks that need exclusions.
+grep -v "(ST1000)" "${SC_OUT}" | grep -v "(SA1019)" | grep -v "(ST1003)" | not grep -v "(ST1019)\|\(other import of\)"
+
+# Exclude underscore checks for generated code.
+grep "(ST1003)" "${SC_OUT}" | not grep -v '\(.pb.go:\)\|\(code_string_test.go:\)\|\(grpc_testing_not_regenerate\)'
+
+# Error for duplicate imports not including grpc protos.
+grep "(ST1019)\|\(other import of\)" "${SC_OUT}" | not grep -Fv 'XXXXX PleaseIgnoreUnused
+channelz/grpc_channelz_v1"
+go-control-plane/envoy
+grpclb/grpc_lb_v1"
+health/grpc_health_v1"
+interop/grpc_testing"
+orca/v3"
+proto/grpc_gcp"
+proto/grpc_lookup_v1"
+reflection/grpc_reflection_v1"
+reflection/grpc_reflection_v1alpha"
+XXXXX PleaseIgnoreUnused'
+
+# Error for any package comments not in generated code.
+grep "(ST1000)" "${SC_OUT}" | not grep -v "\.pb\.go:"
+
+# Only ignore the following deprecated types/fields/functions and exclude
+# generated code.
+grep "(SA1019)" "${SC_OUT}" | not grep -Fv 'XXXXX PleaseIgnoreUnused
+XXXXX Protobuf related deprecation errors:
+"github.com/golang/protobuf
+.pb.go:
+grpc_testing_not_regenerate
+: ptypes.
+proto.RegisterType
+XXXXX gRPC internal usage deprecation errors:
+"google.golang.org/grpc
+: grpc.
+: v1alpha.
+: v1alphareflectionpb.
+BalancerAttributes is deprecated:
+CredsBundle is deprecated:
+Metadata is deprecated: use Attributes instead.
+NewSubConn is deprecated:
+OverrideServerName is deprecated:
+RemoveSubConn is deprecated:
+SecurityVersion is deprecated:
 Target is deprecated: Use the Target field in the BuildOptions instead.
-xxx_messageInfo_
-' "${SC_OUT}"
-
-# - special golint on package comments.
-lint_package_comment_per_package() {
-  # Number of files in this go package.
-  fileCount=$(go list -f '{{len .GoFiles}}' $1)
-  if [ ${fileCount} -eq 0 ]; then
-    return 0
-  fi
-  # Number of package errors generated by golint.
-  lintPackageCommentErrorsCount=$(golint --min_confidence 0 $1 | grep -c "should have a package comment")
-  # golint complains about every file that's missing the package comment. If the
-  # number of files for this package is greater than the number of errors, there's
-  # at least one file with package comment, good. Otherwise, fail.
-  if [ ${fileCount} -le ${lintPackageCommentErrorsCount} ]; then
-    echo "Package $1 (with ${fileCount} files) is missing package comment"
-    return 1
-  fi
-}
-lint_package_comment() {
-  set +ex
-
-  count=0
-  for i in $(go list ./...); do
-    lint_package_comment_per_package "$i"
-    ((count += $?))
-  done
-
-  set -ex
-  return $count
-}
-lint_package_comment
+UpdateAddresses is deprecated:
+UpdateSubConnState is deprecated:
+balancer.ErrTransientFailure is deprecated:
+grpc/reflection/v1alpha/reflection.proto
+XXXXX xDS deprecated fields we support
+.ExactMatch
+.PrefixMatch
+.SafeRegexMatch
+.SuffixMatch
+GetContainsMatch
+GetExactMatch
+GetMatchSubjectAltNames
+GetPrefixMatch
+GetSafeRegexMatch
+GetSuffixMatch
+GetTlsCertificateCertificateProviderInstance
+GetValidationContextCertificateProviderInstance
+XXXXX PleaseIgnoreUnused'
 
 echo SUCCESS
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 4e6d4f34e1a..1a549f1cd2e 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -38,6 +38,8 @@ github.com/Microsoft/go-winio/internal/fs
 github.com/Microsoft/go-winio/internal/socket
 github.com/Microsoft/go-winio/internal/stringbuffer
 github.com/Microsoft/go-winio/pkg/guid
+# github.com/Microsoft/hcsshim v0.11.4
+## explicit; go 1.18
 # github.com/Shopify/sarama v1.20.1
 ## explicit
 github.com/Shopify/sarama
@@ -76,8 +78,8 @@ github.com/consensys/gnark-crypto/field
 github.com/consensys/gnark-crypto/field/internal/addchain
 github.com/consensys/gnark-crypto/internal/generator/config
 github.com/consensys/gnark-crypto/internal/parallel
-# github.com/containerd/containerd v1.6.18
-## explicit; go 1.17
+# github.com/containerd/containerd v1.6.26
+## explicit; go 1.19
 github.com/containerd/containerd/pkg/userns
 # github.com/davecgh/go-spew v1.1.1
 ## explicit
@@ -233,11 +235,12 @@ github.com/hyperledger/fabric-protos-go/transientstore
 # github.com/inconshreveable/mousetrap v1.0.0
 ## explicit
 github.com/inconshreveable/mousetrap
-# github.com/klauspost/compress v1.13.6
-## explicit; go 1.15
+# github.com/klauspost/compress v1.17.6
+## explicit; go 1.19
 github.com/klauspost/compress
 github.com/klauspost/compress/fse
 github.com/klauspost/compress/huff0
+github.com/klauspost/compress/internal/cpuinfo
 github.com/klauspost/compress/internal/snapref
 github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
@@ -330,8 +333,8 @@ github.com/onsi/gomega/types
 # github.com/opencontainers/go-digest v1.0.0
 ## explicit; go 1.13
 github.com/opencontainers/go-digest
-# github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799
-## explicit
+# github.com/opencontainers/image-spec v1.1.0-rc6
+## explicit; go 1.18
 github.com/opencontainers/image-spec/specs-go
 github.com/opencontainers/image-spec/specs-go/v1
 # github.com/opencontainers/runc v1.1.12
@@ -471,7 +474,7 @@ go.uber.org/zap/zaptest/observer
 # golang.org/x/crypto v0.18.0
 ## explicit; go 1.18
 golang.org/x/crypto/sha3
-# golang.org/x/mod v0.14.0
+# golang.org/x/mod v0.15.0
 ## explicit; go 1.18
 golang.org/x/mod/semver
 # golang.org/x/net v0.20.0
@@ -485,6 +488,8 @@ golang.org/x/net/http2/hpack
 golang.org/x/net/idna
 golang.org/x/net/internal/timeseries
 golang.org/x/net/trace
+# golang.org/x/sync v0.6.0
+## explicit; go 1.18
 # golang.org/x/sys v0.16.0
 ## explicit; go 1.18
 golang.org/x/sys/cpu
@@ -532,11 +537,11 @@ golang.org/x/tools/internal/pkgbits
 golang.org/x/tools/internal/tokeninternal
 golang.org/x/tools/internal/typeparams
 golang.org/x/tools/internal/typesinternal
-# google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1
+# google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17
 ## explicit; go 1.19
 google.golang.org/genproto/googleapis/rpc/status
-# google.golang.org/grpc v1.56.3
-## explicit; go 1.17
+# google.golang.org/grpc v1.61.0
+## explicit; go 1.19
 google.golang.org/grpc
 google.golang.org/grpc/attributes
 google.golang.org/grpc/backoff
@@ -568,10 +573,12 @@ google.golang.org/grpc/internal/grpclog
 google.golang.org/grpc/internal/grpcrand
 google.golang.org/grpc/internal/grpcsync
 google.golang.org/grpc/internal/grpcutil
+google.golang.org/grpc/internal/idle
 google.golang.org/grpc/internal/metadata
 google.golang.org/grpc/internal/pretty
 google.golang.org/grpc/internal/resolver
 google.golang.org/grpc/internal/resolver/dns
+google.golang.org/grpc/internal/resolver/dns/internal
 google.golang.org/grpc/internal/resolver/passthrough
 google.golang.org/grpc/internal/resolver/unix
 google.golang.org/grpc/internal/serviceconfig
@@ -583,6 +590,7 @@ google.golang.org/grpc/keepalive
 google.golang.org/grpc/metadata
 google.golang.org/grpc/peer
 google.golang.org/grpc/resolver
+google.golang.org/grpc/resolver/dns
 google.golang.org/grpc/serviceconfig
 google.golang.org/grpc/stats
 google.golang.org/grpc/status