diff --git a/CHANGES.md b/CHANGES.md index 74125613f2a8..666cd31ba0f9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,73 @@ +# Synapse 1.90.0 (2023-08-15) + +No significant changes since 1.90.0rc1. + + +# Synapse 1.90.0rc1 (2023-08-08) + +### Features + +- Scope transaction IDs to devices (implement [MSC3970](https://github.com/matrix-org/matrix-spec-proposals/pull/3970)). ([\#15629](https://github.com/matrix-org/synapse/issues/15629)) +- Remove old rows from the `cache_invalidation_stream_by_instance` table automatically (this table is unused in SQLite). ([\#15868](https://github.com/matrix-org/synapse/issues/15868)) + +### Bugfixes + +- Fix a long-standing bug where purging history and paginating simultaneously could lead to database corruption when using workers. ([\#15791](https://github.com/matrix-org/synapse/issues/15791)) +- Fix a long-standing bug where the profile endpoint returned a 404 when the user's display name was empty. ([\#16012](https://github.com/matrix-org/synapse/issues/16012)) +- Fix a long-standing bug where the `synapse_port_db` failed to configure sequences for application services and partial stated rooms. ([\#16043](https://github.com/matrix-org/synapse/issues/16043)) +- Fix long-standing bug with deletion in dehydrated devices v2. ([\#16046](https://github.com/matrix-org/synapse/issues/16046)) + +### Updates to the Docker image + +- Add `org.opencontainers.image.version` labels to Docker containers [published by Matrix.org](https://hub.docker.com/r/matrixdotorg/synapse). Contributed by Mo Balaa. ([\#15972](https://github.com/matrix-org/synapse/issues/15972), [\#16009](https://github.com/matrix-org/synapse/issues/16009)) + +### Improved Documentation + +- Add an internal documentation page describing the ["streams" used within Synapse](https://matrix-org.github.io/synapse/v1.90/development/synapse_architecture/streams.html). 
([\#16015](https://github.com/matrix-org/synapse/issues/16015)) +- Clarify comment on the keys/upload over replication endpoint. ([\#16016](https://github.com/matrix-org/synapse/issues/16016)) +- Do not expose Admin API in caddy reverse proxy example. Contributed by @NilsIrl. ([\#16027](https://github.com/matrix-org/synapse/issues/16027)) + +### Deprecations and Removals + +- Remove support for legacy application service paths. ([\#15964](https://github.com/matrix-org/synapse/issues/15964)) +- Move support for application service query parameter authorization behind a configuration option. ([\#16017](https://github.com/matrix-org/synapse/issues/16017)) + +### Internal Changes + +- Update SQL queries to inline boolean parameters as supported in SQLite 3.27. ([\#15525](https://github.com/matrix-org/synapse/issues/15525)) +- Allow for the configuration of the backoff algorithm for federation destinations. ([\#15754](https://github.com/matrix-org/synapse/issues/15754)) +- Allow modules to check whether the current worker is configured to run background tasks. ([\#15991](https://github.com/matrix-org/synapse/issues/15991)) +- Update support for [MSC3958](https://github.com/matrix-org/matrix-spec-proposals/pull/3958) to match the latest revision of the MSC. ([\#15992](https://github.com/matrix-org/synapse/issues/15992)) +- Allow modules to schedule delayed background calls. ([\#15993](https://github.com/matrix-org/synapse/issues/15993)) +- Properly overwrite the `redacts` content-property for forwards-compatibility with room versions 1 through 10. ([\#16013](https://github.com/matrix-org/synapse/issues/16013)) +- Fix building the nix development environment on MacOS systems. ([\#16019](https://github.com/matrix-org/synapse/issues/16019)) +- Remove leading and trailing spaces when setting a display name. ([\#16031](https://github.com/matrix-org/synapse/issues/16031)) +- Combine duplicated code. 
([\#16023](https://github.com/matrix-org/synapse/issues/16023)) +- Collect additional metrics from `ResponseCache` for eviction. ([\#16028](https://github.com/matrix-org/synapse/issues/16028)) +- Fix endpoint improperly declaring support for MSC3814. ([\#16068](https://github.com/matrix-org/synapse/issues/16068)) +- Drop backwards compat hack for event serialization. ([\#16069](https://github.com/matrix-org/synapse/issues/16069)) + +### Updates to locked dependencies + +* Update PyYAML to 6.0.1. ([\#16011](https://github.com/matrix-org/synapse/issues/16011)) +* Bump cryptography from 41.0.2 to 41.0.3. ([\#16048](https://github.com/matrix-org/synapse/issues/16048)) +* Bump furo from 2023.5.20 to 2023.7.26. ([\#16077](https://github.com/matrix-org/synapse/issues/16077)) +* Bump immutabledict from 2.2.4 to 3.0.0. ([\#16034](https://github.com/matrix-org/synapse/issues/16034)) +* Update certifi to 2023.7.22 and pygments to 2.15.1. ([\#16044](https://github.com/matrix-org/synapse/issues/16044)) +* Bump jsonschema from 4.18.3 to 4.19.0. ([\#16081](https://github.com/matrix-org/synapse/issues/16081)) +* Bump phonenumbers from 8.13.14 to 8.13.18. ([\#16076](https://github.com/matrix-org/synapse/issues/16076)) +* Bump regex from 1.9.1 to 1.9.3. ([\#16073](https://github.com/matrix-org/synapse/issues/16073)) +* Bump serde from 1.0.171 to 1.0.175. ([\#15982](https://github.com/matrix-org/synapse/issues/15982)) +* Bump serde from 1.0.175 to 1.0.179. ([\#16033](https://github.com/matrix-org/synapse/issues/16033)) +* Bump serde from 1.0.179 to 1.0.183. ([\#16074](https://github.com/matrix-org/synapse/issues/16074)) +* Bump serde_json from 1.0.103 to 1.0.104. ([\#16032](https://github.com/matrix-org/synapse/issues/16032)) +* Bump service-identity from 21.1.0 to 23.1.0. ([\#16038](https://github.com/matrix-org/synapse/issues/16038)) +* Bump types-commonmark from 0.9.2.3 to 0.9.2.4. 
([\#16037](https://github.com/matrix-org/synapse/issues/16037)) +* Bump types-jsonschema from 4.17.0.8 to 4.17.0.10. ([\#16036](https://github.com/matrix-org/synapse/issues/16036)) +* Bump types-netaddr from 0.8.0.8 to 0.8.0.9. ([\#16035](https://github.com/matrix-org/synapse/issues/16035)) +* Bump types-opentracing from 2.4.10.5 to 2.4.10.6. ([\#16078](https://github.com/matrix-org/synapse/issues/16078)) +* Bump types-setuptools from 68.0.0.0 to 68.0.0.3. ([\#16079](https://github.com/matrix-org/synapse/issues/16079)) + # Synapse 1.89.0 (2023-08-01) No significant changes since 1.89.0rc1. diff --git a/Cargo.lock b/Cargo.lock index 2264e672455c..45e0f116e6e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -291,9 +291,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ "aho-corasick", "memchr", @@ -303,9 +303,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.2" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83d3daa6976cffb758ec878f108ba0e062a45b2d6ca3a2cca965338855476caf" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" dependencies = [ "aho-corasick", "memchr", @@ -314,9 +314,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "ryu" @@ -332,29 +332,29 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" -version = "1.0.171" +version = "1.0.183" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.171" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.28", ] [[package]] name = "serde_json" -version = "1.0.103" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" dependencies = [ "itoa", "ryu", @@ -386,9 +386,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.25" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e3fc8c0c74267e2df136e5e5fb656a464158aa57624053375eb9c8c6e25ae2" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", diff --git a/debian/changelog b/debian/changelog index 90240b808205..ad9a4b3c8cfd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,15 @@ +matrix-synapse-py3 (1.90.0) stable; urgency=medium + + * New Synapse release 1.90.0. + + -- Synapse Packaging team Tue, 15 Aug 2023 11:17:34 +0100 + +matrix-synapse-py3 (1.90.0~rc1) stable; urgency=medium + + * New Synapse release 1.90.0rc1. + + -- Synapse Packaging team Tue, 08 Aug 2023 15:29:34 +0100 + matrix-synapse-py3 (1.89.0) stable; urgency=medium * New Synapse release 1.89.0. 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index a8e5ddad9d48..31b303202913 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -97,6 +97,7 @@ - [Cancellation](development/synapse_architecture/cancellation.md) - [Log Contexts](log_contexts.md) - [Replication](replication.md) + - [Streams](development/synapse_architecture/streams.md) - [TCP Replication](tcp_replication.md) - [Faster remote joins](development/synapse_architecture/faster_joins.md) - [Internal Documentation](development/internal_documentation/README.md) diff --git a/docs/development/synapse_architecture/streams.md b/docs/development/synapse_architecture/streams.md new file mode 100644 index 000000000000..bee0b8a8c0a2 --- /dev/null +++ b/docs/development/synapse_architecture/streams.md @@ -0,0 +1,157 @@ +## Streams + +Synapse has a concept of "streams", which are roughly described in [`id_generators.py`]( + https://github.com/matrix-org/synapse/blob/develop/synapse/storage/util/id_generators.py +). +Generally speaking, streams are a series of notifications that something in Synapse's database has changed that the application might need to respond to. +For example: + +- The events stream reports new events (PDUs) that Synapse creates, or that Synapse accepts from another homeserver. +- The account data stream reports changes to users' [account data](https://spec.matrix.org/v1.7/client-server-api/#client-config). +- The to-device stream reports when a device has a new [to-device message](https://spec.matrix.org/v1.7/client-server-api/#send-to-device-messaging). + +See [`synapse.replication.tcp.streams`]( + https://github.com/matrix-org/synapse/blob/develop/synapse/replication/tcp/streams/__init__.py +) for the full list of streams. + +It is very helpful to understand the streams mechanism when working on any part of Synapse that needs to respond to changes—especially if those changes are made by different workers. 
+To that end, let's describe streams formally, paraphrasing from the docstring of [`AbstractStreamIdGenerator`]( + https://github.com/matrix-org/synapse/blob/a719b703d9bd0dade2565ddcad0e2f3a7a9d4c37/synapse/storage/util/id_generators.py#L96 +). + +### Definition + +A stream is an append-only log `T1, T2, ..., Tn, ...` of facts[^1] which grows over time. +Only "writers" can add facts to a stream, and there may be multiple writers. + +Each fact has an ID, called its "stream ID". +Readers should only process facts in ascending stream ID order. + +Roughly speaking, each stream is backed by a database table. +It should have a `stream_id` (or similar) bigint column holding stream IDs, plus additional columns as necessary to describe the fact. +Typically, a fact is expressed with a single row in its backing table.[^2] +Within a stream, no two facts may have the same stream_id. + +> _Aside_. Some additional notes on streams' backing tables. +> +> 1. Rich would like to [ditch the backing tables](https://github.com/matrix-org/synapse/issues/13456). +> 2. The backing tables may have other uses. + > For example, the events table backs the events stream, and is read when processing new events. + > But old rows are read from the table all the time, whenever Synapse needs to look up some facts about an event. +> 3. Rich suspects that sometimes the stream is backed by multiple tables, so the stream proper is the union of those tables. + +Stream writers can "reserve" a stream ID, and then later mark it as having been completed. +Stream writers need to track the completion of each stream fact. +In the happy case, completion means a fact has been written to the stream table. +But unhappy cases (e.g. transaction rollback due to an error) also count as completion. +Once completed, the rows written with that stream ID are fixed, and no new rows +will be inserted with that ID. 
+ +### Current stream ID + +For any given stream reader (including writers themselves), we may define a per-writer current stream ID: + +> The current stream ID _for a writer W_ is the largest stream ID such that +> all transactions added by W with equal or smaller ID have completed. + +Similarly, there is a "linear" notion of current stream ID: + +> The "linear" current stream ID is the largest stream ID such that +> all facts (added by any writer) with equal or smaller ID have completed. + +Because different stream readers A and B learn about new facts at different times, A and B may disagree about current stream IDs. +Put differently: we should think of stream readers as being independent of each other, proceeding through a stream of facts at different rates. + +**NB.** For both senses of "current", note that if a writer opens a transaction that never completes, the current stream ID will never advance beyond that writer's last written stream ID. + +For single-writer streams, the per-writer current ID and the linear current ID are the same. +Both senses of current ID are monotonic, but they may "skip" or jump over IDs because facts complete out of order. + + +_Example_. +Consider a single-writer stream which is initially at ID 1. + +| Action | Current stream ID | Notes | +|------------|-------------------|-------------------------------------------------| +| | 1 | | +| Reserve 2 | 1 | | +| Reserve 3 | 1 | | +| Complete 3 | 1 | current ID unchanged, waiting for 2 to complete | +| Complete 2 | 3 | current ID jumps from 1 -> 3 | +| Reserve 4 | 3 | | +| Reserve 5 | 3 | | +| Reserve 6 | 3 | | +| Complete 5 | 3 | | +| Complete 4 | 5 | current ID jumps 3->5, even though 6 is pending | +| Complete 6 | 6 | | + + +### Multi-writer streams + +There are two ways to view a multi-writer stream. + +1. Treat it as a collection of distinct single-writer streams, one + for each writer. +2. Treat it as a single stream. 
+ +The single stream (option 2) is conceptually simpler, and easier to represent (a single stream id). +However, it requires each reader to know about the entire set of writers, to ensure that readers don't erroneously advance their current stream position too early and miss a fact from an unknown writer. +In contrast, multiple parallel streams (option 1) are more complex, requiring more state to represent (map from writer to stream id). +The payoff for doing so is that readers can "peek" ahead to facts that completed on one writer no matter the state of the others, reducing latency. + +Note that a multi-writer stream can be viewed in both ways. +For example, the events stream is treated as multiple single-writer streams (option 1) by the sync handler, so that events are sent to clients as soon as possible. +But the background process that works through events treats them as a single linear stream. + +Another useful example is the cache invalidation stream. +The facts this stream holds are instructions to "you should now invalidate these cache entries". +We only ever treat this as multiple single-writer streams as there is no important ordering between cache invalidations. +(Invalidations are self-contained facts; and the invalidations commute/are idempotent). + +### Writing to streams + +Writers need to track: + - their current position (i.e. their own per-writer stream ID). + - their facts currently awaiting completion. + +At startup, + - the current position of that writer can be found by querying the database (which suggests that facts need to be written to the database atomically, in a transaction); and + - there are no facts awaiting completion. + +To reserve a stream ID, call [`nextval`](https://www.postgresql.org/docs/current/functions-sequence.html) on the appropriate postgres sequence. + +To write a fact to the stream: insert the appropriate rows to the appropriate backing table. 
+ +To complete a fact, first remove it from your map of facts currently awaiting completion. +Then, if no earlier fact is awaiting completion, the writer can advance its current position in that stream. +Upon doing so it should emit an `RDATA` message[^3], once for every fact between the old and the new stream ID. + +### Subscribing to streams + +Readers need to track the current position of every writer. + +At startup, they can find this by contacting each writer with a `REPLICATE` message, +requesting that all writers reply describing their current position in their streams. +Writers reply with a `POSITION` message. + +To learn about new facts, readers should listen for `RDATA` messages and process them to respond to the new fact. +The `RDATA` itself is not a self-contained representation of the fact; +readers will have to query the stream tables for the full details. +Readers must also advance their record of the writer's current position for that stream. + +# Summary + +In a nutshell: we have an append-only log with a "buffer/scratchpad" at the end where we have to wait for the sequence to be linear and contiguous. + + +--- + +[^1]: we use the word _fact_ here for two reasons. +Firstly, the word "event" is already heavily overloaded (PDUs, EDUs, account data, ...) and we don't need to make that worse. +Secondly, "fact" emphasises that the things we append to a stream cannot change after the fact. + +[^2]: A fact might be expressed with 0 rows, e.g. if we opened a transaction to persist an event, but failed and rolled the transaction back before marking the fact as completed. +In principle a fact might be expressed with 2 or more rows; if so, each of those rows should share the fact's stream ID. + +[^3]: This communication used to happen directly with the writers [over TCP](../../tcp_replication.md); +nowadays it's done via Redis's Pubsub. 
diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md index 06337e7c0039..fe9519b4b624 100644 --- a/docs/reverse_proxy.md +++ b/docs/reverse_proxy.md @@ -95,7 +95,7 @@ matrix.example.com { } example.com:8448 { - reverse_proxy localhost:8008 + reverse_proxy /_matrix/* localhost:8008 } ``` diff --git a/docs/upgrade.md b/docs/upgrade.md index 5dde6c769e85..f50a279e985a 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,21 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.90.0 + +## App service query parameter authorization is now a configuration option + +Synapse v1.81.0 deprecated application service authorization via query parameters as this is +considered insecure - and from Synapse v1.71.0 forwards the application service token has also been sent via +[the `Authorization` header](https://spec.matrix.org/v1.6/application-service-api/#authorization), making the insecure +query parameter authorization redundant. Since removing the ability to continue to use query parameters could break +backwards compatibility it has now been put behind a configuration option, `use_appservice_legacy_authorization`. +This option defaults to false, but can be activated by adding +```yaml +use_appservice_legacy_authorization: true +``` +to your configuration. + # Upgrading to v1.89.0 ## Removal of unspecced `user` property for `/register` @@ -97,7 +112,6 @@ The standard `username` property should be used instead. See the [Application Service specification](https://spec.matrix.org/v1.7/application-service-api/#server-admin-style-permissions) for more information. 
- # Upgrading to v1.88.0 ## Minimum supported Python version diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 4e6fcd085acb..2987c9332d14 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1242,6 +1242,14 @@ like sending a federation transaction. * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts. * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts. +The following options control the retry logic when communicating with a specific homeserver destination. +Unlike the previous configuration options, these values apply across all requests +for a given destination and the state of the backoff is stored in the database. + +* `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m. +* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2. +* `destination_max_retry_interval`: a cap on the backoff. Defaults to a week. + Example configuration: ```yaml federation: @@ -1250,6 +1258,9 @@ federation: max_long_retry_delay: 100s max_short_retries: 5 max_long_retries: 20 + destination_min_retry_interval: 30s + destination_retry_multiplier: 5 + destination_max_retry_interval: 12h ``` --- ## Caching @@ -2837,6 +2848,20 @@ Example configuration: ```yaml track_appservice_user_ips: true ``` +--- +### `use_appservice_legacy_authorization` + +Whether to send the application service access tokens via the `access_token` query parameter +per older versions of the Matrix specification. Defaults to false. Set to true to enable sending +access tokens via a query parameter. + +**Enabling this option is considered insecure and is not recommended. 
** + +Example configuration: +```yaml +use_appservice_legacy_authorization: true +``` + --- ### `macaroon_secret_key` diff --git a/flake.lock b/flake.lock index eb5a65e44527..1a2d9014c35e 100644 --- a/flake.lock +++ b/flake.lock @@ -8,11 +8,11 @@ "pre-commit-hooks": "pre-commit-hooks" }, "locked": { - "lastModified": 1683102061, - "narHash": "sha256-kOphT6V0uQUlFNBP3GBjs7DAU7fyZGGqCs9ue1gNY6E=", + "lastModified": 1690534632, + "narHash": "sha256-kOXS9x5y17VKliC7wZxyszAYrWdRl1JzggbQl0gyo94=", "owner": "cachix", "repo": "devenv", - "rev": "ff1f29e41756553174d596cafe3a9fa77595100b", + "rev": "6568e7e485a46bbf32051e4d6347fa1fed8b2f25", "type": "github" }, "original": { @@ -39,12 +39,33 @@ } }, "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1685518550, + "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "inputs": { + "systems": "systems_2" + }, "locked": { - "lastModified": 1667395993, - "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", + "lastModified": 1681202837, + "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", "owner": "numtide", "repo": "flake-utils", - "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", + "rev": "cfacdce06f30d2b68473a46042957675eebb3401", "type": "github" }, "original": { @@ -167,27 +188,27 @@ }, "nixpkgs-stable": { "locked": { - "lastModified": 1673800717, - "narHash": "sha256-SFHraUqLSu5cC6IxTprex/nTsI81ZQAtDvlBvGDWfnA=", + "lastModified": 1685801374, + "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "2f9fd351ec37f5d479556cd48be4ca340da59b8f", + "rev": "c37ca420157f4abc31e26f436c1145f8951ff373", "type": "github" }, "original": { "owner": "NixOS", - "ref": 
"nixos-22.11", + "ref": "nixos-23.05", "repo": "nixpkgs", "type": "github" } }, "nixpkgs_2": { "locked": { - "lastModified": 1682519441, - "narHash": "sha256-Vsq/8NOtvW1AoC6shCBxRxZyMQ+LhvPuJT6ltbzuv+Y=", + "lastModified": 1690535733, + "narHash": "sha256-WgjUPscQOw3cB8yySDGlyzo6cZNihnRzUwE9kadv/5I=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "7a32a141db568abde9bc389845949dc2a454dfd3", + "rev": "8cacc05fbfffeaab910e8c2c9e2a7c6b32ce881a", "type": "github" }, "original": { @@ -228,11 +249,11 @@ "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1678376203, - "narHash": "sha256-3tyYGyC8h7fBwncLZy5nCUjTJPrHbmNwp47LlNLOHSM=", + "lastModified": 1688056373, + "narHash": "sha256-2+SDlNRTKsgo3LBRiMUcoEUb6sDViRNQhzJquZ4koOI=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "1a20b9708962096ec2481eeb2ddca29ed747770a", + "rev": "5843cf069272d92b60c3ed9e55b7a8989c01d4c7", "type": "github" }, "original": { @@ -246,7 +267,7 @@ "devenv": "devenv", "nixpkgs": "nixpkgs_2", "rust-overlay": "rust-overlay", - "systems": "systems_2" + "systems": "systems_3" } }, "rust-overlay": { @@ -255,11 +276,11 @@ "nixpkgs": "nixpkgs_3" }, "locked": { - "lastModified": 1689302058, - "narHash": "sha256-yD74lcHTrw4niXcE9goJLbzsgyce48rQQoy5jK5ZK40=", + "lastModified": 1690510705, + "narHash": "sha256-6mjs3Gl9/xrseFh9iNcNq1u5yJ/MIoAmjoaG7SXZDIE=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "7b8dbbf4c67ed05a9bf3d9e658c12d4108bc24c8", + "rev": "851ae4c128905a62834d53ce7704ebc1ba481bea", "type": "github" }, "original": { @@ -297,6 +318,21 @@ "repo": "default", "type": "github" } + }, + "systems_3": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix 
b/flake.nix index bacb70f478a2..e70a41dfc2fa 100644 --- a/flake.nix +++ b/flake.nix @@ -39,8 +39,8 @@ { inputs = { - # Use the master/unstable branch of nixpkgs. The latest stable, 22.11, - # does not contain 'perl536Packages.NetAsyncHTTP', needed by Sytest. + # Use the master/unstable branch of nixpkgs. Used to fetch the latest + # available versions of packages. nixpkgs.url = "github:NixOS/nixpkgs/master"; # Output a development shell for x86_64/aarch64 Linux/Darwin (MacOS). systems.url = "github:nix-systems/default"; diff --git a/pyproject.toml b/pyproject.toml index 3866cac3393c..1ebf5e754e8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ manifest-path = "rust/Cargo.toml" [tool.poetry] name = "matrix-synapse" -version = "1.89.0" +version = "1.90.0" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index c2f33258a4e3..6e1eab2a3b29 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -13,6 +13,9 @@ // limitations under the License. 
#![feature(test)] + +use std::borrow::Cow; + use synapse::push::{ evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, JsonValue, PushRules, SimpleJsonValue, @@ -26,15 +29,15 @@ fn bench_match_exact(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("m.text"))), ), ( "room_id".to_string(), - JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("!room:server"))), ), ( "content.body".to_string(), - JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("test message"))), ), ] .into_iter() @@ -71,15 +74,15 @@ fn bench_match_word(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("m.text"))), ), ( "room_id".to_string(), - JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("!room:server"))), ), ( "content.body".to_string(), - JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("test message"))), ), ] .into_iter() @@ -116,15 +119,15 @@ fn bench_match_word_miss(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("m.text"))), ), ( "room_id".to_string(), - JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("!room:server"))), ), ( "content.body".to_string(), - JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("test message"))), ), ] .into_iter() @@ 
-161,15 +164,15 @@ fn bench_eval_message(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("m.text"))), ), ( "room_id".to_string(), - JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("!room:server"))), ), ( "content.body".to_string(), - JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("test message"))), ), ] .into_iter() diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index 2ad73fcd42bd..2edb8a0d43a2 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -187,7 +187,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ priority_class: 5, conditions: Cow::Borrowed(&[Condition::Known( KnownCondition::ExactEventPropertyContainsType(EventPropertyIsTypeCondition { - key: Cow::Borrowed("content.m\\.mentions.user_ids"), + key: Cow::Borrowed(r"content.m\.mentions.user_ids"), value_type: Cow::Borrowed(&EventMatchPatternType::UserId), }), )]), @@ -208,8 +208,8 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ priority_class: 5, conditions: Cow::Borrowed(&[ Condition::Known(KnownCondition::EventPropertyIs(EventPropertyIsCondition { - key: Cow::Borrowed("content.m\\.mentions.room"), - value: Cow::Borrowed(&SimpleJsonValue::Bool(true)), + key: Cow::Borrowed(r"content.m\.mentions.room"), + value: Cow::Owned(SimpleJsonValue::Bool(true)), })), Condition::Known(KnownCondition::SenderNotificationPermission { key: Cow::Borrowed("room"), @@ -269,6 +269,21 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ default: true, default_enabled: true, }, + // We don't want to notify on edits *unless* the edit directly mentions a + // user, which is handled above. 
+ PushRule { + rule_id: Cow::Borrowed("global/override/.org.matrix.msc3958.suppress_edits"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventPropertyIs( + EventPropertyIsCondition { + key: Cow::Borrowed(r"content.m\.relates_to.rel_type"), + value: Cow::Owned(SimpleJsonValue::Str(Cow::Borrowed("m.replace"))), + }, + ))]), + actions: Cow::Borrowed(&[]), + default: true, + default_enabled: true, + }, PushRule { rule_id: Cow::Borrowed("global/override/.org.matrix.msc3930.rule.poll_response"), priority_class: 5, diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index 59c53b1776c6..48e670478bf7 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -117,7 +117,7 @@ impl PushRuleEvaluator { msc3931_enabled: bool, ) -> Result { let body = match flattened_keys.get("content.body") { - Some(JsonValue::Value(SimpleJsonValue::Str(s))) => s.clone(), + Some(JsonValue::Value(SimpleJsonValue::Str(s))) => s.clone().into_owned(), _ => String::new(), }; @@ -313,13 +313,15 @@ impl PushRuleEvaluator { }; let pattern = match &*exact_event_match.value_type { - EventMatchPatternType::UserId => user_id, - EventMatchPatternType::UserLocalpart => get_localpart_from_id(user_id)?, + EventMatchPatternType::UserId => user_id.to_owned(), + EventMatchPatternType::UserLocalpart => { + get_localpart_from_id(user_id)?.to_owned() + } }; self.match_event_property_contains( exact_event_match.key.clone(), - Cow::Borrowed(&SimpleJsonValue::Str(pattern.to_string())), + Cow::Borrowed(&SimpleJsonValue::Str(Cow::Owned(pattern))), )? 
} KnownCondition::ContainsDisplayName => { @@ -494,7 +496,7 @@ fn push_rule_evaluator() { let mut flattened_keys = BTreeMap::new(); flattened_keys.insert( "content.body".to_string(), - JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("foo bar bob hello"))), ); let evaluator = PushRuleEvaluator::py_new( flattened_keys, @@ -522,7 +524,7 @@ fn test_requires_room_version_supports_condition() { let mut flattened_keys = BTreeMap::new(); flattened_keys.insert( "content.body".to_string(), - JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())), + JsonValue::Value(SimpleJsonValue::Str(Cow::Borrowed("foo bar bob hello"))), ); let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()]; let evaluator = PushRuleEvaluator::py_new( diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 514980579b63..829fb79d0e5b 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -256,7 +256,7 @@ impl<'de> Deserialize<'de> for Action { #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(untagged)] pub enum SimpleJsonValue { - Str(String), + Str(Cow<'static, str>), Int(i64), Bool(bool), Null, @@ -265,7 +265,7 @@ pub enum SimpleJsonValue { impl<'source> FromPyObject<'source> for SimpleJsonValue { fn extract(ob: &'source PyAny) -> PyResult { if let Ok(s) = ::try_from(ob) { - Ok(SimpleJsonValue::Str(s.to_string())) + Ok(SimpleJsonValue::Str(Cow::Owned(s.to_string()))) // A bool *is* an int, ensure we try bool first. 
} else if let Ok(b) = ::try_from(ob) { Ok(SimpleJsonValue::Bool(b.extract()?)) @@ -585,7 +585,7 @@ impl FilteredPushRules { } if !self.msc3958_suppress_edits_enabled - && rule.rule_id == "global/override/.com.beeper.suppress_edits" + && rule.rule_id == "global/override/.org.matrix.msc3958.suppress_edits" { return false; } diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 7c4aa0afa269..22c84fbd5b3f 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -761,7 +761,7 @@ def alter_table(txn: LoggingTransaction) -> None: # Step 2. Set up sequences # - # We do this before porting the tables so that event if we fail half + # We do this before porting the tables so that even if we fail half # way through the postgres DB always have sequences that are greater # than their respective tables. If we don't then creating the # `DataStore` object will fail due to the inconsistency. @@ -769,6 +769,10 @@ def alter_table(txn: LoggingTransaction) -> None: await self._setup_state_group_id_seq() await self._setup_user_id_seq() await self._setup_events_stream_seqs() + await self._setup_sequence( + "un_partial_stated_event_stream_sequence", + ("un_partial_stated_event_stream",), + ) await self._setup_sequence( "device_inbox_sequence", ("device_inbox", "device_federation_outbox") ) @@ -779,6 +783,11 @@ def alter_table(txn: LoggingTransaction) -> None: await self._setup_sequence("receipts_sequence", ("receipts_linearized",)) await self._setup_sequence("presence_stream_sequence", ("presence_stream",)) await self._setup_auth_chain_sequence() + await self._setup_sequence( + "application_services_txn_id_seq", + ("application_services_txns",), + "txn_id", + ) # Step 3. Get tables. 
self.progress.set_state("Fetching tables") @@ -1083,7 +1092,10 @@ def _setup_events_stream_seqs_set_pos(txn: LoggingTransaction) -> None: ) async def _setup_sequence( - self, sequence_name: str, stream_id_tables: Iterable[str] + self, + sequence_name: str, + stream_id_tables: Iterable[str], + column_name: str = "stream_id", ) -> None: """Set a sequence to the correct value.""" current_stream_ids = [] @@ -1093,7 +1105,7 @@ async def _setup_sequence( await self.sqlite_store.db_pool.simple_select_one_onecol( table=stream_id_table, keyvalues={}, - retcol="COALESCE(MAX(stream_id), 1)", + retcol=f"COALESCE(MAX({column_name}), 1)", allow_none=True, ), ) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 5fb3d5083da2..de7a94bf2643 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -16,9 +16,6 @@ import urllib.parse from typing import ( TYPE_CHECKING, - Any, - Awaitable, - Callable, Dict, Iterable, List, @@ -27,10 +24,11 @@ Sequence, Tuple, TypeVar, + Union, ) from prometheus_client import Counter -from typing_extensions import Concatenate, ParamSpec, TypeGuard +from typing_extensions import ParamSpec, TypeGuard from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind from synapse.api.errors import CodeMessageException, HttpResponseException @@ -80,9 +78,7 @@ HOUR_IN_MS = 60 * 60 * 1000 - APP_SERVICE_PREFIX = "/_matrix/app/v1" -APP_SERVICE_UNSTABLE_PREFIX = "/_matrix/app/unstable" P = ParamSpec("P") R = TypeVar("R") @@ -123,52 +119,12 @@ class ApplicationServiceApi(SimpleHttpClient): def __init__(self, hs: "HomeServer"): super().__init__(hs) self.clock = hs.get_clock() + self.config = hs.config.appservice self.protocol_meta_cache: ResponseCache[Tuple[str, str]] = ResponseCache( hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS ) - async def _send_with_fallbacks( - self, - service: "ApplicationService", - prefixes: List[str], - path: str, - func: Callable[Concatenate[str, P], Awaitable[R]], - *args: 
P.args, - **kwargs: P.kwargs, - ) -> R: - """ - Attempt to call an application service with multiple paths, falling back - until one succeeds. - - Args: - service: The appliacation service, this provides the base URL. - prefixes: A last of paths to try in order for the requests. - path: A suffix to append to each prefix. - func: The function to call, the first argument will be the full - endpoint to fetch. Other arguments are provided by args/kwargs. - - Returns: - The return value of func. - """ - for i, prefix in enumerate(prefixes, start=1): - uri = f"{service.url}{prefix}{path}" - try: - return await func(uri, *args, **kwargs) - except HttpResponseException as e: - # If an error is received that is due to an unrecognised path, - # fallback to next path (if one exists). Otherwise, consider it - # a legitimate error and raise. - if i < len(prefixes) and is_unknown_endpoint(e): - continue - raise - except Exception: - # Unexpected exceptions get sent to the caller. - raise - - # The function should always exit via the return or raise above this. - raise RuntimeError("Unexpected fallback behaviour. 
This should never be seen.") - async def query_user(self, service: "ApplicationService", user_id: str) -> bool: if service.url is None: return False @@ -177,12 +133,12 @@ async def query_user(self, service: "ApplicationService", user_id: str) -> bool: assert service.hs_token is not None try: - response = await self._send_with_fallbacks( - service, - [APP_SERVICE_PREFIX, ""], - f"/users/{urllib.parse.quote(user_id)}", - self.get_json, - {"access_token": service.hs_token}, + args = None + if self.config.use_appservice_legacy_authorization: + args = {"access_token": service.hs_token} + response = await self.get_json( + f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}", + args, headers={"Authorization": [f"Bearer {service.hs_token}"]}, ) if response is not None: # just an empty json object @@ -203,12 +159,12 @@ async def query_alias(self, service: "ApplicationService", alias: str) -> bool: assert service.hs_token is not None try: - response = await self._send_with_fallbacks( - service, - [APP_SERVICE_PREFIX, ""], - f"/rooms/{urllib.parse.quote(alias)}", - self.get_json, - {"access_token": service.hs_token}, + args = None + if self.config.use_appservice_legacy_authorization: + args = {"access_token": service.hs_token} + response = await self.get_json( + f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}", + args, headers={"Authorization": [f"Bearer {service.hs_token}"]}, ) if response is not None: # just an empty json object @@ -241,15 +197,14 @@ async def query_3pe( assert service.hs_token is not None try: - args: Mapping[Any, Any] = { - **fields, - b"access_token": service.hs_token, - } - response = await self._send_with_fallbacks( - service, - [APP_SERVICE_PREFIX, APP_SERVICE_UNSTABLE_PREFIX], - f"/thirdparty/{kind}/{urllib.parse.quote(protocol)}", - self.get_json, + args: Mapping[bytes, Union[List[bytes], str]] = fields + if self.config.use_appservice_legacy_authorization: + args = { + **fields, + b"access_token": 
service.hs_token, + } + response = await self.get_json( + f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}", args=args, headers={"Authorization": [f"Bearer {service.hs_token}"]}, ) @@ -285,12 +240,12 @@ async def _get() -> Optional[JsonDict]: # This is required by the configuration. assert service.hs_token is not None try: - info = await self._send_with_fallbacks( - service, - [APP_SERVICE_PREFIX, APP_SERVICE_UNSTABLE_PREFIX], - f"/thirdparty/protocol/{urllib.parse.quote(protocol)}", - self.get_json, - {"access_token": service.hs_token}, + args = None + if self.config.use_appservice_legacy_authorization: + args = {"access_token": service.hs_token} + info = await self.get_json( + f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}", + args, headers={"Authorization": [f"Bearer {service.hs_token}"]}, ) @@ -401,13 +356,14 @@ async def push_bulk( } try: - await self._send_with_fallbacks( - service, - [APP_SERVICE_PREFIX, ""], - f"/transactions/{urllib.parse.quote(str(txn_id))}", - self.put_json, + args = None + if self.config.use_appservice_legacy_authorization: + args = {"access_token": service.hs_token} + + await self.put_json( + f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}", json_body=body, - args={"access_token": service.hs_token}, + args=args, headers={"Authorization": [f"Bearer {service.hs_token}"]}, ) if logger.isEnabledFor(logging.DEBUG): diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index c2710fdf0410..919f81a9b716 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -43,6 +43,14 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ) self.track_appservice_user_ips = config.get("track_appservice_user_ips", False) + self.use_appservice_legacy_authorization = config.get( + "use_appservice_legacy_authorization", False + ) + if self.use_appservice_legacy_authorization: + logger.warning( + 
"The use of appservice legacy authorization via query params is deprecated" + " and should be considered insecure." + ) def load_appservices( diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 7d6d41923361..21b634af3741 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -216,12 +216,6 @@ def check_config_conflicts(self, root: RootConfig) -> None: ("session_lifetime",), ) - if not root.experimental.msc3970_enabled: - raise ConfigError( - "experimental_features.msc3970_enabled must be 'true' when OAuth delegation is enabled", - ("experimental_features", "msc3970_enabled"), - ) - @attr.s(auto_attribs=True, frozen=True, slots=True) class MSC3866Config: @@ -410,9 +404,6 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: "Invalid MSC3861 configuration", ("experimental", "msc3861") ) from exc - # MSC3970: Scope transaction IDs to devices - self.msc3970_enabled = experimental.get("msc3970_enabled", self.msc3861.enabled) - # Check that none of the other config options conflict with MSC3861 when enabled self.msc3861.check_config_conflicts(self.root) diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 0e1cb8b6e30c..97636039b8ad 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -65,5 +65,23 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.max_long_retries = federation_config.get("max_long_retries", 10) self.max_short_retries = federation_config.get("max_short_retries", 3) + # Allow for the configuration of the backoff algorithm used + # when trying to reach an unavailable destination. + # Unlike previous configuration those values applies across + # multiple requests and the state of the backoff is stored on DB. 
+ self.destination_min_retry_interval_ms = Config.parse_duration( + federation_config.get("destination_min_retry_interval", "10m") + ) + self.destination_retry_multiplier = federation_config.get( + "destination_retry_multiplier", 2 + ) + self.destination_max_retry_interval_ms = min( + Config.parse_duration( + federation_config.get("destination_max_retry_interval", "7d") + ), + # Set a hard-limit to not overflow the database column. + 2**62, + ) + _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}} diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index a43498ed4d51..a9e3d4e55689 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -186,9 +186,6 @@ async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: ), "app_service_id": self.app_service.id if self.app_service else None, "partial_state": self.partial_state, - # add dummy delta_ids and prev_group for backwards compatibility - "delta_ids": None, - "prev_group": None, } @staticmethod @@ -203,13 +200,6 @@ def deserialize(storage: "StorageControllers", input: JsonDict) -> "EventContext Returns: The event context. 
""" - # workaround for backwards/forwards compatibility: if the input doesn't have a value - # for "state_group_deltas" just assign an empty dict - state_group_deltas = input.get("state_group_deltas", None) - if state_group_deltas: - state_group_deltas = _decode_state_group_delta(state_group_deltas) - else: - state_group_deltas = {} context = EventContext( # We use the state_group and prev_state_id stuff to pull the @@ -217,7 +207,7 @@ def deserialize(storage: "StorageControllers", input: JsonDict) -> "EventContext storage=storage, state_group=input["state_group"], state_group_before_event=input["state_group_before_event"], - state_group_deltas=state_group_deltas, + state_group_deltas=_decode_state_group_delta(input["state_group_deltas"]), state_delta_due_to_event=_decode_state_dict( input["state_delta_due_to_event"] ), diff --git a/synapse/events/utils.py b/synapse/events/utils.py index fe05a74b4b51..e557ce6a0fea 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -394,7 +394,6 @@ def serialize_event( time_now_ms: int, *, config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG, - msc3970_enabled: bool = False, ) -> JsonDict: """Serialize event for clients @@ -402,8 +401,6 @@ def serialize_event( e time_now_ms config: Event serialization config - msc3970_enabled: Whether MSC3970 is enabled. It changes whether we should - include the `transaction_id` in the event's `unsigned` section. Returns: The serialized event dictionary. @@ -429,38 +426,46 @@ def serialize_event( e.unsigned["redacted_because"], time_now_ms, config=config, - msc3970_enabled=msc3970_enabled, ) # If we have a txn_id saved in the internal_metadata, we should include it in the # unsigned section of the event if it was sent by the same session as the one # requesting the event. 
txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None) - if txn_id is not None and config.requester is not None: - # For the MSC3970 rules to be applied, we *need* to have the device ID in the - # event internal metadata. Since we were not recording them before, if it hasn't - # been recorded, we fallback to the old behaviour. + if ( + txn_id is not None + and config.requester is not None + and config.requester.user.to_string() == e.sender + ): + # Some events do not have the device ID stored in the internal metadata, + # this includes old events as well as those created by appservice, guests, + # or with tokens minted with the admin API. For those events, fallback + # to using the access token instead. event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None) - if msc3970_enabled and event_device_id is not None: + if event_device_id is not None: if event_device_id == config.requester.device_id: d["unsigned"]["transaction_id"] = txn_id else: - # The pre-MSC3970 behaviour is to only include the transaction ID if the - # event was sent from the same access token. For regular users, we can use - # the access token ID to determine this. For guests, we can't, but since - # each guest only has one access token, we can just check that the event was - # sent by the same user as the one requesting the event. + # Fallback behaviour: only include the transaction ID if the event + # was sent from the same access token. + # + # For regular users, the access token ID can be used to determine this. + # This includes access tokens minted with the admin API. + # + # For guests and appservice users, we can't check the access token ID + # so assume it is the same session. 
event_token_id: Optional[int] = getattr( e.internal_metadata, "token_id", None ) - if config.requester.user.to_string() == e.sender and ( + if ( ( event_token_id is not None and config.requester.access_token_id is not None and event_token_id == config.requester.access_token_id ) or config.requester.is_guest + or config.requester.app_service ): d["unsigned"]["transaction_id"] = txn_id @@ -480,14 +485,16 @@ def serialize_event( if config.as_client_event: d = config.event_format(d) - # If the event is a redaction, copy the redacts field from the content to - # top-level for backwards compatibility. - if ( - e.type == EventTypes.Redaction - and e.room_version.updated_redaction_rules - and e.redacts is not None - ): - d["redacts"] = e.redacts + # If the event is a redaction, the field with the redacted event ID appears + # in a different location depending on the room version. e.redacts handles + # fetching from the proper location; copy it to the other location for forwards- + # and backwards-compatibility with clients. + if e.type == EventTypes.Redaction and e.redacts is not None: + if e.room_version.updated_redaction_rules: + d["redacts"] = e.redacts + else: + d["content"] = dict(d["content"]) + d["content"]["redacts"] = e.redacts only_event_fields = config.only_event_fields if only_event_fields: @@ -507,9 +514,6 @@ class EventClientSerializer: clients. """ - def __init__(self, *, msc3970_enabled: bool = False): - self._msc3970_enabled = msc3970_enabled - def serialize_event( self, event: Union[JsonDict, EventBase], @@ -534,9 +538,7 @@ def serialize_event( if not isinstance(event, EventBase): return event - serialized_event = serialize_event( - event, time_now, config=config, msc3970_enabled=self._msc3970_enabled - ) + serialized_event = serialize_event(event, time_now, config=config) # Check if there are any bundled aggregations to include with the event. 
if bundle_aggregations: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index fa61dd8c1092..a90d99c4d676 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -63,6 +63,7 @@ ) from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction +from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import ( make_deferred_yieldable, @@ -137,6 +138,7 @@ def __init__(self, hs: "HomeServer"): self._event_auth_handler = hs.get_event_auth_handler() self._room_member_handler = hs.get_room_member_handler() self._e2e_keys_handler = hs.get_e2e_keys_handler() + self._worker_lock_handler = hs.get_worker_locks_handler() self._state_storage_controller = hs.get_storage_controllers().state @@ -1236,9 +1238,18 @@ async def _process_incoming_pdus_in_room_inner( logger.info("handling received PDU in room %s: %s", room_id, event) try: with nested_logging_context(event.event_id): - await self._federation_event_handler.on_receive_pdu( - origin, event - ) + # We're taking out a lock within a lock, which could + # lead to deadlocks if we're not careful. However, it is + # safe on this occasion as we only ever take a write + # lock when deleting a room, which we would never do + # while holding the `_INBOUND_EVENT_HANDLING_LOCK_NAME` + # lock. 
+ async with self._worker_lock_handler.acquire_read_write_lock( + DELETE_ROOM_LOCK_NAME, room_id, write=False + ): + await self._federation_event_handler.on_receive_pdu( + origin, event + ) except FederationError as e: # XXX: Ideally we'd inform the remote we failed to process # the event, but we can't return an error in the transaction diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index f3a713f5fa77..b7bf70a72db8 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -722,6 +722,22 @@ async def rehydrate_device( return {"success": True} + async def delete_dehydrated_device(self, user_id: str, device_id: str) -> None: + """ + Delete a stored dehydrated device. + + Args: + user_id: the user_id to delete the device from + device_id: id of the dehydrated device to delete + """ + success = await self.store.remove_dehydrated_device(user_id, device_id) + + if not success: + raise errors.NotFoundError() + + await self.delete_devices(user_id, [device_id]) + await self.store.delete_e2e_keys_by_device(user_id=user_id, device_id=device_id) + @wrap_as_background_process("_handle_new_device_update_async") async def _handle_new_device_update_async(self) -> None: """Called when we have a new local device list update that we need to diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 73b78b1d006a..61b2ea28d887 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -53,6 +53,7 @@ from synapse.events.utils import SerializeEventConfig, maybe_upsert_event_field from synapse.events.validator import EventValidator from synapse.handlers.directory import DirectoryHandler +from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME from synapse.logging import opentracing from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process @@ -485,6 +486,7 @@ def __init__(self, hs: "HomeServer"): 
self._events_shard_config = self.config.worker.events_shard_config self._instance_name = hs.get_instance_name() self._notifier = hs.get_notifier() + self._worker_lock_handler = hs.get_worker_locks_handler() self.room_prejoin_state_types = self.hs.config.api.room_prejoin_state @@ -559,8 +561,6 @@ def __init__(self, hs: "HomeServer"): expiry_ms=30 * 60 * 1000, ) - self._msc3970_enabled = hs.config.experimental.msc3970_enabled - async def create_event( self, requester: Requester, @@ -876,14 +876,13 @@ async def deduplicate_state_event( return prev_event return None - async def get_event_from_transaction( + async def get_event_id_from_transaction( self, requester: Requester, txn_id: str, room_id: str, - ) -> Optional[EventBase]: - """For the given transaction ID and room ID, check if there is a matching event. - If so, fetch it and return it. + ) -> Optional[str]: + """For the given transaction ID and room ID, check if there is a matching event ID. Args: requester: The requester making the request in the context of which we want @@ -892,12 +891,12 @@ async def get_event_from_transaction( room_id: The room ID. Returns: - An event if one could be found, None otherwise. + An event ID if one could be found, None otherwise. """ + existing_event_id = None - if self._msc3970_enabled and requester.device_id: - # When MSC3970 is enabled, we lookup for events sent by the same device first, - # and fallback to the old behaviour if none were found. + # According to the spec, transactions are scoped to a user's device ID. + if requester.device_id: existing_event_id = ( await self.store.get_event_id_from_transaction_id_and_device_id( room_id, @@ -907,10 +906,11 @@ async def get_event_from_transaction( ) ) if existing_event_id: - return await self.store.get_event(existing_event_id) + return existing_event_id - # Pre-MSC3970, we looked up for events that were sent by the same session by - # using the access token ID. 
+ # Some requsters don't have device IDs (appservice, guests, and access + # tokens minted with the admin API), fallback to checking the access token + # ID, which should be close enough. if requester.access_token_id: existing_event_id = ( await self.store.get_event_id_from_transaction_id_and_token_id( @@ -920,9 +920,32 @@ async def get_event_from_transaction( txn_id, ) ) - if existing_event_id: - return await self.store.get_event(existing_event_id) + return existing_event_id + + async def get_event_from_transaction( + self, + requester: Requester, + txn_id: str, + room_id: str, + ) -> Optional[EventBase]: + """For the given transaction ID and room ID, check if there is a matching event. + If so, fetch it and return it. + + Args: + requester: The requester making the request in the context of which we want + to fetch the event. + txn_id: The transaction ID. + room_id: The room ID. + + Returns: + An event if one could be found, None otherwise. + """ + existing_event_id = await self.get_event_id_from_transaction( + requester, txn_id, room_id + ) + if existing_event_id: + return await self.store.get_event(existing_event_id) return None async def create_and_send_nonmember_event( @@ -1010,6 +1033,37 @@ async def create_and_send_nonmember_event( event.internal_metadata.stream_ordering, ) + async with self._worker_lock_handler.acquire_read_write_lock( + DELETE_ROOM_LOCK_NAME, room_id, write=False + ): + return await self._create_and_send_nonmember_event_locked( + requester=requester, + event_dict=event_dict, + allow_no_prev_events=allow_no_prev_events, + prev_event_ids=prev_event_ids, + state_event_ids=state_event_ids, + ratelimit=ratelimit, + txn_id=txn_id, + ignore_shadow_ban=ignore_shadow_ban, + outlier=outlier, + depth=depth, + ) + + async def _create_and_send_nonmember_event_locked( + self, + requester: Requester, + event_dict: dict, + allow_no_prev_events: bool = False, + prev_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, + 
ratelimit: bool = True, + txn_id: Optional[str] = None, + ignore_shadow_ban: bool = False, + outlier: bool = False, + depth: Optional[int] = None, + ) -> Tuple[EventBase, int]: + room_id = event_dict["room_id"] + # If we don't have any prev event IDs specified then we need to # check that the host is in the room (as otherwise populating the # prev events will fail), at which point we may as well check the @@ -1941,7 +1995,10 @@ async def _send_dummy_events_to_fill_extremities(self) -> None: ) for room_id in room_ids: - dummy_event_sent = await self._send_dummy_event_for_room(room_id) + async with self._worker_lock_handler.acquire_read_write_lock( + DELETE_ROOM_LOCK_NAME, room_id, write=False + ): + dummy_event_sent = await self._send_dummy_event_for_room(room_id) if not dummy_event_sent: # Did not find a valid user in the room, so remove from future attempts diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index ea98e7e26e8a..5c85d9b2f02e 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -50,6 +50,11 @@ BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3 +PURGE_HISTORY_LOCK_NAME = "purge_history_lock" + +DELETE_ROOM_LOCK_NAME = "delete_room_lock" + + @attr.s(slots=True, auto_attribs=True) class PurgeStatus: """Object tracking the status of a purge request @@ -146,6 +151,7 @@ def __init__(self, hs: "HomeServer"): self._server_name = hs.hostname self._room_shutdown_handler = hs.get_room_shutdown_handler() self._relations_handler = hs.get_relations_handler() + self._worker_locks = hs.get_worker_locks_handler() self.pagination_lock = ReadWriteLock() # IDs of rooms in which there currently an active purge *or delete* operation. 
@@ -360,7 +366,9 @@ async def _purge_history( """ self._purges_in_progress_by_room.add(room_id) try: - async with self.pagination_lock.write(room_id): + async with self._worker_locks.acquire_read_write_lock( + PURGE_HISTORY_LOCK_NAME, room_id, write=True + ): await self._storage_controllers.purge_events.purge_history( room_id, token, delete_local_events ) @@ -418,7 +426,10 @@ async def purge_room(self, room_id: str, force: bool = False) -> None: """ logger.info(f"[purge room] purging {room_id}, force={force}") purge_start = time.time() - async with self.pagination_lock.write(room_id): + async with self._worker_locks.acquire_multi_read_write_lock( + [(PURGE_HISTORY_LOCK_NAME, room_id), (DELETE_ROOM_LOCK_NAME, room_id)], + write=True, + ): # first check that we have no users in this room if not force: joined = await self.store.is_host_joined(room_id, self._server_name) @@ -483,7 +494,9 @@ async def get_messages( room_token = from_token.room_key - async with self.pagination_lock.read(room_id): + async with self._worker_locks.acquire_read_write_lock( + PURGE_HISTORY_LOCK_NAME, room_id, write=False + ): (membership, member_event_id) = (None, None) if not use_admin_priviledge: ( @@ -759,7 +772,9 @@ async def _shutdown_and_purge_room( self._purges_in_progress_by_room.add(room_id) try: - async with self.pagination_lock.write(room_id): + async with self._worker_locks.acquire_read_write_lock( + PURGE_HISTORY_LOCK_NAME, room_id, write=True + ): self._delete_by_id[delete_id].status = DeleteStatus.STATUS_SHUTTING_DOWN self._delete_by_id[ delete_id diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index a7f8c5e636f8..c2109036ec38 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -68,7 +68,7 @@ async def get_profile(self, user_id: str, ignore_backoff: bool = True) -> JsonDi if self.hs.is_mine(target_user): profileinfo = await self.store.get_profileinfo(target_user) - if profileinfo.display_name is None: + if 
profileinfo.display_name is None and profileinfo.avatar_url is None: raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) return { @@ -163,7 +163,7 @@ async def set_displayname( 400, "Displayname is too long (max %i)" % (MAX_DISPLAYNAME_LEN,) ) - displayname_to_set: Optional[str] = new_displayname + displayname_to_set: Optional[str] = new_displayname.strip() if new_displayname == "": displayname_to_set = None diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 496e701f13d3..e3cdf2bc6169 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -39,6 +39,7 @@ from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler +from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME from synapse.logging import opentracing from synapse.metrics import event_processing_positions from synapse.metrics.background_process_metrics import run_as_background_process @@ -94,6 +95,7 @@ def __init__(self, hs: "HomeServer"): self.event_creation_handler = hs.get_event_creation_handler() self.account_data_handler = hs.get_account_data_handler() self.event_auth_handler = hs.get_event_auth_handler() + self._worker_lock_handler = hs.get_worker_locks_handler() self.member_linearizer: Linearizer = Linearizer(name="member") self.member_as_limiter = Linearizer(max_count=10, name="member_as_limiter") @@ -174,8 +176,6 @@ def __init__(self, hs: "HomeServer"): self.request_ratelimiter = hs.get_request_ratelimiter() hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room) - self._msc3970_enabled = hs.config.experimental.msc3970_enabled - def _on_user_joined_room(self, event_id: str, room_id: str) -> None: """Notify the rate limiter that a room join has occurred. 
@@ -416,29 +416,11 @@ async def _local_membership_update( # do this check just before we persist an event as well, but may as well # do it up front for efficiency.) if txn_id: - existing_event_id = None - if self._msc3970_enabled and requester.device_id: - # When MSC3970 is enabled, we lookup for events sent by the same device - # first, and fallback to the old behaviour if none were found. - existing_event_id = ( - await self.store.get_event_id_from_transaction_id_and_device_id( - room_id, - requester.user.to_string(), - requester.device_id, - txn_id, - ) + existing_event_id = ( + await self.event_creation_handler.get_event_id_from_transaction( + requester, txn_id, room_id ) - - if requester.access_token_id and not existing_event_id: - existing_event_id = ( - await self.store.get_event_id_from_transaction_id_and_token_id( - room_id, - requester.user.to_string(), - requester.access_token_id, - txn_id, - ) - ) - + ) if existing_event_id: event_pos = await self.store.get_position_for_event(existing_event_id) return existing_event_id, event_pos.stream @@ -638,26 +620,29 @@ async def update_membership( # by application services), and then by room ID. 
async with self.member_as_limiter.queue(as_id): async with self.member_linearizer.queue(key): - with opentracing.start_active_span("update_membership_locked"): - result = await self.update_membership_locked( - requester, - target, - room_id, - action, - txn_id=txn_id, - remote_room_hosts=remote_room_hosts, - third_party_signed=third_party_signed, - ratelimit=ratelimit, - content=content, - new_room=new_room, - require_consent=require_consent, - outlier=outlier, - allow_no_prev_events=allow_no_prev_events, - prev_event_ids=prev_event_ids, - state_event_ids=state_event_ids, - depth=depth, - origin_server_ts=origin_server_ts, - ) + async with self._worker_lock_handler.acquire_read_write_lock( + DELETE_ROOM_LOCK_NAME, room_id, write=False + ): + with opentracing.start_active_span("update_membership_locked"): + result = await self.update_membership_locked( + requester, + target, + room_id, + action, + txn_id=txn_id, + remote_room_hosts=remote_room_hosts, + third_party_signed=third_party_signed, + ratelimit=ratelimit, + content=content, + new_room=new_room, + require_consent=require_consent, + outlier=outlier, + allow_no_prev_events=allow_no_prev_events, + prev_event_ids=prev_event_ids, + state_event_ids=state_event_ids, + depth=depth, + origin_server_ts=origin_server_ts, + ) return result diff --git a/synapse/handlers/worker_lock.py b/synapse/handlers/worker_lock.py new file mode 100644 index 000000000000..72df773a8623 --- /dev/null +++ b/synapse/handlers/worker_lock.py @@ -0,0 +1,333 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +from types import TracebackType +from typing import ( + TYPE_CHECKING, + AsyncContextManager, + Collection, + Dict, + Optional, + Tuple, + Type, + Union, +) +from weakref import WeakSet + +import attr + +from twisted.internet import defer +from twisted.internet.interfaces import IReactorTime + +from synapse.logging.context import PreserveLoggingContext +from synapse.logging.opentracing import start_active_span +from synapse.metrics.background_process_metrics import wrap_as_background_process +from synapse.storage.databases.main.lock import Lock, LockStore +from synapse.util.async_helpers import timeout_deferred + +if TYPE_CHECKING: + from synapse.logging.opentracing import opentracing + from synapse.server import HomeServer + + +DELETE_ROOM_LOCK_NAME = "delete_room_lock" + + +class WorkerLocksHandler: + """A class for waiting on taking out locks, rather than using the storage + functions directly (which don't support awaiting). + """ + + def __init__(self, hs: "HomeServer") -> None: + self._reactor = hs.get_reactor() + self._store = hs.get_datastores().main + self._clock = hs.get_clock() + self._notifier = hs.get_notifier() + self._instance_name = hs.get_instance_name() + + # Map from lock name/key to set of `WaitingLock` that are active for + # that lock. 
+ self._locks: Dict[ + Tuple[str, str], WeakSet[Union[WaitingLock, WaitingMultiLock]] + ] = {} + + self._clock.looping_call(self._cleanup_locks, 30_000) + + self._notifier.add_lock_released_callback(self._on_lock_released) + + def acquire_lock(self, lock_name: str, lock_key: str) -> "WaitingLock": + """Acquire a standard lock, returns a context manager that will block + until the lock is acquired. + + Note: Care must be taken to avoid deadlocks. In particular, this + function does *not* timeout. + + Usage: + async with handler.acquire_lock(name, key): + # Do work while holding the lock... + """ + + lock = WaitingLock( + reactor=self._reactor, + store=self._store, + handler=self, + lock_name=lock_name, + lock_key=lock_key, + write=None, + ) + + self._locks.setdefault((lock_name, lock_key), WeakSet()).add(lock) + + return lock + + def acquire_read_write_lock( + self, + lock_name: str, + lock_key: str, + *, + write: bool, + ) -> "WaitingLock": + """Acquire a read/write lock, returns a context manager that will block + until the lock is acquired. + + Note: Care must be taken to avoid deadlocks. In particular, this + function does *not* timeout. + + Usage: + async with handler.acquire_read_write_lock(name, key, write=True): + # Do work while holding the lock... + """ + + lock = WaitingLock( + reactor=self._reactor, + store=self._store, + handler=self, + lock_name=lock_name, + lock_key=lock_key, + write=write, + ) + + self._locks.setdefault((lock_name, lock_key), WeakSet()).add(lock) + + return lock + + def acquire_multi_read_write_lock( + self, + lock_names: Collection[Tuple[str, str]], + *, + write: bool, + ) -> "WaitingMultiLock": + """Acquires multi read/write locks at once, returns a context manager + that will block until all the locks are acquired. + + This will try and acquire all locks at once, and will never hold on to a + subset of the locks. (This avoids accidentally creating deadlocks). + + Note: Care must be taken to avoid deadlocks. 
In particular, this + function does *not* timeout. + """ + + lock = WaitingMultiLock( + lock_names=lock_names, + write=write, + reactor=self._reactor, + store=self._store, + handler=self, + ) + + for lock_name, lock_key in lock_names: + self._locks.setdefault((lock_name, lock_key), WeakSet()).add(lock) + + return lock + + def notify_lock_released(self, lock_name: str, lock_key: str) -> None: + """Notify that a lock has been released. + + Pokes both the notifier and replication. + """ + + self._notifier.notify_lock_released(self._instance_name, lock_name, lock_key) + + def _on_lock_released( + self, instance_name: str, lock_name: str, lock_key: str + ) -> None: + """Called when a lock has been released. + + Wakes up any locks that might be waiting on this. + """ + locks = self._locks.get((lock_name, lock_key)) + if not locks: + return + + def _wake_deferred(deferred: defer.Deferred) -> None: + if not deferred.called: + deferred.callback(None) + + for lock in locks: + self._clock.call_later(0, _wake_deferred, lock.deferred) + + @wrap_as_background_process("_cleanup_locks") + async def _cleanup_locks(self) -> None: + """Periodically cleans out stale entries in the locks map""" + self._locks = {key: value for key, value in self._locks.items() if value} + + +@attr.s(auto_attribs=True, eq=False) +class WaitingLock: + reactor: IReactorTime + store: LockStore + handler: WorkerLocksHandler + lock_name: str + lock_key: str + write: Optional[bool] + deferred: "defer.Deferred[None]" = attr.Factory(defer.Deferred) + _inner_lock: Optional[Lock] = None + _retry_interval: float = 0.1 + _lock_span: "opentracing.Scope" = attr.Factory( + lambda: start_active_span("WaitingLock.lock") + ) + + async def __aenter__(self) -> None: + self._lock_span.__enter__() + + with start_active_span("WaitingLock.waiting_for_lock"): + while self._inner_lock is None: + self.deferred = defer.Deferred() + + if self.write is not None: + lock = await self.store.try_acquire_read_write_lock( + self.lock_name, 
self.lock_key, write=self.write + ) + else: + lock = await self.store.try_acquire_lock( + self.lock_name, self.lock_key + ) + + if lock: + self._inner_lock = lock + break + + try: + # Wait until the we get notified the lock might have been + # released (by the deferred being resolved). We also + # periodically wake up in case the lock was released but we + # weren't notified. + with PreserveLoggingContext(): + await timeout_deferred( + deferred=self.deferred, + timeout=self._get_next_retry_interval(), + reactor=self.reactor, + ) + except Exception: + pass + + return await self._inner_lock.__aenter__() + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + tb: Optional[TracebackType], + ) -> Optional[bool]: + assert self._inner_lock + + self.handler.notify_lock_released(self.lock_name, self.lock_key) + + try: + r = await self._inner_lock.__aexit__(exc_type, exc, tb) + finally: + self._lock_span.__exit__(exc_type, exc, tb) + + return r + + def _get_next_retry_interval(self) -> float: + next = self._retry_interval + self._retry_interval = max(5, next * 2) + return next * random.uniform(0.9, 1.1) + + +@attr.s(auto_attribs=True, eq=False) +class WaitingMultiLock: + lock_names: Collection[Tuple[str, str]] + + write: bool + + reactor: IReactorTime + store: LockStore + handler: WorkerLocksHandler + + deferred: "defer.Deferred[None]" = attr.Factory(defer.Deferred) + + _inner_lock_cm: Optional[AsyncContextManager] = None + _retry_interval: float = 0.1 + _lock_span: "opentracing.Scope" = attr.Factory( + lambda: start_active_span("WaitingLock.lock") + ) + + async def __aenter__(self) -> None: + self._lock_span.__enter__() + + with start_active_span("WaitingLock.waiting_for_lock"): + while self._inner_lock_cm is None: + self.deferred = defer.Deferred() + + lock_cm = await self.store.try_acquire_multi_read_write_lock( + self.lock_names, write=self.write + ) + + if lock_cm: + self._inner_lock_cm = lock_cm + break + + try: + # 
Wait until the we get notified the lock might have been + # released (by the deferred being resolved). We also + # periodically wake up in case the lock was released but we + # weren't notified. + with PreserveLoggingContext(): + await timeout_deferred( + deferred=self.deferred, + timeout=self._get_next_retry_interval(), + reactor=self.reactor, + ) + except Exception: + pass + + assert self._inner_lock_cm + await self._inner_lock_cm.__aenter__() + return + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + tb: Optional[TracebackType], + ) -> Optional[bool]: + assert self._inner_lock_cm + + for lock_name, lock_key in self.lock_names: + self.handler.notify_lock_released(lock_name, lock_key) + + try: + r = await self._inner_lock_cm.__aexit__(exc_type, exc, tb) + finally: + self._lock_span.__exit__(exc_type, exc, tb) + + return r + + def _get_next_retry_interval(self) -> float: + next = self._retry_interval + self._retry_interval = max(5, next * 2) + return next * random.uniform(0.9, 1.1) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 95f780011153..acee1dafd3ae 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -34,6 +34,7 @@ from typing_extensions import ParamSpec from twisted.internet import defer +from twisted.internet.interfaces import IDelayedCall from twisted.web.resource import Resource from synapse.api import errors @@ -1230,6 +1231,58 @@ def looping_background_call( f, ) + def should_run_background_tasks(self) -> bool: + """ + Return true if and only if the current worker is configured to run + background tasks. + There should only be one worker configured to run background tasks, so + this is helpful when you need to only run a task on one worker but don't + have any other good way to choose which one. + + Added in Synapse v1.89.0. 
+ """ + return self._hs.config.worker.run_background_tasks + + def delayed_background_call( + self, + msec: float, + f: Callable, + *args: object, + desc: Optional[str] = None, + **kwargs: object, + ) -> IDelayedCall: + """Wraps a function as a background process and calls it in a given number of milliseconds. + + The scheduled call is not persistent: if the current Synapse instance is + restarted before the call is made, the call will not be made. + + Added in Synapse v1.90.0. + + Args: + msec: How long to wait before calling, in milliseconds. + f: The function to call once. f can be either synchronous or + asynchronous, and must follow Synapse's logcontext rules. + More info about logcontexts is available at + https://matrix-org.github.io/synapse/latest/log_contexts.html + *args: Positional arguments to pass to function. + desc: The background task's description. Default to the function's name. + **kwargs: Keyword arguments to pass to function. + + Returns: + IDelayedCall handle from twisted, which allows to cancel the delayed call if desired. + """ + + if desc is None: + desc = f.__name__ + + return self._clock.call_later( + # convert ms to seconds as needed by call_later. + msec * 0.001, + run_as_background_process, + desc, + lambda: maybe_awaitable(f(*args, **kwargs)), + ) + async def sleep(self, seconds: float) -> None: """Sleeps for the given number of seconds. 
diff --git a/synapse/notifier.py b/synapse/notifier.py index 897272ad5be2..68115bca7061 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -234,6 +234,9 @@ def __init__(self, hs: "HomeServer"): self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules + # List of callbacks to be notified when a lock is released + self._lock_released_callback: List[Callable[[str, str, str], None]] = [] + self.clock = hs.get_clock() self.appservice_handler = hs.get_application_service_handler() self._pusher_pool = hs.get_pusherpool() @@ -785,6 +788,19 @@ def notify_remote_server_up(self, server: str) -> None: # that any in flight requests can be immediately retried. self._federation_client.wake_destination(server) + def add_lock_released_callback( + self, callback: Callable[[str, str, str], None] + ) -> None: + """Add a function to be called whenever we are notified about a released lock.""" + self._lock_released_callback.append(callback) + + def notify_lock_released( + self, instance_name: str, lock_name: str, lock_key: str + ) -> None: + """Notify the callbacks that a lock has been released.""" + for cb in self._lock_released_callback: + cb(instance_name, lock_name, lock_key) + @attr.s(auto_attribs=True) class ReplicationNotifier: diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py index f874f072f901..73f3de364205 100644 --- a/synapse/replication/http/devices.py +++ b/synapse/replication/http/devices.py @@ -107,8 +107,7 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint): Calls to e2e_keys_handler.upload_keys_for_user(user_id, device_id, keys) on the main process to accomplish this. 
- Defined in https://spec.matrix.org/v1.4/client-server-api/#post_matrixclientv3keysupload - Request format(borrowed and expanded from KeyUploadServlet): + Request format for this endpoint (borrowed and expanded from KeyUploadServlet): POST /_synapse/replication/upload_keys_for_user @@ -117,6 +116,7 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint): "device_id": "", "keys": { ....this part can be found in KeyUploadServlet in rest/client/keys.py.... + or as defined in https://spec.matrix.org/v1.4/client-server-api/#post_matrixclientv3keysupload } } diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 32f52e54d8c7..10f5c98ff8a9 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -422,6 +422,36 @@ class RemoteServerUpCommand(_SimpleCommand): NAME = "REMOTE_SERVER_UP" +class LockReleasedCommand(Command): + """Sent to inform other instances that a given lock has been dropped. + + Format:: + + LOCK_RELEASED ["", "", ""] + """ + + NAME = "LOCK_RELEASED" + + def __init__( + self, + instance_name: str, + lock_name: str, + lock_key: str, + ): + self.instance_name = instance_name + self.lock_name = lock_name + self.lock_key = lock_key + + @classmethod + def from_line(cls: Type["LockReleasedCommand"], line: str) -> "LockReleasedCommand": + instance_name, lock_name, lock_key = json_decoder.decode(line) + + return cls(instance_name, lock_name, lock_key) + + def to_line(self) -> str: + return json_encoder.encode([self.instance_name, self.lock_name, self.lock_key]) + + _COMMANDS: Tuple[Type[Command], ...] = ( ServerCommand, RdataCommand, @@ -435,6 +465,7 @@ class RemoteServerUpCommand(_SimpleCommand): UserIpCommand, RemoteServerUpCommand, ClearUserSyncsCommand, + LockReleasedCommand, ) # Map of command name to command type. 
@@ -448,6 +479,7 @@ class RemoteServerUpCommand(_SimpleCommand): ErrorCommand.NAME, PingCommand.NAME, RemoteServerUpCommand.NAME, + LockReleasedCommand.NAME, ) # The commands the client is allowed to send @@ -461,6 +493,7 @@ class RemoteServerUpCommand(_SimpleCommand): UserIpCommand.NAME, ErrorCommand.NAME, RemoteServerUpCommand.NAME, + LockReleasedCommand.NAME, ) diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index b216c69fc5ef..4111552a95b3 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -39,6 +39,7 @@ ClearUserSyncsCommand, Command, FederationAckCommand, + LockReleasedCommand, PositionCommand, RdataCommand, RemoteServerUpCommand, @@ -248,6 +249,9 @@ def __init__(self, hs: "HomeServer"): if self._is_master or self._should_insert_client_ips: self.subscribe_to_channel("USER_IP") + if hs.config.redis.redis_enabled: + self._notifier.add_lock_released_callback(self.on_lock_released) + def subscribe_to_channel(self, channel_name: str) -> None: """ Indicates that we wish to subscribe to a Redis channel by name. 
@@ -650,6 +654,17 @@ def on_REMOTE_SERVER_UP( self._notifier.notify_remote_server_up(cmd.data) + def on_LOCK_RELEASED( + self, conn: IReplicationConnection, cmd: LockReleasedCommand + ) -> None: + """Called when we get a new LOCK_RELEASED command.""" + if cmd.instance_name == self._instance_name: + return + + self._notifier.notify_lock_released( + cmd.instance_name, cmd.lock_name, cmd.lock_key + ) + def new_connection(self, connection: IReplicationConnection) -> None: """Called when we have a new connection.""" self._connections.append(connection) @@ -756,6 +771,13 @@ def stream_update(self, stream_name: str, token: Optional[int], data: Any) -> No """ self.send_command(RdataCommand(stream_name, self._instance_name, token, data)) + def on_lock_released( + self, instance_name: str, lock_name: str, lock_key: str + ) -> None: + """Called when we released a lock and should notify other instances.""" + if instance_name == self._instance_name: + self.send_command(LockReleasedCommand(instance_name, lock_name, lock_key)) + UpdateToken = TypeVar("UpdateToken") UpdateRow = TypeVar("UpdateRow") diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py index 690d2ec406fc..51f17f80da53 100644 --- a/synapse/rest/client/devices.py +++ b/synapse/rest/client/devices.py @@ -232,7 +232,7 @@ class Config: class DehydratedDeviceServlet(RestServlet): """Retrieve or store a dehydrated device. - Implements either MSC2697 or MSC3814. + Implements MSC2697. 
GET /org.matrix.msc2697.v2/dehydrated_device @@ -266,7 +266,12 @@ class DehydratedDeviceServlet(RestServlet): """ - def __init__(self, hs: "HomeServer", msc2697: bool = True): + PATTERNS = client_patterns( + "/org.matrix.msc2697.v2/dehydrated_device$", + releases=(), + ) + + def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() @@ -274,13 +279,6 @@ def __init__(self, hs: "HomeServer", msc2697: bool = True): assert isinstance(handler, DeviceHandler) self.device_handler = handler - self.PATTERNS = client_patterns( - "/org.matrix.msc2697.v2/dehydrated_device$" - if msc2697 - else "/org.matrix.msc3814.v1/dehydrated_device$", - releases=(), - ) - async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) dehydrated_device = await self.device_handler.get_dehydrated_device( @@ -513,10 +511,8 @@ async def on_DELETE(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if dehydrated_device is not None: (device_id, device_data) = dehydrated_device - result = await self.device_handler.rehydrate_device( - requester.user.to_string(), - self.auth.get_access_token_from_request(request), - device_id, + await self.device_handler.delete_dehydrated_device( + requester.user.to_string(), device_id ) result = {"device_id": device_id} @@ -538,6 +534,14 @@ async def on_PUT(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user_id = requester.user.to_string() + old_dehydrated_device = await self.device_handler.get_dehydrated_device(user_id) + + # if an old device exists, delete it before creating a new one + if old_dehydrated_device: + await self.device_handler.delete_dehydrated_device( + user_id, old_dehydrated_device[0] + ) + device_info = submission.dict() if "device_keys" not in device_info.keys(): raise SynapseError( @@ -573,7 +577,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: if 
hs.config.worker.worker_app is None: DeviceRestServlet(hs).register(http_server) if hs.config.experimental.msc2697_enabled: - DehydratedDeviceServlet(hs, msc2697=True).register(http_server) + DehydratedDeviceServlet(hs).register(http_server) ClaimDehydratedDeviceServlet(hs).register(http_server) if hs.config.experimental.msc3814_enabled: DehydratedDeviceV2Servlet(hs).register(http_server) diff --git a/synapse/rest/client/room_upgrade_rest_servlet.py b/synapse/rest/client/room_upgrade_rest_servlet.py index 6a7792e18b2e..4a5d9e13e736 100644 --- a/synapse/rest/client/room_upgrade_rest_servlet.py +++ b/synapse/rest/client/room_upgrade_rest_servlet.py @@ -17,6 +17,7 @@ from synapse.api.errors import Codes, ShadowBanError, SynapseError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME from synapse.http.server import HttpServer from synapse.http.servlet import ( RestServlet, @@ -60,6 +61,7 @@ def __init__(self, hs: "HomeServer"): self._hs = hs self._room_creation_handler = hs.get_room_creation_handler() self._auth = hs.get_auth() + self._worker_lock_handler = hs.get_worker_locks_handler() async def on_POST( self, request: SynapseRequest, room_id: str @@ -78,9 +80,12 @@ async def on_POST( ) try: - new_room_id = await self._room_creation_handler.upgrade_room( - requester, room_id, new_version - ) + async with self._worker_lock_handler.acquire_read_write_lock( + DELETE_ROOM_LOCK_NAME, room_id, write=False + ): + new_room_id = await self._room_creation_handler.upgrade_room( + requester, room_id, new_version + ) except ShadowBanError: # Generate a random room ID. 
new_room_id = stringutils.random_string(18) diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py index 0d8a63d8beda..3d814c404d98 100644 --- a/synapse/rest/client/transactions.py +++ b/synapse/rest/client/transactions.py @@ -50,8 +50,6 @@ def __init__(self, hs: "HomeServer"): # for at *LEAST* 30 mins, and at *MOST* 60 mins. self.cleaner = self.clock.looping_call(self._cleanup, CLEANUP_PERIOD_MS) - self._msc3970_enabled = hs.config.experimental.msc3970_enabled - def _get_transaction_key(self, request: IRequest, requester: Requester) -> Hashable: """A helper function which returns a transaction key that can be used with TransactionCache for idempotent requests. @@ -78,18 +76,20 @@ def _get_transaction_key(self, request: IRequest, requester: Requester) -> Hasha elif requester.app_service is not None: return (path, "appservice", requester.app_service.id) - # With MSC3970, we use the user ID and device ID as the transaction key - elif self._msc3970_enabled: + # Use the user ID and device ID as the transaction key. + elif requester.device_id: assert requester.user, "Requester must have a user" assert requester.device_id, "Requester must have a device_id" return (path, "user", requester.user, requester.device_id) - # Otherwise, the pre-MSC3970 behaviour is to use the access token ID + # Some requsters don't have device IDs, these are mostly handled above + # (appservice and guest users), but does not cover access tokens minted + # by the admin API. Use the access token ID instead. 
else: assert ( requester.access_token_id is not None ), "Requester must have an access_token_id" - return (path, "user", requester.access_token_id) + return (path, "user_admin", requester.access_token_id) def fetch_or_execute_request( self, diff --git a/synapse/server.py b/synapse/server.py index 3ad4402de250..d8a46056efd0 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -107,6 +107,7 @@ from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler from synapse.handlers.user_directory import UserDirectoryHandler +from synapse.handlers.worker_lock import WorkerLocksHandler from synapse.http.client import ( InsecureInterceptableContextFactory, ReplicationClient, @@ -785,9 +786,7 @@ def get_oidc_handler(self) -> "OidcHandler": @cache_in_self def get_event_client_serializer(self) -> EventClientSerializer: - return EventClientSerializer( - msc3970_enabled=self.config.experimental.msc3970_enabled - ) + return EventClientSerializer() @cache_in_self def get_password_policy_handler(self) -> PasswordPolicyHandler: @@ -917,3 +916,7 @@ def get_request_ratelimiter(self) -> RequestRatelimiter: def get_common_usage_metrics_manager(self) -> CommonUsageMetricsManager: """Usage metrics shared between phone home stats and the prometheus exporter.""" return CommonUsageMetricsManager(self) + + @cache_in_self + def get_worker_locks_handler(self) -> WorkerLocksHandler: + return WorkerLocksHandler(self) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 35c068036599..35cd1089d6c9 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -45,6 +45,7 @@ from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext +from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME from synapse.logging.context import 
PreserveLoggingContext, make_deferred_yieldable from synapse.logging.opentracing import ( SynapseTags, @@ -338,6 +339,7 @@ def __init__( ) self._state_resolution_handler = hs.get_state_resolution_handler() self._state_controller = state_controller + self.hs = hs async def _process_event_persist_queue_task( self, @@ -350,15 +352,22 @@ async def _process_event_persist_queue_task( A dictionary of event ID to event ID we didn't persist as we already had another event persisted with the same TXN ID. """ - if isinstance(task, _PersistEventsTask): - return await self._persist_event_batch(room_id, task) - elif isinstance(task, _UpdateCurrentStateTask): - await self._update_current_state(room_id, task) - return {} - else: - raise AssertionError( - f"Found an unexpected task type in event persistence queue: {task}" - ) + + # Ensure that the room can't be deleted while we're persisting events to + # it. We might already have taken out the lock, but since this is just a + # "read" lock its inherently reentrant. 
+ async with self.hs.get_worker_locks_handler().acquire_read_write_lock( + DELETE_ROOM_LOCK_NAME, room_id, write=False + ): + if isinstance(task, _PersistEventsTask): + return await self._persist_event_batch(room_id, task) + elif isinstance(task, _UpdateCurrentStateTask): + await self._update_current_state(room_id, task) + return {} + else: + raise AssertionError( + f"Found an unexpected task type in event persistence queue: {task}" + ) @trace async def persist_events( diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 4f919a8b25b4..70e3b22bcc6b 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Tuple from synapse.api.constants import EventTypes +from synapse.config._base import Config +from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.tcp.streams import BackfillStream, CachesStream from synapse.replication.tcp.streams.events import ( EventsStream, @@ -52,6 +54,21 @@ # As above, but for invalidating room caches on room deletion DELETE_ROOM_CACHE_NAME = "dr_cache_fake" +# How long between cache invalidation table cleanups, once we have caught up +# with the backlog. +REGULAR_CLEANUP_INTERVAL_MS = Config.parse_duration("1h") + +# How long between cache invalidation table cleanups, before we have caught +# up with the backlog. +CATCH_UP_CLEANUP_INTERVAL_MS = Config.parse_duration("1m") + +# Maximum number of cache invalidation rows to delete at once. +CLEAN_UP_MAX_BATCH_SIZE = 20_000 + +# Keep cache invalidations for 7 days +# (This is likely to be quite excessive.) 
+RETENTION_PERIOD_OF_CACHE_INVALIDATIONS_MS = Config.parse_duration("7d") + class CacheInvalidationWorkerStore(SQLBaseStore): def __init__( @@ -98,6 +115,18 @@ def __init__( else: self._cache_id_gen = None + # Occasionally clean up the cache invalidations stream table by deleting + # old rows. + # This is only applicable when Postgres is in use; this table is unused + # and not populated at all when SQLite is the active database engine. + if hs.config.worker.run_background_tasks and isinstance( + self.database_engine, PostgresEngine + ): + self.hs.get_clock().call_later( + CATCH_UP_CLEANUP_INTERVAL_MS / 1000, + self._clean_up_cache_invalidation_wrapper, + ) + async def get_all_updated_caches( self, instance_name: str, last_id: int, current_id: int, limit: int ) -> Tuple[List[Tuple[int, tuple]], int, bool]: @@ -562,3 +591,104 @@ def get_cache_stream_token_for_writer(self, instance_name: str) -> int: return self._cache_id_gen.get_current_token_for_writer(instance_name) else: return 0 + + @wrap_as_background_process("clean_up_old_cache_invalidations") + async def _clean_up_cache_invalidation_wrapper(self) -> None: + """ + Clean up cache invalidation stream table entries occasionally. + If we are behind (i.e. there are entries old enough to + be deleted but too many of them to be deleted in one go), + then we run slightly more frequently. + """ + delete_up_to: int = ( + self.hs.get_clock().time_msec() - RETENTION_PERIOD_OF_CACHE_INVALIDATIONS_MS + ) + + in_backlog = await self._clean_up_batch_of_old_cache_invalidations(delete_up_to) + + # Vary how long we wait before calling again depending on whether we + # are still sifting through backlog or we have caught up. 
+ if in_backlog: + next_interval = CATCH_UP_CLEANUP_INTERVAL_MS + else: + next_interval = REGULAR_CLEANUP_INTERVAL_MS + + self.hs.get_clock().call_later( + next_interval / 1000, self._clean_up_cache_invalidation_wrapper + ) + + async def _clean_up_batch_of_old_cache_invalidations( + self, delete_up_to_millisec: int + ) -> bool: + """ + Remove old rows from the `cache_invalidation_stream_by_instance` table automatically (this table is unused in SQLite). + + Up to `CLEAN_UP_BATCH_SIZE` rows will be deleted at once. + + Returns true if and only if we were limited by batch size (i.e. we are in backlog: + there are more things to clean up). + """ + + def _clean_up_batch_of_old_cache_invalidations_txn( + txn: LoggingTransaction, + ) -> bool: + # First get the earliest stream ID + txn.execute( + """ + SELECT stream_id FROM cache_invalidation_stream_by_instance + ORDER BY stream_id ASC + LIMIT 1 + """ + ) + row = txn.fetchone() + if row is None: + return False + earliest_stream_id: int = row[0] + + # Then find the last stream ID of the range we will delete + txn.execute( + """ + SELECT stream_id FROM cache_invalidation_stream_by_instance + WHERE stream_id <= ? AND invalidation_ts <= ? + ORDER BY stream_id DESC + LIMIT 1 + """, + (earliest_stream_id + CLEAN_UP_MAX_BATCH_SIZE, delete_up_to_millisec), + ) + row = txn.fetchone() + if row is None: + return False + cutoff_stream_id: int = row[0] + + # Determine whether we are caught up or still catching up + txn.execute( + """ + SELECT invalidation_ts FROM cache_invalidation_stream_by_instance + WHERE stream_id > ? + ORDER BY stream_id ASC + LIMIT 1 + """, + (cutoff_stream_id,), + ) + row = txn.fetchone() + if row is None: + in_backlog = False + else: + # We are in backlog if the next row could have been deleted + # if we didn't have such a small batch size + in_backlog = row[0] <= delete_up_to_millisec + + txn.execute( + """ + DELETE FROM cache_invalidation_stream_by_instance + WHERE ? <= stream_id AND stream_id <= ? 
+ """, + (earliest_stream_id, cutoff_stream_id), + ) + + return in_backlog + + return await self.db_pool.runInteraction( + "clean_up_old_cache_invalidations", + _clean_up_batch_of_old_cache_invalidations_txn, + ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b2cda52ce593..534dc3241318 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -843,7 +843,7 @@ def get_backfill_points_in_room_txn( * because the schema change is in a background update, it's not * necessarily safe to assume that it will have been completed. */ - AND edge.is_state is ? /* False */ + AND edge.is_state is FALSE /** * We only want backwards extremities that are older than or at * the same position of the given `current_depth` (where older @@ -886,7 +886,6 @@ def get_backfill_points_in_room_txn( sql, ( room_id, - False, current_depth, self._clock.time_msec(), BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f17207bae43e..ed9321a80c7c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -127,8 +127,6 @@ def __init__( self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen - self._msc3970_enabled = hs.config.experimental.msc3970_enabled - @trace async def _persist_events_and_state_updates( self, @@ -1012,9 +1010,11 @@ def _persist_transaction_ids_txn( ) ) - # Pre-MSC3970, we rely on the access_token_id to scope the txn_id for events. - # Since this is an experimental flag, we still store the mapping even if the - # flag is disabled. 
+ # Synapse usually relies on the device_id to scope transactions for events, + # except for users without device IDs (appservice, guests, and access + # tokens minted with the admin API) which use the access token ID instead. + # + # TODO https://github.com/matrix-org/synapse/issues/16042 if to_insert_token_id: self.db_pool.simple_insert_many_txn( txn, @@ -1030,10 +1030,7 @@ def _persist_transaction_ids_txn( values=to_insert_token_id, ) - # With MSC3970, we rely on the device_id instead to scope the txn_id for events. - # We're only inserting if MSC3970 is *enabled*, because else the pre-MSC3970 - # behaviour would allow for a UNIQUE constraint violation on this table - if to_insert_device_id and self._msc3970_enabled: + if to_insert_device_id: self.db_pool.simple_insert_many_txn( txn, table="event_txn_id_device_id", @@ -1455,8 +1452,8 @@ def _update_outliers_txn( }, ) - sql = "UPDATE events SET outlier = ? WHERE event_id = ?" - txn.execute(sql, (False, event.event_id)) + sql = "UPDATE events SET outlier = FALSE WHERE event_id = ?" + txn.execute(sql, (event.event_id,)) # Update the event_backward_extremities table now that this # event isn't an outlier any more. @@ -1549,13 +1546,13 @@ def event_dict(event: EventBase) -> JsonDict: for event, _ in events_and_contexts if not event.internal_metadata.is_redacted() ] - sql = "UPDATE redactions SET have_censored = ? WHERE " + sql = "UPDATE redactions SET have_censored = FALSE WHERE " clause, args = make_in_list_sql_clause( self.database_engine, "redacts", unredacted_events, ) - txn.execute(sql + clause, [False] + args) + txn.execute(sql + clause, args) self.db_pool.simple_insert_many_txn( txn, @@ -2320,14 +2317,14 @@ def _update_backward_extremeties( " SELECT 1 FROM events" " LEFT JOIN event_edges edge" " ON edge.event_id = events.event_id" - " WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = ? OR edge.event_id IS NULL)" + " WHERE events.event_id = ? AND events.room_id = ? 
AND (events.outlier = FALSE OR edge.event_id IS NULL)" " )" ) txn.execute_batch( query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id) for ev in events for e_id in ev.prev_event_ids() if not ev.internal_metadata.is_outlier() diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py index c89b4f7919a4..1680bf6168f9 100644 --- a/synapse/storage/databases/main/lock.py +++ b/synapse/storage/databases/main/lock.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from contextlib import AsyncExitStack from types import TracebackType -from typing import TYPE_CHECKING, Optional, Set, Tuple, Type +from typing import TYPE_CHECKING, Collection, Optional, Set, Tuple, Type from weakref import WeakValueDictionary from twisted.internet.interfaces import IReactorCore @@ -208,76 +209,85 @@ async def try_acquire_read_write_lock( used (otherwise the lock will leak). """ + try: + lock = await self.db_pool.runInteraction( + "try_acquire_read_write_lock", + self._try_acquire_read_write_lock_txn, + lock_name, + lock_key, + write, + ) + except self.database_engine.module.IntegrityError: + return None + + return lock + + def _try_acquire_read_write_lock_txn( + self, + txn: LoggingTransaction, + lock_name: str, + lock_key: str, + write: bool, + ) -> "Lock": + # We attempt to acquire the lock by inserting into + # `worker_read_write_locks` and seeing if that fails any + # constraints. If it doesn't then we have acquired the lock, + # otherwise we haven't. + # + # Before that though we clear the table of any stale locks. + now = self._clock.time_msec() token = random_string(6) - def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None: - # We attempt to acquire the lock by inserting into - # `worker_read_write_locks` and seeing if that fails any - # constraints. 
If it doesn't then we have acquired the lock, - # otherwise we haven't. - # - # Before that though we clear the table of any stale locks. - - delete_sql = """ - DELETE FROM worker_read_write_locks - WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?; - """ - - insert_sql = """ - INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts) - VALUES (?, ?, ?, ?, ?, ?) - """ - - if isinstance(self.database_engine, PostgresEngine): - # For Postgres we can send these queries at the same time. - txn.execute( - delete_sql + ";" + insert_sql, - ( - # DELETE args - now - _LOCK_TIMEOUT_MS, - lock_name, - lock_key, - # UPSERT args - lock_name, - lock_key, - write, - self._instance_name, - token, - now, - ), - ) - else: - # For SQLite these need to be two queries. - txn.execute( - delete_sql, - ( - now - _LOCK_TIMEOUT_MS, - lock_name, - lock_key, - ), - ) - txn.execute( - insert_sql, - ( - lock_name, - lock_key, - write, - self._instance_name, - token, - now, - ), - ) + delete_sql = """ + DELETE FROM worker_read_write_locks + WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?; + """ - return + insert_sql = """ + INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts) + VALUES (?, ?, ?, ?, ?, ?) + """ - try: - await self.db_pool.runInteraction( - "try_acquire_read_write_lock", - _try_acquire_read_write_lock_txn, + if isinstance(self.database_engine, PostgresEngine): + # For Postgres we can send these queries at the same time. + txn.execute( + delete_sql + ";" + insert_sql, + ( + # DELETE args + now - _LOCK_TIMEOUT_MS, + lock_name, + lock_key, + # UPSERT args + lock_name, + lock_key, + write, + self._instance_name, + token, + now, + ), + ) + else: + # For SQLite these need to be two queries. 
+ txn.execute( + delete_sql, + ( + now - _LOCK_TIMEOUT_MS, + lock_name, + lock_key, + ), + ) + txn.execute( + insert_sql, + ( + lock_name, + lock_key, + write, + self._instance_name, + token, + now, + ), ) - except self.database_engine.module.IntegrityError: - return None lock = Lock( self._reactor, @@ -289,10 +299,58 @@ def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None: token=token, ) - self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock + def set_lock() -> None: + self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock + + txn.call_after(set_lock) return lock + async def try_acquire_multi_read_write_lock( + self, + lock_names: Collection[Tuple[str, str]], + write: bool, + ) -> Optional[AsyncExitStack]: + """Try to acquire multiple locks for the given names/keys. Will return + an async context manager if the locks are successfully acquired, which + *must* be used (otherwise the lock will leak). + + If only a subset of the locks can be acquired then it will immediately + drop them and return `None`. 
+ """ + try: + locks = await self.db_pool.runInteraction( + "try_acquire_multi_read_write_lock", + self._try_acquire_multi_read_write_lock_txn, + lock_names, + write, + ) + except self.database_engine.module.IntegrityError: + return None + + stack = AsyncExitStack() + + for lock in locks: + await stack.enter_async_context(lock) + + return stack + + def _try_acquire_multi_read_write_lock_txn( + self, + txn: LoggingTransaction, + lock_names: Collection[Tuple[str, str]], + write: bool, + ) -> Collection["Lock"]: + locks = [] + + for lock_name, lock_key in lock_names: + lock = self._try_acquire_read_write_lock_txn( + txn, lock_name, lock_key, write + ) + locks.append(lock) + + return locks + class Lock: """An async context manager that manages an acquired lock, ensuring it is diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 9773c1fcd28a..b52f48cf0444 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -249,12 +249,11 @@ def _purge_history_txn( # Mark all state and own events as outliers logger.info("[purge] marking remaining events as outliers") txn.execute( - "UPDATE events SET outlier = ?" + "UPDATE events SET outlier = TRUE" " WHERE event_id IN (" - " SELECT event_id FROM events_to_purge " - " WHERE NOT should_delete" - ")", - (True,), + " SELECT event_id FROM events_to_purge " + " WHERE NOT should_delete" + ")" ) # synapse tries to take out an exclusive lock on room_depth whenever it diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 355b2568f511..600a4d62a287 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -559,19 +559,19 @@ def _upsert_push_rule_txn( if isinstance(self.database_engine, PostgresEngine): sql = """ INSERT INTO push_rules_enable (id, user_name, rule_id, enabled) - VALUES (?, ?, ?, ?) 
+ VALUES (?, ?, ?, 1) ON CONFLICT DO NOTHING """ elif isinstance(self.database_engine, Sqlite3Engine): sql = """ INSERT OR IGNORE INTO push_rules_enable (id, user_name, rule_id, enabled) - VALUES (?, ?, ?, ?) + VALUES (?, ?, ?, 1) """ else: raise RuntimeError("Unknown database engine") new_enable_id = self._push_rules_enable_id_gen.get_next() - txn.execute(sql, (new_enable_id, user_id, rule_id, 1)) + txn.execute(sql, (new_enable_id, user_id, rule_id)) async def delete_push_rule(self, user_id: str, rule_id: str) -> None: """ diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 676d03bb7e14..c582cf05732d 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -454,9 +454,9 @@ def select_users_txn( ) -> List[Tuple[str, int]]: sql = ( "SELECT user_id, expiration_ts_ms FROM account_validity" - " WHERE email_sent = ? AND (expiration_ts_ms - ?) <= ?" + " WHERE email_sent = FALSE AND (expiration_ts_ms - ?) <= ?" ) - values = [False, now_ms, renew_at] + values = [now_ms, renew_at] txn.execute(sql, values) return cast(List[Tuple[str, int]], txn.fetchall()) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 830658f328b4..719e11aea61d 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -936,11 +936,11 @@ def _get_media_mxcs_in_room_txn( JOIN event_json USING (room_id, event_id) WHERE room_id = ? %(where_clause)s - AND contains_url = ? AND outlier = ? + AND contains_url = TRUE AND outlier = FALSE ORDER BY stream_ordering DESC LIMIT ? 
""" - txn.execute(sql % {"where_clause": ""}, (room_id, True, False, 100)) + txn.execute(sql % {"where_clause": ""}, (room_id, 100)) local_media_mxcs = [] remote_media_mxcs = [] @@ -976,7 +976,7 @@ def _get_media_mxcs_in_room_txn( txn.execute( sql % {"where_clause": "AND stream_ordering < ?"}, - (room_id, next_token, True, False, 100), + (room_id, next_token, 100), ) return local_media_mxcs, remote_media_mxcs @@ -1086,9 +1086,9 @@ def _quarantine_media_txn( # set quarantine if quarantined_by is not None: - sql += "AND safe_from_quarantine = ?" + sql += "AND safe_from_quarantine = FALSE" txn.executemany( - sql, [(quarantined_by, media_id, False) for media_id in local_mxcs] + sql, [(quarantined_by, media_id) for media_id in local_mxcs] ) # remove from quarantine else: diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 5df72afb5991..b08578447534 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1401,7 +1401,7 @@ def _paginate_room_events_txn( `to_token`), or `limit` is zero. """ - args = [False, room_id] + args: List[Any] = [room_id] order, from_bound, to_bound = generate_pagination_bounds( direction, from_token, to_token @@ -1475,7 +1475,7 @@ def _paginate_room_events_txn( event.topological_ordering, event.stream_ordering FROM events AS event %(join_clause)s - WHERE event.outlier = ? AND event.room_id = ? AND %(bounds)s + WHERE event.outlier = FALSE AND event.room_id = ? AND %(bounds)s ORDER BY event.topological_ordering %(order)s, event.stream_ordering %(order)s LIMIT ? """ % { diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index d3ec648f6d0f..7de9949a5b7f 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-SCHEMA_VERSION = 79 # remember to update the list below when updating +SCHEMA_VERSION = 80 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -110,6 +110,9 @@ Changes in SCHEMA_VERSION = 79 - Add tables to handle in DB read-write locks. - Add some mitigations for a painful race between foreground and background updates, cf #15677. + +Changes in SCHEMA_VERSION = 80 + - The event_txn_id_device_id is always written to for new events. """ diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index fdfd465c8dea..39a1ae4ac347 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -117,11 +117,12 @@ class Requester: Attributes: user: id of the user making the request - access_token_id: *ID* of the access token used for this - request, or None if it came via the appservice API or similar + access_token_id: *ID* of the access token used for this request, or + None for appservices, guests, and tokens generated by the admin API is_guest: True if the user making this request is a guest user shadow_banned: True if the user making this request has been shadow-banned. - device_id: device_id which was set at authentication time + device_id: device_id which was set at authentication time, or + None for appservices, guests, and tokens generated by the admin API app_service: the AS requesting on behalf of the user authenticated_entity: The entity that authenticated when making the request. 
This is different to the user_id when an admin user or the server is diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index 340e5e914533..0cb46700a9ab 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -36,7 +36,7 @@ ) from synapse.util import Clock from synapse.util.async_helpers import AbstractObservableDeferred, ObservableDeferred -from synapse.util.caches import register_cache +from synapse.util.caches import EvictionReason, register_cache logger = logging.getLogger(__name__) @@ -167,7 +167,7 @@ def on_complete(r: RV) -> RV: # the should_cache bit, we leave it in the cache for now and schedule # its removal later. if self.timeout_sec and context.should_cache: - self.clock.call_later(self.timeout_sec, self.unset, key) + self.clock.call_later(self.timeout_sec, self._entry_timeout, key) else: # otherwise, remove the result immediately. self.unset(key) @@ -185,6 +185,12 @@ def unset(self, key: KV) -> None: Args: key: key used to remove the cached value """ + self._metrics.inc_evictions(EvictionReason.invalidation) + self._result_cache.pop(key, None) + + def _entry_timeout(self, key: KV) -> None: + """For the call_later to remove from the cache""" + self._metrics.inc_evictions(EvictionReason.time) self._result_cache.pop(key, None) async def wrap( diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index dcc037b9822e..27e9fc976c10 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -27,15 +27,6 @@ logger = logging.getLogger(__name__) -# the initial backoff, after the first transaction fails -MIN_RETRY_INTERVAL = 10 * 60 * 1000 - -# how much we multiply the backoff by after each subsequent fail -RETRY_MULTIPLIER = 5 - -# a cap on the backoff. 
(Essentially none) -MAX_RETRY_INTERVAL = 2**62 - class NotRetryingDestination(Exception): def __init__(self, retry_last_ts: int, retry_interval: int, destination: str): @@ -169,6 +160,16 @@ def __init__( self.notifier = notifier self.replication_client = replication_client + self.destination_min_retry_interval_ms = ( + self.store.hs.config.federation.destination_min_retry_interval_ms + ) + self.destination_retry_multiplier = ( + self.store.hs.config.federation.destination_retry_multiplier + ) + self.destination_max_retry_interval_ms = ( + self.store.hs.config.federation.destination_max_retry_interval_ms + ) + def __enter__(self) -> None: pass @@ -220,13 +221,15 @@ def __exit__( # We couldn't connect. if self.retry_interval: self.retry_interval = int( - self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4) + self.retry_interval + * self.destination_retry_multiplier + * random.uniform(0.8, 1.4) ) - if self.retry_interval >= MAX_RETRY_INTERVAL: - self.retry_interval = MAX_RETRY_INTERVAL + if self.retry_interval >= self.destination_max_retry_interval_ms: + self.retry_interval = self.destination_max_retry_interval_ms else: - self.retry_interval = MIN_RETRY_INTERVAL + self.retry_interval = self.destination_min_retry_interval_ms logger.info( "Connection to %s was unsuccessful (%s(%s)); backoff now %i", diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py index 15fce165b611..3c635e3dcbdb 100644 --- a/tests/appservice/test_api.py +++ b/tests/appservice/test_api.py @@ -11,18 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, List, Mapping, Sequence, Union +from typing import Any, List, Mapping, Optional, Sequence, Union from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactor -from synapse.api.errors import HttpResponseException from synapse.appservice import ApplicationService from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock from tests import unittest +from tests.unittest import override_config PROTOCOL = "myproto" TOKEN = "myastoken" @@ -40,7 +40,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: hs_token=TOKEN, ) - def test_query_3pe_authenticates_token(self) -> None: + def test_query_3pe_authenticates_token_via_header(self) -> None: """ Tests that 3pe queries to the appservice are authenticated with the appservice's token. @@ -75,12 +75,16 @@ async def get_json( args: Mapping[Any, Any], headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]], ) -> List[JsonDict]: - # Ensure the access token is passed as both a header and query arg. - if not headers.get("Authorization") or not args.get(b"access_token"): + # Ensure the access token is passed as a header. + if not headers or not headers.get("Authorization"): raise RuntimeError("Access token not provided") + # ... and not as a query param + if b"access_token" in args: + raise RuntimeError( + "Access token should not be passed as a query param." 
+ ) self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"]) - self.assertEqual(args.get(b"access_token"), TOKEN) self.request_url = url if url == URL_USER: return SUCCESS_RESULT_USER @@ -107,10 +111,13 @@ async def get_json( self.assertEqual(self.request_url, URL_LOCATION) self.assertEqual(result, SUCCESS_RESULT_LOCATION) - def test_fallback(self) -> None: + @override_config({"use_appservice_legacy_authorization": True}) + def test_query_3pe_authenticates_token_via_param(self) -> None: """ - Tests that the fallback to legacy URLs works. + Tests that 3pe queries to the appservice are authenticated + with the appservice's token. """ + SUCCESS_RESULT_USER = [ { "protocol": PROTOCOL, @@ -120,30 +127,41 @@ def test_fallback(self) -> None: }, } ] + SUCCESS_RESULT_LOCATION = [ + { + "protocol": PROTOCOL, + "alias": "#a:room", + "fields": { + "more": "fields", + }, + } + ] URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}" - FALLBACK_URL_USER = f"{URL}/_matrix/app/unstable/thirdparty/user/{PROTOCOL}" + URL_LOCATION = f"{URL}/_matrix/app/v1/thirdparty/location/{PROTOCOL}" self.request_url = None - self.v1_seen = False async def get_json( url: str, args: Mapping[Any, Any], - headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]], + headers: Optional[ + Mapping[Union[str, bytes], Sequence[Union[str, bytes]]] + ] = None, ) -> List[JsonDict]: - # Ensure the access token is passed as both a header and query arg. - if not headers.get("Authorization") or not args.get(b"access_token"): - raise RuntimeError("Access token not provided") + # Ensure the access token is passed both as a query param and in the headers.
+ if not args.get(b"access_token"): + raise RuntimeError("Access token should be provided in query params.") + if not headers or not headers.get("Authorization"): + raise RuntimeError("Access token should be provided in auth headers.") - self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"]) self.assertEqual(args.get(b"access_token"), TOKEN) + self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"]) self.request_url = url if url == URL_USER: - self.v1_seen = True - raise HttpResponseException(404, "NOT_FOUND", b"NOT_FOUND") - elif url == FALLBACK_URL_USER: return SUCCESS_RESULT_USER + elif url == URL_LOCATION: + return SUCCESS_RESULT_LOCATION else: raise RuntimeError( "URL provided was invalid. This should never be seen." @@ -155,9 +173,15 @@ async def get_json( result = self.get_success( self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]}) ) - self.assertTrue(self.v1_seen) - self.assertEqual(self.request_url, FALLBACK_URL_USER) + self.assertEqual(self.request_url, URL_USER) self.assertEqual(result, SUCCESS_RESULT_USER) + result = self.get_success( + self.api.query_3pe( + self.service, "location", PROTOCOL, {b"some": [b"field"]} + ) + ) + self.assertEqual(self.request_url, URL_LOCATION) + self.assertEqual(result, SUCCESS_RESULT_LOCATION) def test_claim_keys(self) -> None: """ diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 196ceb0b82d0..ec2f5d30bea9 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -179,6 +179,16 @@ def test_get_my_avatar(self) -> None: self.assertEqual("http://my.server/me.png", avatar_url) + def test_get_profile_empty_displayname(self) -> None: + self.get_success(self.store.set_profile_displayname(self.frank, None)) + self.get_success( + self.store.set_profile_avatar_url(self.frank, "http://my.server/me.png") + ) + + profile = self.get_success(self.handler.get_profile(self.frank.to_string())) + + 
self.assertEqual("http://my.server/me.png", profile["avatar_url"]) + def test_set_my_avatar(self) -> None: self.get_success( self.handler.set_avatar_url( diff --git a/tests/handlers/test_worker_lock.py b/tests/handlers/test_worker_lock.py new file mode 100644 index 000000000000..73e548726cbb --- /dev/null +++ b/tests/handlers/test_worker_lock.py @@ -0,0 +1,74 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer +from twisted.test.proto_helpers import MemoryReactor + +from synapse.server import HomeServer +from synapse.util import Clock + +from tests import unittest +from tests.replication._base import BaseMultiWorkerStreamTestCase + + +class WorkerLockTestCase(unittest.HomeserverTestCase): + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.worker_lock_handler = self.hs.get_worker_locks_handler() + + def test_wait_for_lock_locally(self) -> None: + """Test waiting for a lock on a single worker""" + + lock1 = self.worker_lock_handler.acquire_lock("name", "key") + self.get_success(lock1.__aenter__()) + + lock2 = self.worker_lock_handler.acquire_lock("name", "key") + d2 = defer.ensureDeferred(lock2.__aenter__()) + self.assertNoResult(d2) + + self.get_success(lock1.__aexit__(None, None, None)) + + self.get_success(d2) + self.get_success(lock2.__aexit__(None, None, None)) + + +class 
WorkerLockWorkersTestCase(BaseMultiWorkerStreamTestCase): + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.main_worker_lock_handler = self.hs.get_worker_locks_handler() + + def test_wait_for_lock_worker(self) -> None: + """Test waiting for a lock on another worker""" + + worker = self.make_worker_hs( + "synapse.app.generic_worker", + extra_config={ + "redis": {"enabled": True}, + }, + ) + worker_lock_handler = worker.get_worker_locks_handler() + + lock1 = self.main_worker_lock_handler.acquire_lock("name", "key") + self.get_success(lock1.__aenter__()) + + lock2 = worker_lock_handler.acquire_lock("name", "key") + d2 = defer.ensureDeferred(lock2.__aenter__()) + self.assertNoResult(d2) + + self.get_success(lock1.__aexit__(None, None, None)) + + self.get_success(d2) + self.get_success(lock2.__aexit__(None, None, None)) diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 1e06f8607159..829b9df83d4e 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -409,12 +409,12 @@ def test_suppress_edits(self) -> None: ) ) - # Room mentions from those without power should not notify. + # The edit should not cause a notification. self.assertFalse( self._create_and_process( bulk_evaluator, { - "body": self.alice, + "body": "Test message", "m.relates_to": { "rel_type": RelationTypes.REPLACE, "event_id": event.event_id, @@ -422,3 +422,20 @@ def test_suppress_edits(self) -> None: }, ) ) + + # An edit which is a mention will cause a notification. 
+ self.assertTrue( + self._create_and_process( + bulk_evaluator, + { + "body": "Test message", + "m.relates_to": { + "rel_type": RelationTypes.REPLACE, + "event_id": event.event_id, + }, + "m.mentions": { + "user_ids": [self.alice], + }, + }, + ) + ) diff --git a/tests/rest/client/test_devices.py b/tests/rest/client/test_devices.py index b7d420cfec02..3cf29c10ea6d 100644 --- a/tests/rest/client/test_devices.py +++ b/tests/rest/client/test_devices.py @@ -379,4 +379,141 @@ def test_dehydrate_msc3814(self) -> None: access_token=token, shorthand=False, ) - self.assertEqual(channel.code, 404) + self.assertEqual(channel.code, 401) + + @unittest.override_config( + {"experimental_features": {"msc2697_enabled": False, "msc3814_enabled": True}} + ) + def test_msc3814_dehydrated_device_delete_works(self) -> None: + user = self.register_user("mikey", "pass") + token = self.login(user, "pass", device_id="device1") + content: JsonDict = { + "device_data": { + "algorithm": "m.dehydration.v1.olm", + }, + "device_id": "device2", + "initial_device_display_name": "foo bar", + "device_keys": { + "user_id": "@mikey:test", + "device_id": "device2", + "valid_until_ts": "80", + "algorithms": [ + "m.olm.curve25519-aes-sha2", + ], + "keys": { + ":": "", + }, + "signatures": { + "": {":": ""} + }, + }, + } + channel = self.make_request( + "PUT", + "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device", + content=content, + access_token=token, + shorthand=False, + ) + self.assertEqual(channel.code, 200) + device_id = channel.json_body.get("device_id") + assert device_id is not None + self.assertIsInstance(device_id, str) + self.assertEqual("device2", device_id) + + # ensure that keys were uploaded and available + channel = self.make_request( + "POST", + "/_matrix/client/r0/keys/query", + { + "device_keys": { + user: ["device2"], + }, + }, + token, + ) + self.assertEqual( + channel.json_body["device_keys"][user]["device2"]["keys"], + { + ":": "", + }, + ) + + # delete the dehydrated 
device + channel = self.make_request( + "DELETE", + "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device", + access_token=token, + shorthand=False, + ) + self.assertEqual(channel.code, 200) + + # ensure that keys are no longer available for deleted device + channel = self.make_request( + "POST", + "/_matrix/client/r0/keys/query", + { + "device_keys": { + user: ["device2"], + }, + }, + token, + ) + self.assertEqual(channel.json_body["device_keys"], {"@mikey:test": {}}) + + # check that an old device is deleted when user PUTs a new device + # First, create a device + content["device_id"] = "device3" + content["device_keys"]["device_id"] = "device3" + channel = self.make_request( + "PUT", + "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device", + content=content, + access_token=token, + shorthand=False, + ) + self.assertEqual(channel.code, 200) + device_id = channel.json_body.get("device_id") + assert device_id is not None + self.assertIsInstance(device_id, str) + self.assertEqual("device3", device_id) + + # create a second device without deleting first device + content["device_id"] = "device4" + content["device_keys"]["device_id"] = "device4" + channel = self.make_request( + "PUT", + "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device", + content=content, + access_token=token, + shorthand=False, + ) + self.assertEqual(channel.code, 200) + device_id = channel.json_body.get("device_id") + assert device_id is not None + self.assertIsInstance(device_id, str) + self.assertEqual("device4", device_id) + + # check that the second device that was created is what is returned when we GET + channel = self.make_request( + "GET", + "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device", + access_token=token, + shorthand=False, + ) + self.assertEqual(channel.code, 200) + returned_device_id = channel.json_body["device_id"] + self.assertEqual(returned_device_id, "device4") + + # and that if we query the keys for the first device they 
are not there + channel = self.make_request( + "POST", + "/_matrix/client/r0/keys/query", + { + "device_keys": { + user: ["device3"], + }, + }, + token, + ) + self.assertEqual(channel.json_body["device_keys"], {"@mikey:test": {}}) diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py index 27c93ad76122..ecae092b477a 100644 --- a/tests/rest/client/test_profile.py +++ b/tests/rest/client/test_profile.py @@ -68,6 +68,18 @@ def test_set_displayname(self) -> None: res = self._get_displayname() self.assertEqual(res, "test") + def test_set_displayname_with_extra_spaces(self) -> None: + channel = self.make_request( + "PUT", + "/profile/%s/displayname" % (self.owner,), + content={"displayname": " test "}, + access_token=self.owner_tok, + ) + self.assertEqual(channel.code, 200, channel.result) + + res = self._get_displayname() + self.assertEqual(res, "test") + def test_set_displayname_noauth(self) -> None: channel = self.make_request( "PUT", diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py index 6028886bd62e..180b635ea694 100644 --- a/tests/rest/client/test_redactions.py +++ b/tests/rest/client/test_redactions.py @@ -13,10 +13,12 @@ # limitations under the License. 
from typing import List, Optional +from parameterized import parameterized + from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventTypes, RelationTypes -from synapse.api.room_versions import RoomVersions +from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.rest import admin from synapse.rest.client import login, room, sync from synapse.server import HomeServer @@ -569,50 +571,81 @@ def test_redact_relations_txn_id_reuse(self) -> None: self.assertIn("body", event_dict["content"], event_dict) self.assertEqual("I'm in a thread!", event_dict["content"]["body"]) - def test_content_redaction(self) -> None: - """MSC2174 moved the redacts property to the content.""" + @parameterized.expand( + [ + # Tuples of: + # Room version + # Boolean: True if the redaction event content should include the event ID. + # Boolean: true if the resulting redaction event is expected to include the + # event ID in the content. + (RoomVersions.V10, False, False), + (RoomVersions.V11, True, True), + (RoomVersions.V11, False, True), + ] + ) + def test_redaction_content( + self, room_version: RoomVersion, include_content: bool, expect_content: bool + ) -> None: + """ + Room version 11 moved the redacts property to the content. + + Ensure that the event gets created properly and that the Client-Server + API servers the proper backwards-compatible version. + """ # Create a room with the newer room version. room_id = self.helper.create_room_as( self.mod_user_id, tok=self.mod_access_token, - room_version=RoomVersions.V11.identifier, + room_version=room_version.identifier, ) # Create an event. b = self.helper.send(room_id=room_id, tok=self.mod_access_token) event_id = b["event_id"] - # Attempt to redact it with a bogus event ID. - self._redact_event( + # Ensure the event ID in the URL and the content must match. 
+ if include_content: + self._redact_event( + self.mod_access_token, + room_id, + event_id, + expect_code=400, + content={"redacts": "foo"}, + ) + + # Redact it for real. + result = self._redact_event( self.mod_access_token, room_id, event_id, - expect_code=400, - content={"redacts": "foo"}, + content={"redacts": event_id} if include_content else {}, ) - - # Redact it for real. - self._redact_event(self.mod_access_token, room_id, event_id) + redaction_event_id = result["event_id"] # Sync the room, to get the id of the create event timeline = self._sync_room_timeline(self.mod_access_token, room_id) redact_event = timeline[-1] self.assertEqual(redact_event["type"], EventTypes.Redaction) - # The redacts key should be in the content. + # The redacts key should be in the content and the redacts keys. self.assertEquals(redact_event["content"]["redacts"], event_id) - - # It should also be copied as the top-level redacts field for backwards - # compatibility. self.assertEquals(redact_event["redacts"], event_id) # But it isn't actually part of the event. 
def get_event(txn: LoggingTransaction) -> JsonDict: return db_to_json( - main_datastore._fetch_event_rows(txn, [event_id])[event_id].json + main_datastore._fetch_event_rows(txn, [redaction_event_id])[ + redaction_event_id + ].json ) main_datastore = self.hs.get_datastores().main event_json = self.get_success( main_datastore.db_pool.runInteraction("get_event", get_event) ) - self.assertNotIn("redacts", event_json) + self.assertEquals(event_json["type"], EventTypes.Redaction) + if expect_content: + self.assertNotIn("redacts", event_json) + self.assertEquals(event_json["content"]["redacts"], event_id) + else: + self.assertEquals(event_json["redacts"], event_id) + self.assertNotIn("redacts", event_json["content"]) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index d013e75d55d7..4f6347be15a0 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -711,7 +711,7 @@ def test_post_room_no_keys(self) -> None: self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(30, channel.resource_usage.db_txn_count) + self.assertEqual(32, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -724,7 +724,7 @@ def test_post_room_initial_state(self) -> None: self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(32, channel.resource_usage.db_txn_count) + self.assertEqual(34, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py index ad454f6dd81d..383da83dfb2a 100644 --- a/tests/storage/databases/main/test_lock.py +++ 
b/tests/storage/databases/main/test_lock.py @@ -448,3 +448,55 @@ def test_shutdown(self) -> None: self.get_success(self.store._on_shutdown()) self.assertEqual(self.store._live_read_write_lock_tokens, {}) + + def test_acquire_multiple_locks(self) -> None: + """Tests that acquiring multiple locks at once works.""" + + # Take out multiple locks and ensure that we can't get those locks out + # again. + lock = self.get_success( + self.store.try_acquire_multi_read_write_lock( + [("name1", "key1"), ("name2", "key2")], write=True + ) + ) + self.assertIsNotNone(lock) + + assert lock is not None + self.get_success(lock.__aenter__()) + + lock2 = self.get_success( + self.store.try_acquire_read_write_lock("name1", "key1", write=True) + ) + self.assertIsNone(lock2) + + lock3 = self.get_success( + self.store.try_acquire_read_write_lock("name2", "key2", write=False) + ) + self.assertIsNone(lock3) + + # Overlapping locks attempts will fail, and won't lock any locks. + lock4 = self.get_success( + self.store.try_acquire_multi_read_write_lock( + [("name1", "key1"), ("name3", "key3")], write=True + ) + ) + self.assertIsNone(lock4) + + lock5 = self.get_success( + self.store.try_acquire_read_write_lock("name3", "key3", write=True) + ) + self.assertIsNotNone(lock5) + assert lock5 is not None + self.get_success(lock5.__aenter__()) + self.get_success(lock5.__aexit__(None, None, None)) + + # Once we release the lock we can take out the locks again. 
+ self.get_success(lock.__aexit__(None, None, None)) + + lock6 = self.get_success( + self.store.try_acquire_read_write_lock("name1", "key1", write=True) + ) + self.assertIsNotNone(lock6) + assert lock6 is not None + self.get_success(lock6.__aenter__()) + self.get_success(lock6.__aexit__(None, None, None)) diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py index 2fab84a52939..ef06b50dbb7b 100644 --- a/tests/storage/test_transactions.py +++ b/tests/storage/test_transactions.py @@ -17,7 +17,6 @@ from synapse.server import HomeServer from synapse.storage.databases.main.transactions import DestinationRetryTimings from synapse.util import Clock -from synapse.util.retryutils import MAX_RETRY_INTERVAL from tests.unittest import HomeserverTestCase @@ -57,8 +56,14 @@ def test_initial_set_transactions(self) -> None: self.get_success(d) def test_large_destination_retry(self) -> None: + max_retry_interval_ms = ( + self.hs.config.federation.destination_max_retry_interval_ms + ) d = self.store.set_destination_retry_timings( - "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL + "example.com", + max_retry_interval_ms, + max_retry_interval_ms, + max_retry_interval_ms, ) self.get_success(d) diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py index 5f8f4e76b544..1277e1a865ff 100644 --- a/tests/util/test_retryutils.py +++ b/tests/util/test_retryutils.py @@ -11,12 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.util.retryutils import ( - MIN_RETRY_INTERVAL, - RETRY_MULTIPLIER, - NotRetryingDestination, - get_retry_limiter, -) +from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from tests.unittest import HomeserverTestCase @@ -42,6 +37,11 @@ def test_limiter(self) -> None: limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) + min_retry_interval_ms = ( + self.hs.config.federation.destination_min_retry_interval_ms + ) + retry_multiplier = self.hs.config.federation.destination_retry_multiplier + self.pump(1) try: with limiter: @@ -57,7 +57,7 @@ def test_limiter(self) -> None: assert new_timings is not None self.assertEqual(new_timings.failure_ts, failure_ts) self.assertEqual(new_timings.retry_last_ts, failure_ts) - self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL) + self.assertEqual(new_timings.retry_interval, min_retry_interval_ms) # now if we try again we should get a failure self.get_failure( @@ -68,7 +68,7 @@ def test_limiter(self) -> None: # advance the clock and try again # - self.pump(MIN_RETRY_INTERVAL) + self.pump(min_retry_interval_ms) limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) self.pump(1) @@ -87,16 +87,16 @@ def test_limiter(self) -> None: self.assertEqual(new_timings.failure_ts, failure_ts) self.assertEqual(new_timings.retry_last_ts, retry_ts) self.assertGreaterEqual( - new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5 + new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 0.5 ) self.assertLessEqual( - new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0 + new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 2.0 ) # # one more go, with success # - self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) + self.reactor.advance(min_retry_interval_ms * retry_multiplier * 2.0) limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) self.pump(1)