Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Reserved users for MAU limits #3662

Merged
merged 8 commits into from
Aug 8, 2018
1 change: 1 addition & 0 deletions changelog.d/3662.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ability to whitelist specific threepids against monthly active user limiting
6 changes: 6 additions & 0 deletions synapse/app/homeserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@ def generate_user_daily_visit_stats():
# If you increase the loop period, the accuracy of user_daily_visits
# table will decrease
clock.looping_call(generate_user_daily_visit_stats, 5 * 60 * 1000)

# monthly active user limiting functionality
clock.looping_call(
hs.get_datastore().reap_monthly_active_users, 1000 * 60 * 60
)
Expand All @@ -530,9 +532,13 @@ def generate_monthly_active_users():
current_mau_gauge.set(float(count))
max_mau_value_gauge.set(float(hs.config.max_mau_value))

hs.get_datastore().initialise_reserved_users(
hs.config.mau_limits_reserved_threepids
)
generate_monthly_active_users()
if hs.config.limit_usage_by_mau:
clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000)
# End of monthly active user settings

if hs.config.report_stats:
logger.info("Scheduling stats reporting for 3 hour intervals")
Expand Down
3 changes: 3 additions & 0 deletions synapse/config/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def read_config(self, config):
self.max_mau_value = config.get(
"max_mau_value", 0,
)
self.mau_limits_reserved_threepids = config.get(
"mau_limit_reserved_threepids", []
)

# FIXME: federation_domain_whitelist needs sytests
self.federation_domain_whitelist = None
Expand Down
51 changes: 45 additions & 6 deletions synapse/storage/monthly_active_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

from twisted.internet import defer

from synapse.util.caches.descriptors import cached

from ._base import SQLBaseStore

logger = logging.getLogger(__name__)

# Number of msec of granularity to store the monthly_active_user timestamp
# This means it is not necessary to update the table on every request
LAST_SEEN_GRANULARITY = 60 * 60 * 1000
Expand All @@ -29,7 +32,29 @@ def __init__(self, dbconn, hs):
super(MonthlyActiveUsersStore, self).__init__(None, hs)
self._clock = hs.get_clock()
self.hs = hs
self.reserved_users = ()

@defer.inlineCallbacks
def initialise_reserved_users(self, threepids):
    """Resolve the reserved threepids to user IDs and register them.

    Every threepid that resolves to a user is passed to
    ``upsert_monthly_active_user`` and its user ID is recorded in
    ``self.reserved_users`` (stored as a tuple). Threepids that do not
    resolve to a user are logged at warning level and skipped.

    Args:
        threepids (list[dict]): entries providing "medium" and "address"
            keys, as read from the reserved threepids config option.

    Returns:
        Deferred: fires once all lookups and upserts have completed.
    """
    # TODO Why can't I do this in init?
    store = self.hs.get_datastore()
    reserved_user_list = []

    # Do not add more reserved users than the total allowable number
    for tp in threepids[:self.hs.config.max_mau_value]:
        user_id = yield store.get_user_id_by_threepid(
            tp["medium"], tp["address"]
        )
        if user_id:
            # Wait for the upsert: otherwise its Deferred is dropped, any
            # failure goes unreported, and callers may observe
            # reserved_users before the rows have been written.
            yield self.upsert_monthly_active_user(user_id)
            reserved_user_list.append(user_id)
        else:
            # Let the logging framework do the interpolation lazily.
            logger.warning(
                "mau limit reserved threepid %s not found in db", tp
            )
    self.reserved_users = tuple(reserved_user_list)

@defer.inlineCallbacks
def reap_monthly_active_users(self):
"""
Cleans out monthly active user table to ensure that no stale
Expand All @@ -44,8 +69,20 @@ def _reap_users(txn):
int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
)
# Purge stale users
sql = "DELETE FROM monthly_active_users WHERE timestamp < ?"
txn.execute(sql, (thirty_days_ago,))

# questionmarks is a hack to overcome sqlite not supporting
# tuples in 'WHERE IN %s'
questionmarks = '?' * len(self.reserved_users)
query_args = [thirty_days_ago]
query_args.extend(self.reserved_users)

sql = """
DELETE FROM monthly_active_users
WHERE timestamp < ?
AND user_id NOT IN ({})
""".format(','.join(questionmarks))

txn.execute(sql, query_args)

# If MAU user count still exceeds the MAU threshold, then delete on
# a least recently active basis.
Expand All @@ -55,15 +92,18 @@ def _reap_users(txn):
# Postgres does not require 'LIMIT', but it also does not support
# negative LIMIT values, so there is no way to write the query that both
# databases support.
query_args = [self.hs.config.max_mau_value]
query_args.extend(self.reserved_users)
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
ORDER BY timestamp DESC
LIMIT ?
)
"""
txn.execute(sql, (self.hs.config.max_mau_value,))
AND user_id NOT IN ({})
""".format(','.join(questionmarks))
txn.execute(sql, query_args)

yield self.runInteraction("reap_monthly_active_users", _reap_users)
# It seems poor to invalidate the whole cache, Postgres supports
Expand Down Expand Up @@ -122,7 +162,7 @@ def _user_last_seen_monthly_active(self, user_id):
Arguments:
user_id (str): user to add/update
Return:
int : timestamp since last seen, None if never seen
Deferred[int] : timestamp since last seen, None if never seen

"""

Expand All @@ -144,7 +184,6 @@ def populate_monthly_active_users(self, user_id):
Args:
user_id(str): the user_id to query
"""

if self.hs.config.limit_usage_by_mau:
last_seen_timestamp = yield self._user_last_seen_monthly_active(user_id)
now = self.hs.get_clock().time_msec()
Expand Down
59 changes: 58 additions & 1 deletion tests/storage/test_monthly_active_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import tests.utils
from tests.utils import setup_test_homeserver

FORTY_DAYS = 40 * 24 * 60 * 60


class MonthlyActiveUsersTestCase(tests.unittest.TestCase):
def __init__(self, *args, **kwargs):
Expand All @@ -29,6 +31,56 @@ def setUp(self):
self.hs = yield setup_test_homeserver()
self.store = self.hs.get_datastore()

@defer.inlineCallbacks
def test_initialise_reserved_users(self):
    """Reserved threepid users are counted as active and survive reaping."""
    # (user_id, email address, access token) for each reserved account
    accounts = [
        ("@user1:server", "user1@matrix.org", "123"),
        ("@user2:server", "user2@matrix.org", "456"),
    ]
    threepids = [
        {'medium': 'email', 'address': email} for _, email, _ in accounts
    ]
    expected_count = len(threepids)

    # Register every account first, then bind the email threepids.
    for user_id, _, token in accounts:
        yield self.store.register(
            user_id=user_id,
            token=token,
            password_hash=None,
        )

    now = int(self.hs.get_clock().time_msec())
    for user_id, email, _ in accounts:
        yield self.store.user_add_threepid(user_id, "email", email, now, now)

    yield self.store.initialise_reserved_users(threepids)

    # Test total counts
    active_count = yield self.store.get_monthly_active_count()
    self.assertEquals(active_count, expected_count)

    # Test each user is marked as active
    for user_id, _, _ in accounts:
        timestamp = yield self.store._user_last_seen_monthly_active(user_id)
        self.assertTrue(timestamp)

    # Test that users are never removed from the db, even with a zero
    # MAU limit and after the 30 day window has passed.
    self.hs.config.max_mau_value = 0
    self.hs.get_clock().advance_time(FORTY_DAYS)

    yield self.store.reap_monthly_active_users()

    active_count = yield self.store.get_monthly_active_count()
    self.assertEquals(active_count, expected_count)

@defer.inlineCallbacks
def test_can_insert_and_count_mau(self):
count = yield self.store.get_monthly_active_count()
Expand Down Expand Up @@ -63,4 +115,9 @@ def test_reap_monthly_active_users(self):
self.assertTrue(count, initial_users)
yield self.store.reap_monthly_active_users()
count = yield self.store.get_monthly_active_count()
self.assertTrue(count, initial_users - self.hs.config.max_mau_value)
self.assertEquals(count, initial_users - self.hs.config.max_mau_value)

self.hs.get_clock().advance_time(FORTY_DAYS)
yield self.store.reap_monthly_active_users()
count = yield self.store.get_monthly_active_count()
self.assertEquals(count, 0)
2 changes: 2 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None
config.media_storage_providers = []
config.auto_join_rooms = []
config.limit_usage_by_mau = False
config.max_mau_value = 50
config.mau_limits_reserved_threepids = []

# disable user directory updates, because they get done in the
# background, which upsets the test runner.
Expand Down