Skip to content

Commit

Permalink
Merge pull request #260 from edx/jibsheet/xqueue-cloudwatch-alarms
Browse files Browse the repository at this point in the history
Add a cloudwatch argument to count_queued_submissions
  • Loading branch information
jibsheet authored Jun 22, 2018
2 parents b0356de + 0c2b33e commit f031bcf
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 27 deletions.
104 changes: 104 additions & 0 deletions queue/management/commands/count_queued_submissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@

from __future__ import unicode_literals

from itertools import izip_longest
from queue.models import Submission

import backoff
import boto3
import botocore
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.db.models import Count
Expand All @@ -27,6 +31,11 @@ def add_arguments(self, parser):
action='store_true',
help='Submit New Relic custom metrics'
)
parser.add_argument(
'--cloudwatch',
action='store_true',
help='Submit CloudWatch custom metrics'
)

def handle(self, *args, **options):
"""
Expand All @@ -48,6 +57,10 @@ def handle(self, *args, **options):
if use_newrelic:
self.send_nr_metrics(queue_counts)

use_cloudwatch = options.get('cloudwatch')
if use_cloudwatch:
self.send_cloudwatch_metrics(queue_counts)

def pretty_print_queues(self, queue_counts):
"""
Send a tabulated log output of the queues and the counts to the console
Expand All @@ -73,3 +86,94 @@ def send_nr_metrics(self, queue_counts):
'Custom/XQueueLength/{}[submissions]'.format(queue['queue_name']),
queue['queue_count'],
application=nr_app)

def send_cloudwatch_metrics(self, queue_counts):
    """
    Publish queue lengths to AWS CloudWatch and create or update one alarm per queue.

    Every entry of ``queue_counts`` is read through its ``'queue_name'`` and
    ``'queue_count'`` keys. The namespace, alarm thresholds and SNS target are
    taken from ``settings.CLOUDWATCH_QUEUE_COUNT_METRICS``.
    """
    client = CwBotoWrapper()
    config = settings.CLOUDWATCH_QUEUE_COUNT_METRICS
    metric_name = 'queue_length'
    dimension_name = 'queue'
    environment = config['environment']
    deployment = config['deployment']
    namespace = "xqueue/{}-{}".format(environment, deployment)

    # Alarm parameters that are the same for every queue. Period is in
    # seconds: 6 evaluation periods of 600 seconds each means a queue has to
    # stay over its threshold for an hour.
    shared_alarm_settings = {
        'Period': 600,
        'EvaluationPeriods': 6,
        'ComparisonOperator': "GreaterThanThreshold",
        'TreatMissingData': "notBreaching",
        'Statistic': "Maximum",
    }

    # Walk the queues 10 at a time to stay under AWS limits.
    for batch in grouper(queue_counts, 10):
        # grouper pads the last batch with None; drop the padding.
        batch = [entry for entry in batch if entry is not None]
        metric_data = [
            {
                'MetricName': metric_name,
                'Dimensions': [{
                    "Name": dimension_name,
                    "Value": entry['queue_name']
                }],
                'Value': entry['queue_count']
            }
            for entry in batch
        ]

        if metric_data:
            client.put_metric_data(Namespace=namespace, MetricData=metric_data)

        for entry in batch:
            queue_name = entry['queue_name']
            # A per-queue override wins over the default threshold.
            default_threshold = config['default_threshold']
            threshold = config['thresholds'].get(queue_name, default_threshold)
            # The same SNS target is used for every alarm state transition.
            sns_targets = [config['sns_arn']]
            alarm_name = "{}-{} {} queue length over threshold".format(
                environment, deployment, queue_name)

            print('Creating or updating alarm "{}"'.format(alarm_name))
            client.put_metric_alarm(
                AlarmName=alarm_name,
                AlarmDescription=alarm_name,
                Namespace=namespace,
                MetricName=metric_name,
                Dimensions=[{'Name': dimension_name, 'Value': queue_name}],
                Threshold=threshold,
                InsufficientDataActions=sns_targets,
                OKActions=sns_targets,
                AlarmActions=sns_targets,
                **shared_alarm_settings)


class CwBotoWrapper(object):
    """
    Thin wrapper around a boto3 CloudWatch client whose calls are retried.

    Both wrapped calls are retried with exponential backoff when botocore
    raises ``ClientError``, for at most ``max_tries`` attempts.
    """

    # Upper bound on attempts per wrapped call, read by the decorators below.
    max_tries = 5

    def __init__(self):
        self.client = boto3.client('cloudwatch')

    @backoff.on_exception(backoff.expo, botocore.exceptions.ClientError, max_tries=max_tries)
    def put_metric_data(self, *args, **kwargs):
        """Forward all arguments to the client's ``put_metric_data`` and return its result."""
        client = self.client
        return client.put_metric_data(*args, **kwargs)

    @backoff.on_exception(backoff.expo, botocore.exceptions.ClientError, max_tries=max_tries)
    def put_metric_alarm(self, *args, **kwargs):
        """Forward all arguments to the client's ``put_metric_alarm`` and return its result."""
        client = self.client
        return client.put_metric_alarm(*args, **kwargs)


# Stolen right from the itertools recipes
# https://docs.python.org/3/library/itertools.html#itertools-recipes
def grouper(iterable, n, fillvalue=None):
    """
    Collect data into fixed-length chunks or blocks.

    Returns an iterator of ``n``-tuples taken in order from ``iterable``. When
    the input runs out part-way through a tuple, the remaining slots are
    filled with ``fillvalue``.

    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    """
    # izip_longest only exists on Python 2; Python 3 renamed it zip_longest.
    # Resolve whichever is available so this helper works on both.
    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest

    # n references to the *same* iterator: each output tuple therefore
    # consumes n consecutive items from the input.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
30 changes: 30 additions & 0 deletions queue/management/commands/tests/test_count_queued_submissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,33 @@ def test_push_to_new_relic(self, mock_newrelic_agent):
mock_newrelic_agent.record_custom_metric.call_count)

mock_newrelic_agent.record_custom_metric.has_calls(expected_nr_calls, any_order=True)

@patch('boto3.client')
def test_push_to_cloudwatch(self, mock_boto3):
    """
    Run the command with --cloudwatch and check what is sent to the boto3 client.

    Three submissions across two queues should produce a single
    put_metric_data call carrying both queue lengths, and one
    put_metric_alarm call per queue.
    """
    self._create_submission(queue_name="test-pull")
    self._create_submission(queue_name="test2")
    self._create_submission(queue_name="test2")
    call_command('count_queued_submissions', '--cloudwatch', stdout=self.stdout)
    self.assertRegexpMatches(self.stdout.getvalue(), r'test2\s*2\s*\ntest-pull\s*1')

    # Collect the keyword arguments of each client call by method name. The
    # recorded call names contain the method that was invoked on the mock.
    metric_data_kwargs = []
    metric_alarm_kwargs = []
    for call in mock_boto3.mock_calls:
        name, args, kwargs = call
        if 'put_metric_data' in name:
            metric_data_kwargs.append(kwargs)
        if 'put_metric_alarm' in name:
            metric_alarm_kwargs.append(kwargs)

    # Both queues fit in one batch, so exactly one metrics call is expected.
    # Asserting the count guards against the payload checks being skipped.
    self.assertEqual(len(metric_data_kwargs), 1)
    self.assertEqual(len(metric_data_kwargs[0]['MetricData']), 2)
    self.assertEqual(metric_data_kwargs[0],
                     {'Namespace': u'xqueue/dev-stack',
                      'MetricData': [
                          {u'Dimensions': [{u'Name': u'queue', u'Value': u'test2'}],
                           u'Value': 2,
                           u'MetricName': u'queue_length'
                           },
                          {u'Dimensions': [{u'Name': u'queue', u'Value': u'test-pull'}],
                           u'Value': 1,
                           u'MetricName': u'queue_length'}]})

    self.assertEqual(len(metric_alarm_kwargs), 2)
    self.assertEqual(metric_alarm_kwargs[0]['AlarmName'], u'dev-stack test2 queue length over threshold')
    self.assertEqual(metric_alarm_kwargs[1]['AlarmName'], u'dev-stack test-pull queue length over threshold')
13 changes: 7 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
#
# make upgrade
#
boto3==1.7.32
botocore==1.10.32 # via boto3, s3transfer
backoff==1.5.0
boto3==1.7.41
botocore==1.10.41 # via boto3, s3transfer
certifi==2018.4.16 # via requests
chardet==3.0.4 # via requests
django-storages==1.6.6
Expand All @@ -14,19 +15,19 @@ docutils==0.14 # via botocore
edx-django-release-util==0.3.1
futures==3.2.0 # via isort, s3transfer
gunicorn==0.16.1
idna==2.6 # via requests
idna==2.7 # via requests
isort==4.3.4
jmespath==0.9.3 # via boto3, botocore
mysql-python==1.2.5
newrelic==3.2.1.93
newrelic==3.2.2.94
path.py==11.0.1
python-dateutil==2.7.3 # via botocore
python-memcached==1.48
python-termstyle==0.1.10
pytz==2018.4
pyyaml==3.12 # via edx-django-release-util
requests==2.18.4
requests==2.19.1
s3transfer==0.1.13 # via boto3
six==1.11.0 # via edx-django-release-util, python-dateutil
urllib3==1.22 # via requests
urllib3==1.23 # via requests
wsgiref==0.1.2
1 change: 1 addition & 0 deletions requirements/base.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Core requirements for using this service

MySQL-python==1.2.5
backoff
boto3
django>=1.11,<1.12
django-storages
Expand Down
19 changes: 10 additions & 9 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#
atomicwrites==1.1.5
attrs==18.1.0
boto3==1.7.32
botocore==1.10.32
backoff==1.5.0
boto3==1.7.41
botocore==1.10.41
certifi==2018.4.16
chardet==3.0.4
click==6.7
Expand All @@ -21,32 +22,32 @@ first==2.0.1
funcsigs==1.0.2
futures==3.2.0
gunicorn==0.16.1
idna==2.6
idna==2.7
isort==4.3.4
jmespath==0.9.3
mock==2.0.0
more-itertools==4.2.0
mysql-python==1.2.5
newrelic==3.2.1.93
newrelic==3.2.2.94
path.py==11.0.1
pbr==4.0.3
pbr==4.0.4
pip-tools==2.0.2
pluggy==0.6.0
py==1.5.3
pycodestyle==2.4.0
pytest-cov==2.5.1
pytest-django==3.2.1
pytest==3.6.1
pytest-django==3.3.0
pytest==3.6.2
python-dateutil==2.7.3
python-memcached==1.48
python-termstyle==0.1.10
pytz==2018.4
pyyaml==3.12
requests==2.18.4
requests==2.19.1
s3transfer==0.1.13
six==1.11.0
tox-battery==0.5.1
tox==3.0.0
urllib3==1.22
urllib3==1.23
virtualenv==16.0.0
wsgiref==0.1.2
19 changes: 10 additions & 9 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#
atomicwrites==1.1.5 # via pytest
attrs==18.1.0 # via pytest
boto3==1.7.32
botocore==1.10.32
backoff==1.5.0
boto3==1.7.41
botocore==1.10.41
certifi==2018.4.16
chardet==3.0.4
coverage==4.5.1 # via pytest-cov
Expand All @@ -17,27 +18,27 @@ edx-django-release-util==0.3.1
funcsigs==1.0.2 # via mock, pytest
futures==3.2.0
gunicorn==0.16.1
idna==2.6
idna==2.7
isort==4.3.4
jmespath==0.9.3
mock==2.0.0
more-itertools==4.2.0 # via pytest
mysql-python==1.2.5
newrelic==3.2.1.93
newrelic==3.2.2.94
path.py==11.0.1
pbr==4.0.3 # via mock
pbr==4.0.4 # via mock
pluggy==0.6.0 # via pytest
py==1.5.3 # via pytest
pytest-cov==2.5.1
pytest-django==3.2.1
pytest==3.6.1
pytest-django==3.3.0
pytest==3.6.2
python-dateutil==2.7.3
python-memcached==1.48
python-termstyle==0.1.10
pytz==2018.4
pyyaml==3.12
requests==2.18.4
requests==2.19.1
s3transfer==0.1.13
six==1.11.0
urllib3==1.22
urllib3==1.23
wsgiref==0.1.2
6 changes: 3 additions & 3 deletions requirements/travis.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ certifi==2018.4.16 # via requests
chardet==3.0.4 # via requests
codecov==2.0.15
coverage==4.5.1 # via codecov
idna==2.6 # via requests
idna==2.7 # via requests
pluggy==0.6.0 # via tox
py==1.5.3 # via tox
requests==2.18.4 # via codecov
requests==2.19.1 # via codecov
six==1.11.0 # via tox
tox-battery==0.5.1
tox==3.0.0
urllib3==1.22 # via requests
urllib3==1.23 # via requests
virtualenv==16.0.0 # via tox
16 changes: 16 additions & 0 deletions xqueue/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,19 @@

# This is the list of users managed by update_users
USERS = None

# If you use count_queued_submissions to submit data to AWS CloudWatch you'll need to
# provide some information for how to construct the metrics and alarms.
# Metrics are stored in a namespace of xqueue/<environment>-<deployment>, and one alarm is
# created per queue that triggers when the queue length is greater than default_threshold.
# If you want a different threshold for a given queue, thresholds is a dictionary of
# "queue name": custom limit.
# All alarms share the sns_arn.
CLOUDWATCH_QUEUE_COUNT_METRICS = {
# environment and deployment are combined into the metric namespace and the alarm names.
'environment': 'dev',
'deployment': 'stack',
# SNS topic ARN used as the action target of every alarm.
'sns_arn': 'arn:aws:sns:::',
# Queue length threshold for queues that have no entry in 'thresholds'.
'default_threshold': 50,
# Per-queue threshold overrides, keyed by queue name.
'thresholds': {
'test-pull': 100
}
}

0 comments on commit f031bcf

Please sign in to comment.