Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleaner/easier way for user to specify Cloudwatch metric percentiles #599

Merged
merged 4 commits into from
Aug 16, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 24 additions & 17 deletions metrics/cloudwatch/cloudwatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/generic"
"github.com/go-kit/kit/metrics/internal/lv"
"strconv"
)

const (
Expand All @@ -38,7 +39,7 @@ type CloudWatch struct {
counters *lv.Space
gauges *lv.Space
histograms *lv.Space
percentiles Percentiles
percentiles []float64 // percentiles to track
logger log.Logger
numConcurrentRequests int
}
Expand All @@ -57,16 +58,19 @@ func WithLogger(logger log.Logger) option {
}
}

func WithPercentiles(p Percentiles) option {
// WithPercentiles registers the percentiles to track, overriding the
// existing/default values.
// Reason is that Cloudwatch makes you pay per metric, so you can save half the money
// by only using 2 metrics instead of the default 4.
func WithPercentiles(percentiles ...float64) option {
return func(c *CloudWatch) {
validated := Percentiles{}
for _, entry := range p {
if entry.f < 0 || entry.f > 1 {
continue // illegal entry
c.percentiles = make([]float64, 0, len(percentiles))
for _, p := range percentiles {
if p < 0 || p > 1 {
continue // illegal entry; ignore
}
validated = append(validated, entry)
c.percentiles = append(c.percentiles, p)
}
c.percentiles = validated
}
}

Expand All @@ -93,12 +97,7 @@ func New(namespace string, svc cloudwatchiface.CloudWatchAPI, options ...option)
histograms: lv.NewSpace(),
numConcurrentRequests: 10,
logger: log.NewLogfmtLogger(os.Stderr),
percentiles: Percentiles{
{"50", 0.50},
{"90", 0.90},
{"95", 0.95},
{"99", 0.99},
},
percentiles: []float64{0.50, 0.90, 0.95, 0.99},
}

for _, optFunc := range options {
Expand Down Expand Up @@ -179,17 +178,25 @@ func (cw *CloudWatch) Send() error {
return true
})

// format a [0,1]-float value to a percentile value, with minimum nr of decimals
// 0.90 -> "90"
// 0.95 -> "95"
// 0.999 -> "99.9"
formatPerc := func(p float64) string {
return strconv.FormatFloat(p*100, 'f', -1, 64)
}

cw.histograms.Reset().Walk(func(name string, lvs lv.LabelValues, values []float64) bool {
histogram := generic.NewHistogram(name, 50)

for _, v := range values {
histogram.Observe(v)
}

for _, p := range cw.percentiles {
value := histogram.Quantile(p.f)
for _, perc := range cw.percentiles {
value := histogram.Quantile(perc)
datums = append(datums, &cloudwatch.MetricDatum{
MetricName: aws.String(fmt.Sprintf("%s_%s", name, p.s)),
MetricName: aws.String(fmt.Sprintf("%s_p%s", name, formatPerc(perc))),
Dimensions: makeDimensions(lvs...),
Value: aws.Float64(value),
Timestamp: aws.Time(now),
Expand Down
56 changes: 52 additions & 4 deletions metrics/cloudwatch/cloudwatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ func TestHistogram(t *testing.T) {
svc := newMockCloudWatch()
cw := New(namespace, svc, WithLogger(log.NewNopLogger()))
histogram := cw.NewHistogram(name).With(label, value)
n50 := fmt.Sprintf("%s_50", name)
n90 := fmt.Sprintf("%s_90", name)
n95 := fmt.Sprintf("%s_95", name)
n99 := fmt.Sprintf("%s_99", name)
n50 := fmt.Sprintf("%s_p50", name)
n90 := fmt.Sprintf("%s_p90", name)
n95 := fmt.Sprintf("%s_p95", name)
n99 := fmt.Sprintf("%s_p99", name)
quantiles := func() (p50, p90, p95, p99 float64) {
err := cw.Send()
if err != nil {
Expand Down Expand Up @@ -193,4 +193,52 @@ func TestHistogram(t *testing.T) {
if err := svc.testDimensions(n99, label, value); err != nil {
t.Fatal(err)
}

// now test with only 2 custom percentiles
//
svc = newMockCloudWatch()
cw = New(namespace, svc, WithLogger(log.NewNopLogger()), WithPercentiles(0.50, 0.90))
histogram = cw.NewHistogram(name).With(label, value)

customQuantiles := func() (p50, p90, p95, p99 float64) {
err := cw.Send()
if err != nil {
t.Fatal(err)
}
svc.mtx.RLock()
defer svc.mtx.RUnlock()
p50 = svc.valuesReceived[n50]
p90 = svc.valuesReceived[n90]

// our teststat.TestHistogram wants us to give p95 and p99,
// but with custom percentiles we don't have those.
// So fake them. Maybe we should make teststat.nvq() public and use that?
p95 = 541.121341
p99 = 558.158697
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@peterbourgon this may not be the nicest to hardcode; we can also make teststat.nvq() public. Do you have a preference?

Copy link
Member

@peterbourgon peterbourgon Aug 11, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's fine to just leave this [part of the] test out, for this particular metric?

I'm tentatively fine with the hard-coded percentiles, only because this whole package is getting a ground-up refactor in the short/medium term.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for you comment @peterbourgon ; Do you mean leave out the whole added part about testing WithPercentile() ?
I think that's the only way.

We can also leave it hardcoded for now; it's all fine with me. I actually do think it's useful to test the WithPercentile(), but I'm not really attached to it.

If you have a preference, let me know; otherwise I'll leave it as-is.


// but fail if they are actually set (because that would mean the
// WithPercentiles() is not respected)
if _, isSet := svc.valuesReceived[n95]; isSet {
t.Fatal("p95 should not be set")
}
if _, isSet := svc.valuesReceived[n99]; isSet {
t.Fatal("p99 should not be set")
}
return
}
if err := teststat.TestHistogram(histogram, customQuantiles, 0.01); err != nil {
t.Fatal(err)
}
if err := svc.testDimensions(n50, label, value); err != nil {
t.Fatal(err)
}
if err := svc.testDimensions(n90, label, value); err != nil {
t.Fatal(err)
}
if err := svc.testDimensions(n95, label, value); err != nil {
t.Fatal(err)
}
if err := svc.testDimensions(n99, label, value); err != nil {
t.Fatal(err)
}
}