From 70c2b83f0001718de8ce9e9484ad8846142b0af1 Mon Sep 17 00:00:00 2001
From: Daniel Nelson <daniel@wavesofdawn.com>
Date: Fri, 18 Aug 2017 13:24:05 -0700
Subject: [PATCH] Update histogram aggregator documentation (#3133)

---
 etc/telegraf.conf                          |  36 +++---
 plugins/aggregators/histogram/README.md    | 125 ++++++++-------------
 plugins/aggregators/histogram/histogram.go |  40 +++----
 3 files changed, 85 insertions(+), 116 deletions(-)

diff --git a/etc/telegraf.conf b/etc/telegraf.conf
index dcb2f158..ee0c00be 100644
--- a/etc/telegraf.conf
+++ b/etc/telegraf.conf
@@ -602,30 +602,30 @@
 #                            AGGREGATOR PLUGINS                               #
 ###############################################################################
 
-# # Keep the aggregate histogram of each metric passing through.
+# # Create aggregate histograms.
 # [[aggregators.histogram]]
-#   ## General Aggregator Arguments:
-#   ## The period on which to flush & clear the aggregator.
+#   ## The period in which to flush the aggregator.
 #   period = "30s"
+#
 #   ## If true, the original metric will be dropped by the
 #   ## aggregator and will not get sent to the output plugins.
 #   drop_original = false
 #
-#   ## The example of config to aggregate histogram for all fields of specified metric.
-#   [[aggregators.histogram.config]]
-#   ## The set of buckets.
-#   buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
-#   ## The name of metric.
-#   metric_name = "cpu"
-#
-#   ## The example of config to aggregate for specified fields of metric.
-#   [[aggregators.histogram.config]]
-#   ## The set of buckets.
-#   buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
-#   ## The name of metric.
-#   metric_name = "diskio"
-#   ## The concrete fields of metric
-#   metric_fields = ["io_time", "read_time", "write_time"]
+#   ## Example config that aggregates all fields of the metric.
+#   # [[aggregators.histogram.config]]
+#   #   ## The set of buckets.
+#   #   buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
+#   #   ## The name of metric.
+#   #   measurement_name = "cpu"
+#
+#   ## Example config that aggregates only specific fields of the metric.
+#   # [[aggregators.histogram.config]]
+#   #   ## The set of buckets.
+#   #   buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
+#   #   ## The name of metric.
+#   #   measurement_name = "diskio"
+#   #   ## The concrete fields of metric
+#   #   fields = ["io_time", "read_time", "write_time"]
 
 
 # # Keep the aggregate min/max of each metric passing through.
diff --git a/plugins/aggregators/histogram/README.md b/plugins/aggregators/histogram/README.md
index 29b7a6dc..b4525681 100644
--- a/plugins/aggregators/histogram/README.md
+++ b/plugins/aggregators/histogram/README.md
@@ -1,38 +1,25 @@
 # Histogram Aggregator Plugin
 
-#### Goal
+The histogram aggregator plugin creates histograms containing the counts of
+field values within a range.
 
-This plugin was added for ability to build histograms.
+Values added to a bucket are also added to the larger buckets in the
+distribution.  This creates a [cumulative histogram](https://en.wikipedia.org/wiki/Histogram#/media/File:Cumulative_vs_normal_histogram.svg).
 
-#### Description
+Like other Telegraf aggregators, the metric is emitted every `period` seconds.
+Bucket counts, however, are not reset between periods and will be non-strictly
+increasing while Telegraf is running.
 
-The histogram aggregator plugin aggregates values of specified metric's
-fields. The metric is emitted every `period` seconds. All you need to do
-is to specify borders of histogram buckets and fields, for which you want
-to aggregate histogram.
+#### Design
 
-#### How it works
-
-The each metric is passed to the aggregator and this aggregator searches
+Each metric is passed to the aggregator and this aggregator searches
 histogram buckets for those fields, which have been specified in the
-config. If buckets are found, the aggregator will put +1 to appropriate
-bucket. Otherwise, nothing will happen. Every `period` seconds these data
-will be pushed to output.
-
-Note, that the all hits of current bucket will be also added to all next
-buckets in final result of distribution. Why does it work this way? In
-configuration you define right borders for each bucket in a ascending
-sequence. Internally buckets are presented as ranges with borders
-(0..bucketBorder]: 0..1, 0..10, 0..50, …, 0..+Inf. So the value "+1" will be
-put into those buckets, in which the metric value fell with such ranges of
-buckets.
-
-This plugin creates cumulative histograms. It means, that the hits in the 
-buckets will always increase from the moment of telegraf start. But if you
-restart telegraf, all hits in the buckets will be reset to 0.
-
-Also, the algorithm of hit counting to buckets was implemented on the base
-of the algorithm, which is implemented in the Prometheus
+config. If buckets are found, the aggregator will add +1 to the appropriate
+bucket; otherwise, the hit will be added to the `+Inf` bucket.  Every `period`
+seconds this data will be forwarded to the outputs.
+
+The algorithm for counting hits in buckets is based on the algorithm
+implemented in the Prometheus
 [client](https://github.com/prometheus/client_golang/blob/master/prometheus/histogram.go).
 
 ### Configuration
@@ -40,61 +27,44 @@ of the algorithm, which is implemented in the Prometheus
 ```toml
 # Configuration for aggregate histogram metrics
 [[aggregators.histogram]]
-  ## General Aggregator Arguments:
-  ## The period on which to flush & clear the aggregator.
+  ## The period in which to flush the aggregator.
   period = "30s"
+
   ## If true, the original metric will be dropped by the
   ## aggregator and will not get sent to the output plugins.
   drop_original = false
 
-  ## The example of config to aggregate histogram for all fields of specified metric.
-  [[aggregators.histogram.config]]
-    ## The set of buckets.
-    buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
-    ## The name of metric.
-    metric_name = "cpu"
-
-  ## The example of config to aggregate histogram for concrete fields of specified metric.
-  [[aggregators.histogram.config]]
-    ## The set of buckets.
-    buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
-    ## The name of metric.
-    metric_name = "diskio"
-    ## The concrete fields of metric.
-    metric_fields = ["io_time", "read_time", "write_time"]
+  ## Example config that aggregates all fields of the metric.
+  # [[aggregators.histogram.config]]
+  #   ## The set of buckets.
+  #   buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
+  #   ## The name of metric.
+  #   measurement_name = "cpu"
+
+  ## Example config that aggregates only specific fields of the metric.
+  # [[aggregators.histogram.config]]
+  #   ## The set of buckets.
+  #   buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
+  #   ## The name of metric.
+  #   measurement_name = "diskio"
+  #   ## The concrete fields of metric
+  #   fields = ["io_time", "read_time", "write_time"]
 ```
 
-#### Explanation
-
-The field `metric_fields` is the list of metric fields. For example, the
-metric `cpu` has the following fields: usage_user, usage_system,
-usage_idle, usage_nice, usage_iowait, usage_irq, usage_softirq, usage_steal,
-usage_guest, usage_guest_nice.
+The user is responsible for defining the bounds of the histogram buckets as
+well as the measurement name and fields to aggregate.
 
-Note that histogram metrics will be pushed every `period` seconds. 
-As you know telegraf calls aggregator `Reset()` func each `period` seconds.
-Histogram aggregator ignores `Reset()` and continues to count hits. 
+Each histogram config section must contain the `buckets` and `measurement_name`
+options.  Optionally, if `fields` is set, only the fields listed will be
+aggregated.  If `fields` is not set, all fields are aggregated.
 
-#### Use cases
+The `buckets` option contains a list of floats which specify the bucket
+boundaries.  Each float value defines the inclusive upper bound of the bucket.
+The `+Inf` bucket is added automatically and does not need to be defined.
 
-You can specify fields using two cases:
-
- 1. The specifying only metric name. In this case all fields of metric
-    will be aggregated.
- 2. The specifying metric name and concrete field.
- 
-#### Some rules
- 
- - The setting of each histogram must be in separate section with title
-   `aggregators.histogram.config`.
-
- - The each value of bucket must be float value.
- 
- - Don\`t include the border bucket `+Inf`. It will be done automatically.
- 
 ### Measurements & Fields:
 
-The postfix `bucket` will be added to each field.
+The suffix `_bucket` will be appended to each field key.
 
 - measurement1
     - field1_bucket
@@ -102,16 +72,15 @@ The postfix `bucket` will be added to each field.
 
 ### Tags:
 
-All measurements have tag `le`. This tag has the border value of bucket. It
-means that the metric value is less or equal to the value of this tag. For
-example, let assume that we have the metric value 10 and the following
-buckets: [5, 10, 30, 70, 100]. Then the tag `le` will have the value 10,
-because the metrics value is passed into bucket with right border value `10`.
+All measurements are given the tag `le`. This tag holds the upper bound of the
+bucket, meaning that the metric value is less than or equal to the value of
+this tag.  For example, let us assume that we have the metric value 10 and the
+following buckets: [5, 10, 30, 70, 100]. Then the tag `le` will have the value
+10, because the metric value falls into the bucket with the right border value
+`10`.
 
 ### Example Output:
 
-The following output will return to the Prometheus client.
-
 ```
 cpu,cpu=cpu1,host=localhost,le=0.0 usage_idle_bucket=0i 1486998330000000000
 cpu,cpu=cpu1,host=localhost,le=10.0 usage_idle_bucket=0i 1486998330000000000
diff --git a/plugins/aggregators/histogram/histogram.go b/plugins/aggregators/histogram/histogram.go
index 49195533..a60cede3 100644
--- a/plugins/aggregators/histogram/histogram.go
+++ b/plugins/aggregators/histogram/histogram.go
@@ -24,8 +24,8 @@ type HistogramAggregator struct {
 
 // config is the config, which contains name, field of metric and histogram buckets.
 type config struct {
-	Metric  string   `toml:"metric_name"`
-	Fields  []string `toml:"metric_fields"`
+	Metric  string   `toml:"measurement_name"`
+	Fields  []string `toml:"fields"`
 	Buckets buckets  `toml:"buckets"`
 }
 
@@ -65,28 +65,28 @@ func NewHistogramAggregator() telegraf.Aggregator {
 }
 
 var sampleConfig = `
-  ## General Aggregator Arguments:
-  ## The period on which to flush & clear the aggregator.
+  ## The period in which to flush the aggregator.
   period = "30s"
+
   ## If true, the original metric will be dropped by the
   ## aggregator and will not get sent to the output plugins.
   drop_original = false
 
-  ## The example of config to aggregate histogram for all fields of specified metric.
-  [[aggregators.histogram.config]]
-  ## The set of buckets.
-  buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
-  ## The name of metric.
-  metric_name = "cpu"
-
-  ## The example of config to aggregate for specified fields of metric.
-  [[aggregators.histogram.config]]
-  ## The set of buckets.
-  buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
-  ## The name of metric.
-  metric_name = "diskio"
-  ## The concrete fields of metric
-  metric_fields = ["io_time", "read_time", "write_time"]
+  ## Example config that aggregates all fields of the metric.
+  # [[aggregators.histogram.config]]
+  #   ## The set of buckets.
+  #   buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0]
+  #   ## The name of metric.
+  #   measurement_name = "cpu"
+
+  ## Example config that aggregates only specific fields of the metric.
+  # [[aggregators.histogram.config]]
+  #   ## The set of buckets.
+  #   buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
+  #   ## The name of metric.
+  #   measurement_name = "diskio"
+  #   ## The concrete fields of metric
+  #   fields = ["io_time", "read_time", "write_time"]
 `
 
 // SampleConfig returns sample of config
@@ -96,7 +96,7 @@ func (h *HistogramAggregator) SampleConfig() string {
 
 // Description returns description of aggregator plugin
 func (h *HistogramAggregator) Description() string {
-	return "Keep the aggregate histogram of each metric passing through."
+	return "Create aggregate histograms."
 }
 
 // Add adds new hit to the buckets
-- 
GitLab