From 52d5b19219f2441364457abf59241c064221ca5a Mon Sep 17 00:00:00 2001
From: Rene Zbinden <rene.zbinden@gmail.com>
Date: Tue, 24 May 2016 10:55:25 +0200
Subject: [PATCH] add chrony support (#1238)

* add chrony support

* remove path definition

* add changelog
---
 CHANGELOG.md                             |   1 +
 README.md                                |   1 +
 plugins/inputs/all/all.go                |   1 +
 plugins/inputs/chrony/README.md          |  91 +++++++++++++++++
 plugins/inputs/chrony/chrony.go          | 118 +++++++++++++++++++++++
 plugins/inputs/chrony/chrony_notlinux.go |   3 +
 plugins/inputs/chrony/chrony_test.go     |  95 ++++++++++++++++++
 7 files changed, 310 insertions(+)
 create mode 100644 plugins/inputs/chrony/README.md
 create mode 100644 plugins/inputs/chrony/chrony.go
 create mode 100644 plugins/inputs/chrony/chrony_notlinux.go
 create mode 100644 plugins/inputs/chrony/chrony_test.go

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 667679a5..762309ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ to "stdout".
 - [#1139](https://github.com/influxdata/telegraf/pull/1139): instrumental output plugin. Thanks @jasonroelofs!
 - [#1172](https://github.com/influxdata/telegraf/pull/1172): Ceph storage stats. Thanks @robinpercy!
 - [#1233](https://github.com/influxdata/telegraf/pull/1233): Updated golint gopsutil dependency.
+- [#1238](https://github.com/influxdata/telegraf/pull/1238): chrony input plugin. Thanks @zbindenren!
 - [#479](https://github.com/influxdata/telegraf/issues/479): per-plugin execution time added to debug output.
 
 ### Bugfixes
diff --git a/README.md b/README.md
index 3b969639..c01fa0c6 100644
--- a/README.md
+++ b/README.md
@@ -162,6 +162,7 @@ Currently implemented sources:
 * [bcache](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/bcache)
 * [cassandra](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cassandra)
 * [ceph](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/ceph)
+* [chrony](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/chrony)
 * [couchbase](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchbase)
 * [couchdb](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/couchdb)
 * [disque](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/disque)
diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go
index df739a6b..36d0724a 100644
--- a/plugins/inputs/all/all.go
+++ b/plugins/inputs/all/all.go
@@ -6,6 +6,7 @@ import (
 	_ "github.com/influxdata/telegraf/plugins/inputs/bcache"
 	_ "github.com/influxdata/telegraf/plugins/inputs/cassandra"
 	_ "github.com/influxdata/telegraf/plugins/inputs/ceph"
+	_ "github.com/influxdata/telegraf/plugins/inputs/chrony"
 	_ "github.com/influxdata/telegraf/plugins/inputs/cloudwatch"
 	_ "github.com/influxdata/telegraf/plugins/inputs/couchbase"
 	_ "github.com/influxdata/telegraf/plugins/inputs/couchdb"
diff --git a/plugins/inputs/chrony/README.md b/plugins/inputs/chrony/README.md
new file mode 100644
index 00000000..e12506ec
--- /dev/null
+++ b/plugins/inputs/chrony/README.md
@@ -0,0 +1,91 @@
+# chrony Input Plugin
+
+Get standard chrony metrics, requires chronyc executable.
+
+Below is the documentation of the various headers returned by `chronyc tracking`.
+
+- Reference ID - This is the refid and name (or IP address), if available, of the
+server to which the computer is currently synchronised. If this is 127.127.1.1
+it means the computer is not synchronised to any external source and that you
+have the ‘local’ mode operating (via the local command in chronyc (see section local),
+or the local directive in the ‘/etc/chrony.conf’ file (see section local)).
+- Stratum - The stratum indicates how many hops away from a computer with an attached
+reference clock we are. Such a computer is a stratum-1 computer, so the computer in the
+example is two hops away (i.e. a.b.c is a stratum-2 and is synchronised from a stratum-1).
+- Ref time - This is the time (UTC) at which the last measurement from the reference
+source was processed. The plugin does not report this value.
+- System time - In normal operation, chronyd never steps the system clock, because any
+jump in the timescale can have adverse consequences for certain application programs.
+Instead, any error in the system clock is corrected by slightly speeding up or slowing
+down the system clock until the error has been removed, and then returning to the system
+clock’s normal speed. A consequence of this is that there will be a period when the
+system clock (as read by other programs using the gettimeofday() system call, or by the
+date command in the shell) will be different from chronyd's estimate of the current true
+time (which it reports to NTP clients when it is operating in server mode). The value
+reported on this line is the difference due to this effect. The plugin does not report this value.
+- Last offset - This is the estimated local offset on the last clock update.
+- RMS offset - This is a long-term average of the offset value.
+- Frequency - The ‘frequency’ is the rate by which the system’s clock would be
+wrong if chronyd was not correcting it. It is expressed in ppm (parts per million).
+For example, a value of 1ppm would mean that when the system’s clock thinks it has
+advanced 1 second, it has actually advanced by 1.000001 seconds relative to true time.
+- Residual freq - This shows the ‘residual frequency’ for the currently selected
+reference source. This reflects any difference between what the measurements from the
+reference source indicate the frequency should be and the frequency currently being used.
+The reason this is not always zero is that a smoothing procedure is applied to the
+frequency. Each time a measurement from the reference source is obtained and a new
+residual frequency computed, the estimated accuracy of this residual is compared with the
+estimated accuracy (see ‘skew’ next) of the existing frequency value. A weighted average
+is computed for the new frequency, with weights depending on these accuracies. If the
+measurements from the reference source follow a consistent trend, the residual will be
+driven to zero over time.
+- Skew - This is the estimated error bound on the frequency.
+- Root delay - This is the total of the network path delays to the stratum-1 computer
+from which the computer is ultimately synchronised. In certain extreme situations, this
+value can be negative. (This can arise in a symmetric peer arrangement where the computers’
+frequencies are not tracking each other and the network delay is very short relative to the
+turn-around time at each computer.)
+- Root dispersion - This is the total dispersion accumulated through all the computers
+back to the stratum-1 computer from which the computer is ultimately synchronised.
+Dispersion is due to system clock resolution, statistical measurement variations etc.
+- Leap status - This is the leap status, which can be Normal, Insert second,
+Delete second or Not synchronised.
+
+### Configuration:
+
+```toml
+# Get standard chrony metrics, requires chronyc executable.
+[[inputs.chrony]]
+  # no configuration
+```
+
+### Measurements & Fields:
+
+- chrony
+    - last_offset (float, seconds)
+    - rms_offset (float, seconds)
+    - frequency (float, ppm)
+    - residual_freq (float, ppm)
+    - skew (float, ppm)
+    - root_delay (float, seconds)
+    - root_dispersion (float, seconds)
+    - update_interval (float, seconds)
+
+### Tags:
+
+- All measurements have the following tags:
+    - reference_id
+    - stratum
+    - leap_status
+
+### Example Output:
+
+```
+$ telegraf -config telegraf.conf -input-filter chrony -test
+* Plugin: chrony, Collection 1
+> chrony,leap_status=normal,reference_id=192.168.1.1,stratum=3 frequency=-35.657,last_offset=-0.000013616,residual_freq=-0,rms_offset=0.000027073,root_delay=0.000644,root_dispersion=0.003444,skew=0.001,update_interval=1031.2 1463750789687639161
+```
+
+
+
+
diff --git a/plugins/inputs/chrony/chrony.go b/plugins/inputs/chrony/chrony.go
new file mode 100644
index 00000000..b4d874e6
--- /dev/null
+++ b/plugins/inputs/chrony/chrony.go
@@ -0,0 +1,118 @@
+// +build linux
+
+package chrony
+
+import (
+	"errors"
+	"fmt"
+	"os/exec"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/internal"
+	"github.com/influxdata/telegraf/plugins/inputs"
+)
+
+var (
+	execCommand = exec.Command // execCommand creates the chronyc command; tests swap in a mock.
+)
+
+type Chrony struct { // Chrony is the input plugin that reports the output of "chronyc tracking".
+	path string // chronyc executable to run; Gather returns an error when this is empty
+}
+
+func (*Chrony) Description() string { // Description returns a one-sentence summary of the plugin.
+	return "Get standard chrony metrics, requires chronyc executable."
+}
+
+func (*Chrony) SampleConfig() string { // SampleConfig returns an empty string: the plugin has no settings.
+	return ""
+}
+
+func (c *Chrony) Gather(acc telegraf.Accumulator) error { // Gather runs "chronyc tracking" and adds the parsed result to acc.
+	if len(c.path) == 0 { // no executable known (init leaves path empty when the lookup fails)
+		return errors.New("chronyc not found: verify that chrony is installed and that chronyc is in your PATH")
+	}
+	cmd := execCommand(c.path, "tracking")
+	out, err := internal.CombinedOutputTimeout(cmd, time.Second*5) // NOTE(review): presumably aborts chronyc after 5s; confirm in internal
+	if err != nil {
+		return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
+	}
+	fields, tags, err := processChronycOutput(string(out))
+	if err != nil {
+		return err
+	}
+	acc.AddFields("chrony", fields, tags) // everything is emitted as one "chrony" measurement
+	return nil
+}
+
+// processChronycOutput parses the text printed by "chronyc tracking", for example:
+//
+//     Reference ID    : 192.168.1.22 (ntp.example.com)
+//     Stratum         : 3
+//     Ref time (UTC)  : Thu May 12 14:27:07 2016
+//     System time     : 0.000020390 seconds fast of NTP time
+//     Last offset     : +0.000012651 seconds
+//     RMS offset      : 0.000025577 seconds
+//     Frequency       : 16.001 ppm slow
+//     Residual freq   : -0.000 ppm
+//     Skew            : 0.006 ppm
+//     Root delay      : 0.001655 seconds
+//     Root dispersion : 0.003307 seconds
+//     Update interval : 507.2 seconds
+//     Leap status     : Normal
+//
+// Each line is split at ':'; the left side, lower-cased and with spaces replaced by
+// '_', is the name. When the first word on the right parses as a float it becomes a
+// field, negated if the value text contains "slow"; otherwise that word, lower-cased,
+// becomes a tag. Stratum is always a tag. Lines whose name contains "time" (Ref time
+// and System time above) are skipped. It returns (<fields>, <tags>, <error>); the
+// error is set when a line has no ':' or a line that is not skipped has no value.
+func processChronycOutput(out string) (map[string]interface{}, map[string]string, error) {
+	tags := map[string]string{}
+	fields := map[string]interface{}{}
+	lines := strings.Split(strings.TrimSpace(out), "\n")
+	for _, line := range lines {
+		stats := strings.Split(line, ":") // NOTE(review): stats[1] ends at a second ':' if the value contains one
+		if len(stats) < 2 {
+			return nil, nil, fmt.Errorf("unexpected output from chronyc, expected ':' in %s", out)
+		}
+		name := strings.ToLower(strings.Replace(strings.TrimSpace(stats[0]), " ", "_", -1))
+		// skip every line whose name contains "time" (Ref time and System time)
+		if strings.Contains(name, "time") {
+			continue
+		}
+		valueFields := strings.Fields(stats[1])
+		if len(valueFields) == 0 {
+			return nil, nil, fmt.Errorf("unexpected output from chronyc: %s", out)
+		}
+		if strings.Contains(strings.ToLower(name), "stratum") { // stratum is numeric but is stored as a tag
+			tags["stratum"] = valueFields[0]
+			continue
+		}
+		value, err := strconv.ParseFloat(valueFields[0], 64)
+		if err != nil { // not a number: keep the first word as a tag value
+			tags[name] = strings.ToLower(valueFields[0])
+			continue
+		}
+		if strings.Contains(stats[1], "slow") { // e.g. "16.001 ppm slow" is stored as -16.001
+			value = -value
+		}
+		fields[name] = value
+	}
+
+	return fields, tags, nil
+}
+
+func init() { // init looks up chronyc in PATH once and registers the plugin under the name "chrony".
+	c := Chrony{}
+	path, _ := exec.LookPath("chronyc") // the lookup error is dropped here; Gather reports the empty path instead
+	if len(path) > 0 {
+		c.path = path
+	}
+	inputs.Add("chrony", func() telegraf.Input {
+		return &c // NOTE(review): every call hands out a pointer to the same Chrony value
+	})
+}
diff --git a/plugins/inputs/chrony/chrony_notlinux.go b/plugins/inputs/chrony/chrony_notlinux.go
new file mode 100644
index 00000000..5a29cc85
--- /dev/null
+++ b/plugins/inputs/chrony/chrony_notlinux.go
@@ -0,0 +1,3 @@
+// +build !linux
+
+package chrony
diff --git a/plugins/inputs/chrony/chrony_test.go b/plugins/inputs/chrony/chrony_test.go
new file mode 100644
index 00000000..0e7d8a1a
--- /dev/null
+++ b/plugins/inputs/chrony/chrony_test.go
@@ -0,0 +1,95 @@
+// +build linux
+
+package chrony
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"testing"
+
+	"github.com/influxdata/telegraf/testutil"
+)
+
+func TestGather(t *testing.T) { // TestGather runs Gather against the mocked chronyc and checks the resulting tags and fields.
+	c := Chrony{
+		path: "chronyc",
+	}
+	// swap execCommand for the mock; the deferred func puts exec.Command back
+	execCommand = fakeExecCommand
+	defer func() { execCommand = exec.Command }()
+	var acc testutil.Accumulator
+
+	err := c.Gather(&acc)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	tags := map[string]string{
+		"reference_id": "192.168.1.22",
+		"leap_status":  "normal",
+		"stratum":      "3",
+	}
+	fields := map[string]interface{}{
+		"last_offset":     0.000012651,
+		"rms_offset":      0.000025577,
+		"frequency":       -16.001, // negated: the mock reports "16.001 ppm slow"
+		"residual_freq":   0.0,
+		"skew":            0.006,
+		"root_delay":      0.001655,
+		"root_dispersion": 0.003307,
+		"update_interval": 507.2,
+	}
+
+	acc.AssertContainsTaggedFields(t, "chrony", fields, tags)
+}
+
+// fakeExecCommand is a helper function that mocks the exec.Command call: instead of
+// command it runs the test binary itself, restricted to TestHelperProcess.
+func fakeExecCommand(command string, args ...string) *exec.Cmd {
+	cs := []string{"-test.run=TestHelperProcess", "--", command}
+	cs = append(cs, args...)
+	cmd := exec.Command(os.Args[0], cs...)
+	cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"} // makes TestHelperProcess act as the mocked command
+	return cmd
+}
+
+// TestHelperProcess isn't a real test. It's used to mock exec.Command.
+// For example, if you run:
+// GO_WANT_HELPER_PROCESS=1 go test -test.run=TestHelperProcess -- chronyc tracking
+// it prints the mockData below.
+func TestHelperProcess(t *testing.T) {
+	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
+		return
+	}
+
+	mockData := `Reference ID    : 192.168.1.22 (ntp.example.com)
+Stratum         : 3
+Ref time (UTC)  : Thu May 12 14:27:07 2016
+System time     : 0.000020390 seconds fast of NTP time
+Last offset     : +0.000012651 seconds
+RMS offset      : 0.000025577 seconds
+Frequency       : 16.001 ppm slow
+Residual freq   : -0.000 ppm
+Skew            : 0.006 ppm
+Root delay      : 0.001655 seconds
+Root dispersion : 0.003307 seconds
+Update interval : 507.2 seconds
+Leap status     : Normal
+`
+
+	args := os.Args
+
+	// The preceding arguments are test plumbing and look like:
+	// /tmp/go-build970079519/…/_test/integration.test -test.run=TestHelperProcess --
+	cmd, args := args[3], args[4:] // args[3] is the mocked command name; NOTE(review): indexing is unchecked
+
+	if cmd == "chronyc" && args[0] == "tracking" {
+		fmt.Fprint(os.Stdout, mockData)
+	} else {
+		fmt.Fprint(os.Stdout, "command not found")
+		os.Exit(1)
+
+	}
+	os.Exit(0)
+}
-- 
GitLab