From 8364417009da541e3842875ff263bcb12fc48c23 Mon Sep 17 00:00:00 2001
From: Pierre Fersing <pierref@pierref.org>
Date: Wed, 15 Nov 2017 23:44:20 +0100
Subject: [PATCH] Whitelist allowed char classes for graphite output (#3473)

---
 CHANGELOG.md                                  |  1 +
 plugins/serializers/graphite/graphite.go      | 28 +++++-
 plugins/serializers/graphite/graphite_test.go | 93 +++++++++++++++++++
 3 files changed, 118 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 420ed007..2bc5e23f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,6 +67,7 @@
 - [#3351](https://github.com/influxdata/telegraf/issues/3351): Fix prometheus passthrough for existing value types.
 - [#3430](https://github.com/influxdata/telegraf/issues/3430): Always ignore autofs filesystems in disk input.
 - [#3326](https://github.com/influxdata/telegraf/issues/3326): Fail metrics parsing on unescaped quotes.
+- [#3473](https://github.com/influxdata/telegraf/pull/3473): Whitelist allowed char classes for graphite output.
 
 ## v1.4.4 [2017-11-08]
 
diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go
index 084ba54e..157add41 100644
--- a/plugins/serializers/graphite/graphite.go
+++ b/plugins/serializers/graphite/graphite.go
@@ -2,6 +2,7 @@ package graphite
 
 import (
 	"fmt"
+	"regexp"
 	"sort"
 	"strings"
 
@@ -11,8 +12,18 @@ import (
 const DEFAULT_TEMPLATE = "host.tags.measurement.field"
 
 var (
-	fieldDeleter   = strings.NewReplacer(".FIELDNAME", "", "FIELDNAME.", "")
-	sanitizedChars = strings.NewReplacer("/", "-", "@", "-", "*", "-", " ", "_", "..", ".", `\`, "", ")", "_", "(", "_")
+	allowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
+	hypenChars   = strings.NewReplacer(
+		"/", "-",
+		"@", "-",
+		"*", "-",
+	)
+	dropChars = strings.NewReplacer(
+		`\`, "",
+		"..", ".",
+	)
+
+	fieldDeleter = strings.NewReplacer(".FIELDNAME", "", "FIELDNAME.", "")
 )
 
 type GraphiteSerializer struct {
@@ -44,7 +55,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
 		}
 		metricString := fmt.Sprintf("%s %#v %d\n",
 			// insert "field" section of template
-			sanitizedChars.Replace(InsertField(bucket, fieldName)),
+			sanitize(InsertField(bucket, fieldName)),
 			value,
 			timestamp)
 		point := []byte(metricString)
@@ -122,7 +133,7 @@ func InsertField(bucket, fieldName string) string {
 	if fieldName == "value" {
 		return fieldDeleter.Replace(bucket)
 	}
-	return strings.Replace(bucket, "FIELDNAME", fieldName, 1)
+	return strings.Replace(bucket, "FIELDNAME", strings.Replace(fieldName, ".", "_", -1), 1)
 }
 
 func buildTags(tags map[string]string) string {
@@ -143,3 +154,12 @@ func buildTags(tags map[string]string) string {
 	}
 	return tag_str
 }
+
+func sanitize(value string) string {
+	// Apply special hypenation rules to preserve backwards compatibility
+	value = hypenChars.Replace(value)
+	// Apply rule to drop some chars to preserve backwards compatibility
+	value = dropChars.Replace(value)
+	// Replace any remaining illegal chars
+	return allowedChars.ReplaceAllLiteralString(value, "_")
+}
diff --git a/plugins/serializers/graphite/graphite_test.go b/plugins/serializers/graphite/graphite_test.go
index a6dd1aaa..94792112 100644
--- a/plugins/serializers/graphite/graphite_test.go
+++ b/plugins/serializers/graphite/graphite_test.go
@@ -8,6 +8,7 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	"github.com/influxdata/telegraf/metric"
 )
@@ -468,3 +469,95 @@ func TestTemplate6(t *testing.T) {
 	expS := "localhost.cpu0.us-west-2.cpu.FIELDNAME"
 	assert.Equal(t, expS, mS)
 }
+
+func TestClean(t *testing.T) {
+	now := time.Unix(1234567890, 0)
+	tests := []struct {
+		name        string
+		metric_name string
+		tags        map[string]string
+		fields      map[string]interface{}
+		expected    string
+	}{
+		{
+			"Base metric",
+			"cpu",
+			map[string]string{"host": "localhost"},
+			map[string]interface{}{"usage_busy": float64(8.5)},
+			"localhost.cpu.usage_busy 8.5 1234567890\n",
+		},
+		{
+			"Dot and whitespace in tags",
+			"cpu",
+			map[string]string{"host": "localhost", "label.dot and space": "value with.dot"},
+			map[string]interface{}{"usage_busy": float64(8.5)},
+			"localhost.value_with_dot.cpu.usage_busy 8.5 1234567890\n",
+		},
+		{
+			"Field with space",
+			"system",
+			map[string]string{"host": "localhost"},
+			map[string]interface{}{"uptime_format": "20 days, 23:26"},
+			"", // yes nothing. graphite don't serialize string fields
+		},
+		{
+			"Allowed punct",
+			"cpu",
+			map[string]string{"host": "localhost", "tag": "-_:="},
+			map[string]interface{}{"usage_busy": float64(10)},
+			"localhost.-_:=.cpu.usage_busy 10 1234567890\n",
+		},
+		{
+			"Special conversions to hyphen",
+			"cpu",
+			map[string]string{"host": "localhost", "tag": "/@*"},
+			map[string]interface{}{"usage_busy": float64(10)},
+			"localhost.---.cpu.usage_busy 10 1234567890\n",
+		},
+		{
+			"Special drop chars",
+			"cpu",
+			map[string]string{"host": "localhost", "tag": `\no slash`},
+			map[string]interface{}{"usage_busy": float64(10)},
+			"localhost.no_slash.cpu.usage_busy 10 1234567890\n",
+		},
+		{
+			"Empty tag & value field",
+			"cpu",
+			map[string]string{"host": "localhost"},
+			map[string]interface{}{"value": float64(10)},
+			"localhost.cpu 10 1234567890\n",
+		},
+		{
+			"Unicode Letters allowed",
+			"cpu",
+			map[string]string{"host": "localhost", "tag": "μnicodε_letters"},
+			map[string]interface{}{"value": float64(10)},
+			"localhost.μnicodε_letters.cpu 10 1234567890\n",
+		},
+		{
+			"Other Unicode not allowed",
+			"cpu",
+			map[string]string{"host": "localhost", "tag": "“☢”"},
+			map[string]interface{}{"value": float64(10)},
+			"localhost.___.cpu 10 1234567890\n",
+		},
+		{
+			"Newline in tags",
+			"cpu",
+			map[string]string{"host": "localhost", "label": "some\nthing\nwith\nnewline"},
+			map[string]interface{}{"usage_busy": float64(8.5)},
+			"localhost.some_thing_with_newline.cpu.usage_busy 8.5 1234567890\n",
+		},
+	}
+
+	s := GraphiteSerializer{}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			m, err := metric.New(tt.metric_name, tt.tags, tt.fields, now)
+			assert.NoError(t, err)
+			actual, _ := s.Serialize(m)
+			require.Equal(t, tt.expected, string(actual))
+		})
+	}
+}
-- 
GitLab