From cf568487c8dc6d20a6e353e291d1f6184a200c08 Mon Sep 17 00:00:00 2001
From: Wu Taizeng <wutz@paratera.com>
Date: Tue, 26 Jan 2016 16:19:34 +0800
Subject: [PATCH] Fix input panics causing telegraf to exit

closes #585
closes #584
---
 CHANGELOG.md |  1 +
 agent.go     | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63611e3e..bc0f3fdd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,7 @@ specifying a docker endpoint to get metrics from.
 - [#440](https://github.com/influxdata/telegraf/issues/440): Don't query filtered devices for disk stats.
 - [#463](https://github.com/influxdata/telegraf/issues/463): Docker plugin not working on AWS Linux
 - [#568](https://github.com/influxdata/telegraf/issues/568): Multiple output race condition.
+- [#585](https://github.com/influxdata/telegraf/pull/585): Log stack trace and continue on Telegraf panic. Thanks @wutaizeng!
 
 ## v0.10.0 [2016-01-12]
 
diff --git a/agent.go b/agent.go
index d0f82145..ee5f4502 100644
--- a/agent.go
+++ b/agent.go
@@ -7,6 +7,7 @@ import (
 	"math/big"
 	"math/rand"
 	"os"
+	"runtime"
 	"sync"
 	"time"
 
@@ -87,6 +88,18 @@ func (a *Agent) Close() error {
 	return err
 }
 
+// panicRecover must be deferred directly; it logs a panic from input (with stack) instead of crashing.
+func panicRecover(input *models.RunningInput) {
+	if err := recover(); err != nil {
+		trace := make([]byte, 2048)
+		n := runtime.Stack(trace, true) // n = bytes written; the rest of trace is unused NUL padding
+		log.Printf("FATAL: Input [%s] panicked: %v, Stack:\n%s\n", input.Name, err, trace[:n])
+		log.Println("PLEASE REPORT THIS PANIC ON GITHUB with " +
+			"stack trace, configuration, and OS information: " +
+			"https://github.com/influxdata/telegraf/issues/new")
+	}
+}
+
 // gatherParallel runs the inputs that are using the same reporting interval
 // as the telegraf agent.
 func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
@@ -103,6 +116,7 @@ func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
 		wg.Add(1)
 		counter++
 		go func(input *models.RunningInput) {
+			defer panicRecover(input)
 			defer wg.Done()
 
 			acc := NewAccumulator(input.Config, pointChan)
@@ -148,6 +162,8 @@ func (a *Agent) gatherSeparate(
 	input *models.RunningInput,
 	pointChan chan *client.Point,
 ) error {
+	defer panicRecover(input)
+
 	ticker := time.NewTicker(input.Config.Interval)
 
 	for {
-- 
GitLab