swarm: integrate OpenTracing; propagate ctx to internal APIs (#17169)

* swarm: propagate ctx, enable opentracing * swarm/tracing: log error when tracing is misconfigured
2018-07-13 17:40:28 +02:00
parent f7d3678c28
commit 7c9314f231
170 changed files with 21762 additions and 249 deletions
--- a/vendor/github.com/uber/jaeger-client-go/rpcmetrics/README.md
+++ b/vendor/github.com/uber/jaeger-client-go/rpcmetrics/README.md
@ -0,0 +1,5 @@
+An Observer that can be used to emit RPC metrics
+================================================
+
+It can be attached to the tracer during tracer construction.
+See `ExampleObserver` function in [observer_test.go](./observer_test.go).
--- a/vendor/github.com/uber/jaeger-client-go/rpcmetrics/doc.go
+++ b/vendor/github.com/uber/jaeger-client-go/rpcmetrics/doc.go
@ -0,0 +1,16 @@
+// Copyright (c) 2017 Uber Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package rpcmetrics implements an Observer that can be used to emit RPC metrics.
+package rpcmetrics
--- a/vendor/github.com/uber/jaeger-client-go/rpcmetrics/endpoints.go
+++ b/vendor/github.com/uber/jaeger-client-go/rpcmetrics/endpoints.go
@ -0,0 +1,63 @@
+// Copyright (c) 2017 Uber Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rpcmetrics
+
+import "sync"
+
+// normalizedEndpoints is a cache for endpointName -> safeName mappings.
+type normalizedEndpoints struct {
+	names       map[string]string
+	maxSize     int
+	defaultName string
+	normalizer  NameNormalizer
+	mux         sync.RWMutex
+}
+
+func newNormalizedEndpoints(maxSize int, normalizer NameNormalizer) *normalizedEndpoints {
+	return &normalizedEndpoints{
+		maxSize:    maxSize,
+		normalizer: normalizer,
+		names:      make(map[string]string, maxSize),
+	}
+}
+
+// normalize looks up the name in the cache, if not found it uses normalizer
+// to convert the name to a safe name. If called with more than maxSize unique
+// names it returns "" for all other names beyond those already cached.
+func (n *normalizedEndpoints) normalize(name string) string {
+	n.mux.RLock()
+	norm, ok := n.names[name]
+	l := len(n.names)
+	n.mux.RUnlock()
+	if ok {
+		return norm
+	}
+	if l >= n.maxSize {
+		return ""
+	}
+	return n.normalizeWithLock(name)
+}
+
+func (n *normalizedEndpoints) normalizeWithLock(name string) string {
+	norm := n.normalizer.Normalize(name)
+	n.mux.Lock()
+	defer n.mux.Unlock()
+	// cache may have grown while we were not holding the lock
+	if len(n.names) >= n.maxSize {
+		return ""
+	}
+	n.names[name] = norm
+	return norm
+}
--- a/vendor/github.com/uber/jaeger-client-go/rpcmetrics/metrics.go
+++ b/vendor/github.com/uber/jaeger-client-go/rpcmetrics/metrics.go
@ -0,0 +1,124 @@
+// Copyright (c) 2017 Uber Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rpcmetrics
+
+import (
+	"sync"
+
+	"github.com/uber/jaeger-lib/metrics"
+)
+
+const (
+	otherEndpointsPlaceholder = "other"
+	endpointNameMetricTag     = "endpoint"
+)
+
+// Metrics is a collection of metrics for an endpoint describing
+// throughput, success, errors, and performance.
+type Metrics struct {
+	// RequestCountSuccess is a counter of the total number of successes.
+	RequestCountSuccess metrics.Counter `metric:"requests" tags:"error=false"`
+
+	// RequestCountFailures is a counter of the number of times any failure has been observed.
+	RequestCountFailures metrics.Counter `metric:"requests" tags:"error=true"`
+
+	// RequestLatencySuccess is a latency histogram of succesful requests.
+	RequestLatencySuccess metrics.Timer `metric:"request_latency" tags:"error=false"`
+
+	// RequestLatencyFailures is a latency histogram of failed requests.
+	RequestLatencyFailures metrics.Timer `metric:"request_latency" tags:"error=true"`
+
+	// HTTPStatusCode2xx is a counter of the total number of requests with HTTP status code 200-299
+	HTTPStatusCode2xx metrics.Counter `metric:"http_requests" tags:"status_code=2xx"`
+
+	// HTTPStatusCode3xx is a counter of the total number of requests with HTTP status code 300-399
+	HTTPStatusCode3xx metrics.Counter `metric:"http_requests" tags:"status_code=3xx"`
+
+	// HTTPStatusCode4xx is a counter of the total number of requests with HTTP status code 400-499
+	HTTPStatusCode4xx metrics.Counter `metric:"http_requests" tags:"status_code=4xx"`
+
+	// HTTPStatusCode5xx is a counter of the total number of requests with HTTP status code 500-599
+	HTTPStatusCode5xx metrics.Counter `metric:"http_requests" tags:"status_code=5xx"`
+}
+
+func (m *Metrics) recordHTTPStatusCode(statusCode uint16) {
+	if statusCode >= 200 && statusCode < 300 {
+		m.HTTPStatusCode2xx.Inc(1)
+	} else if statusCode >= 300 && statusCode < 400 {
+		m.HTTPStatusCode3xx.Inc(1)
+	} else if statusCode >= 400 && statusCode < 500 {
+		m.HTTPStatusCode4xx.Inc(1)
+	} else if statusCode >= 500 && statusCode < 600 {
+		m.HTTPStatusCode5xx.Inc(1)
+	}
+}
+
+// MetricsByEndpoint is a registry/cache of metrics for each unique endpoint name.
+// Only maxNumberOfEndpoints Metrics are stored, all other endpoint names are mapped
+// to a generic endpoint name "other".
+type MetricsByEndpoint struct {
+	metricsFactory    metrics.Factory
+	endpoints         *normalizedEndpoints
+	metricsByEndpoint map[string]*Metrics
+	mux               sync.RWMutex
+}
+
+func newMetricsByEndpoint(
+	metricsFactory metrics.Factory,
+	normalizer NameNormalizer,
+	maxNumberOfEndpoints int,
+) *MetricsByEndpoint {
+	return &MetricsByEndpoint{
+		metricsFactory:    metricsFactory,
+		endpoints:         newNormalizedEndpoints(maxNumberOfEndpoints, normalizer),
+		metricsByEndpoint: make(map[string]*Metrics, maxNumberOfEndpoints+1), // +1 for "other"
+	}
+}
+
+func (m *MetricsByEndpoint) get(endpoint string) *Metrics {
+	safeName := m.endpoints.normalize(endpoint)
+	if safeName == "" {
+		safeName = otherEndpointsPlaceholder
+	}
+	m.mux.RLock()
+	met := m.metricsByEndpoint[safeName]
+	m.mux.RUnlock()
+	if met != nil {
+		return met
+	}
+
+	return m.getWithWriteLock(safeName)
+}
+
+// split to make easier to test
+func (m *MetricsByEndpoint) getWithWriteLock(safeName string) *Metrics {
+	m.mux.Lock()
+	defer m.mux.Unlock()
+
+	// it is possible that the name has been already registered after we released
+	// the read lock and before we grabbed the write lock, so check for that.
+	if met, ok := m.metricsByEndpoint[safeName]; ok {
+		return met
+	}
+
+	// it would be nice to create the struct before locking, since Init() is somewhat
+	// expensive, however some metrics backends (e.g. expvar) may not like duplicate metrics.
+	met := &Metrics{}
+	tags := map[string]string{endpointNameMetricTag: safeName}
+	metrics.Init(met, m.metricsFactory, tags)
+
+	m.metricsByEndpoint[safeName] = met
+	return met
+}
--- a/vendor/github.com/uber/jaeger-client-go/rpcmetrics/normalizer.go
+++ b/vendor/github.com/uber/jaeger-client-go/rpcmetrics/normalizer.go
@ -0,0 +1,101 @@
+// Copyright (c) 2017 Uber Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rpcmetrics
+
+// NameNormalizer is used to convert the endpoint names to strings
+// that can be safely used as tags in the metrics.
+type NameNormalizer interface {
+	Normalize(name string) string
+}
+
+// DefaultNameNormalizer converts endpoint names so that they contain only characters
+// from the safe charset [a-zA-Z0-9-./_]. All other characters are replaced with '-'.
+var DefaultNameNormalizer = &SimpleNameNormalizer{
+	SafeSets: []SafeCharacterSet{
+		&Range{From: 'a', To: 'z'},
+		&Range{From: 'A', To: 'Z'},
+		&Range{From: '0', To: '9'},
+		&Char{'-'},
+		&Char{'_'},
+		&Char{'/'},
+		&Char{'.'},
+	},
+	Replacement: '-',
+}
+
+// SimpleNameNormalizer uses a set of safe character sets.
+type SimpleNameNormalizer struct {
+	SafeSets    []SafeCharacterSet
+	Replacement byte
+}
+
+// SafeCharacterSet determines if the given character is "safe"
+type SafeCharacterSet interface {
+	IsSafe(c byte) bool
+}
+
+// Range implements SafeCharacterSet
+type Range struct {
+	From, To byte
+}
+
+// IsSafe implements SafeCharacterSet
+func (r *Range) IsSafe(c byte) bool {
+	return c >= r.From && c <= r.To
+}
+
+// Char implements SafeCharacterSet
+type Char struct {
+	Val byte
+}
+
+// IsSafe implements SafeCharacterSet
+func (ch *Char) IsSafe(c byte) bool {
+	return c == ch.Val
+}
+
+// Normalize checks each character in the string against SafeSets,
+// and if it's not safe substitutes it with Replacement.
+func (n *SimpleNameNormalizer) Normalize(name string) string {
+	var retMe []byte
+	nameBytes := []byte(name)
+	for i, b := range nameBytes {
+		if n.safeByte(b) {
+			if retMe != nil {
+				retMe[i] = b
+			}
+		} else {
+			if retMe == nil {
+				retMe = make([]byte, len(nameBytes))
+				copy(retMe[0:i], nameBytes[0:i])
+			}
+			retMe[i] = n.Replacement
+		}
+	}
+	if retMe == nil {
+		return name
+	}
+	return string(retMe)
+}
+
+// safeByte checks if b against all safe charsets.
+func (n *SimpleNameNormalizer) safeByte(b byte) bool {
+	for i := range n.SafeSets {
+		if n.SafeSets[i].IsSafe(b) {
+			return true
+		}
+	}
+	return false
+}
--- a/vendor/github.com/uber/jaeger-client-go/rpcmetrics/observer.go
+++ b/vendor/github.com/uber/jaeger-client-go/rpcmetrics/observer.go
@ -0,0 +1,171 @@
+// Copyright (c) 2017 Uber Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package rpcmetrics
+
+import (
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/opentracing/opentracing-go"
+	"github.com/opentracing/opentracing-go/ext"
+	"github.com/uber/jaeger-lib/metrics"
+
+	jaeger "github.com/uber/jaeger-client-go"
+)
+
+const defaultMaxNumberOfEndpoints = 200
+
+// Observer is an observer that can emit RPC metrics.
+type Observer struct {
+	metricsByEndpoint *MetricsByEndpoint
+}
+
+// NewObserver creates a new observer that can emit RPC metrics.
+func NewObserver(metricsFactory metrics.Factory, normalizer NameNormalizer) *Observer {
+	return &Observer{
+		metricsByEndpoint: newMetricsByEndpoint(
+			metricsFactory,
+			normalizer,
+			defaultMaxNumberOfEndpoints,
+		),
+	}
+}
+
+// OnStartSpan creates a new Observer for the span.
+func (o *Observer) OnStartSpan(
+	operationName string,
+	options opentracing.StartSpanOptions,
+) jaeger.SpanObserver {
+	return NewSpanObserver(o.metricsByEndpoint, operationName, options)
+}
+
+// SpanKind identifies the span as inboud, outbound, or internal
+type SpanKind int
+
+const (
+	// Local span kind
+	Local SpanKind = iota
+	// Inbound span kind
+	Inbound
+	// Outbound span kind
+	Outbound
+)
+
+// SpanObserver collects RPC metrics
+type SpanObserver struct {
+	metricsByEndpoint *MetricsByEndpoint
+	operationName     string
+	startTime         time.Time
+	mux               sync.Mutex
+	kind              SpanKind
+	httpStatusCode    uint16
+	err               bool
+}
+
+// NewSpanObserver creates a new SpanObserver that can emit RPC metrics.
+func NewSpanObserver(
+	metricsByEndpoint *MetricsByEndpoint,
+	operationName string,
+	options opentracing.StartSpanOptions,
+) *SpanObserver {
+	so := &SpanObserver{
+		metricsByEndpoint: metricsByEndpoint,
+		operationName:     operationName,
+		startTime:         options.StartTime,
+	}
+	for k, v := range options.Tags {
+		so.handleTagInLock(k, v)
+	}
+	return so
+}
+
+// handleTags watches for special tags
+// - SpanKind
+// - HttpStatusCode
+// - Error
+func (so *SpanObserver) handleTagInLock(key string, value interface{}) {
+	if key == string(ext.SpanKind) {
+		if v, ok := value.(ext.SpanKindEnum); ok {
+			value = string(v)
+		}
+		if v, ok := value.(string); ok {
+			if v == string(ext.SpanKindRPCClientEnum) {
+				so.kind = Outbound
+			} else if v == string(ext.SpanKindRPCServerEnum) {
+				so.kind = Inbound
+			}
+		}
+		return
+	}
+	if key == string(ext.HTTPStatusCode) {
+		if v, ok := value.(uint16); ok {
+			so.httpStatusCode = v
+		} else if v, ok := value.(int); ok {
+			so.httpStatusCode = uint16(v)
+		} else if v, ok := value.(string); ok {
+			if vv, err := strconv.Atoi(v); err == nil {
+				so.httpStatusCode = uint16(vv)
+			}
+		}
+		return
+	}
+	if key == string(ext.Error) {
+		if v, ok := value.(bool); ok {
+			so.err = v
+		} else if v, ok := value.(string); ok {
+			if vv, err := strconv.ParseBool(v); err == nil {
+				so.err = vv
+			}
+		}
+		return
+	}
+}
+
+// OnFinish emits the RPC metrics. It only has an effect when operation name
+// is not blank, and the span kind is an RPC server.
+func (so *SpanObserver) OnFinish(options opentracing.FinishOptions) {
+	so.mux.Lock()
+	defer so.mux.Unlock()
+
+	if so.operationName == "" || so.kind != Inbound {
+		return
+	}
+
+	mets := so.metricsByEndpoint.get(so.operationName)
+	latency := options.FinishTime.Sub(so.startTime)
+	if so.err {
+		mets.RequestCountFailures.Inc(1)
+		mets.RequestLatencyFailures.Record(latency)
+	} else {
+		mets.RequestCountSuccess.Inc(1)
+		mets.RequestLatencySuccess.Record(latency)
+	}
+	mets.recordHTTPStatusCode(so.httpStatusCode)
+}
+
+// OnSetOperationName records new operation name.
+func (so *SpanObserver) OnSetOperationName(operationName string) {
+	so.mux.Lock()
+	so.operationName = operationName
+	so.mux.Unlock()
+}
+
+// OnSetTag implements SpanObserver
+func (so *SpanObserver) OnSetTag(key string, value interface{}) {
+	so.mux.Lock()
+	so.handleTagInLock(key, value)
+	so.mux.Unlock()
+}