// Copyright 2018 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package summary

import (
	"context"
	"fmt"
	"math"
	"time"

	"github.com/golang/glog"
	"github.com/kubernetes-incubator/metrics-server/pkg/sources"
	"github.com/prometheus/client_golang/prometheus"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/labels"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	v1listers "k8s.io/client-go/listers/core/v1"
	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

var (
	summaryRequestLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "metrics_server",
			Subsystem: "kubelet_summary",
			Name:      "request_duration_seconds",
			Help:      "The Kubelet summary request latencies in seconds.",
			// TODO(directxman12): it would be nice to calculate these buckets off of scrape duration,
			// like we do elsewhere, but we're not passed the scrape duration at this level.
			Buckets: prometheus.DefBuckets,
		},
		[]string{"node"},
	)
	scrapeTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "metrics_server",
			Subsystem: "kubelet_summary",
			Name:      "scrapes_total",
			Help:      "Total number of attempted Summary API scrapes done by Metrics Server.",
		},
		[]string{"success"},
	)
)

func init() {
	prometheus.MustRegister(summaryRequestLatency)
	prometheus.MustRegister(scrapeTotal)
}

// NodeInfo contains the information needed to identify and connect to a particular node
// (node name and preferred address).
type NodeInfo struct {
	Name           string
	ConnectAddress string
}

// summaryMetricsSource is a metrics source that scrapes node, pod, and
// system-container metrics from a single node's Kubelet Summary API.
type summaryMetricsSource struct {
	node          NodeInfo
	kubeletClient KubeletInterface
}

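// NewSummaryMetricsSource returns a MetricSource that collects metrics for
// the given node by querying its Kubelet Summary API through the given client.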
func NewSummaryMetricsSource(node NodeInfo, client KubeletInterface) sources.MetricSource {
	return &summaryMetricsSource{
		node:          node,
		kubeletClient: client,
	}
}

func (src *summaryMetricsSource) Name() string {
	return src.String()
}

func (src *summaryMetricsSource) String() string {
	return fmt.Sprintf("kubelet_summary:%s", src.node.Name)
}

func (src *summaryMetricsSource) Collect(ctx context.Context) (*sources.MetricsBatch, error) {
	summary, err := func() (*stats.Summary, error) {
		startTime := time.Now()
		// NB: arguments to a deferred call are evaluated when the defer
		// statement executes, so time.Since must be wrapped in a closure
		// here to measure the full request duration rather than ~0.
		defer func() {
			summaryRequestLatency.WithLabelValues(src.node.Name).Observe(time.Since(startTime).Seconds())
		}()
		return src.kubeletClient.GetSummary(ctx, src.node.ConnectAddress)
	}()

	if err != nil {
		scrapeTotal.WithLabelValues("false").Inc()
		return nil, fmt.Errorf("unable to fetch metrics from Kubelet %s (%s): %v", src.node.Name, src.node.ConnectAddress, err)
	}

	scrapeTotal.WithLabelValues("true").Inc()

	res := &sources.MetricsBatch{
		Nodes: make([]sources.NodeMetricsPoint, 1),
		Pods:  make([]sources.PodMetricsPoint, len(summary.Pods)),
	}

	var errs []error
	errs = append(errs, src.decodeNodeStats(&summary.Node, &res.Nodes[0])...)
	if len(errs) != 0 {
		// if we had errors providing node metrics, discard the data point
		// so that we don't incorrectly report metric values as zero.
		res.Nodes = res.Nodes[:0]
	}

	num := 0
	for _, pod := range summary.Pods {
		podErrs := src.decodePodStats(&pod, &res.Pods[num])
		errs = append(errs, podErrs...)
		if len(podErrs) != 0 {
			// NB: we explicitly want to discard pods with partial results, since
			// the horizontal pod autoscaler takes special action when a pod is missing
			// metrics (and zero CPU or memory does not count as "missing metrics")

			// we don't care if we reuse slots in the result array,
			// because they get completely overwritten in decodePodStats
			continue
		}
		num++
	}
	res.Pods = res.Pods[:num]

	return res, utilerrors.NewAggregate(errs)
}

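// decodeNodeStats converts the Kubelet's node-level summary stats into a
// NodeMetricsPoint, completely overwriting target. It returns a non-empty
// error list if the timestamp, CPU, or memory data are missing or invalid,
// in which case the caller should discard the point.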
func (src *summaryMetricsSource) decodeNodeStats(nodeStats *stats.NodeStats, target *sources.NodeMetricsPoint) []error {
	timestamp, err := getScrapeTime(nodeStats.CPU, nodeStats.Memory)
	if err != nil {
		// if we can't get a timestamp, assume bad data in general
		return []error{fmt.Errorf("unable to get valid timestamp for metric point for node %q, discarding data: %v", src.node.ConnectAddress, err)}
	}
	*target = sources.NodeMetricsPoint{
		Name: src.node.Name,
		MetricsPoint: sources.MetricsPoint{
			Timestamp: timestamp,
		},
	}
	var errs []error
	if err := decodeCPU(&target.CpuUsage, nodeStats.CPU); err != nil {
		errs = append(errs, fmt.Errorf("unable to get CPU for node %q, discarding data: %v", src.node.ConnectAddress, err))
	}
	if err := decodeMemory(&target.MemoryUsage, nodeStats.Memory); err != nil {
		errs = append(errs, fmt.Errorf("unable to get memory for node %q, discarding data: %v", src.node.ConnectAddress, err))
	}
	return errs
}

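// decodePodStats converts the Kubelet's per-pod summary stats into a
// PodMetricsPoint, completely overwriting target. Any per-container decoding
// failures are returned as errors so that the caller can discard the whole
// pod's data point.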
func (src *summaryMetricsSource) decodePodStats(podStats *stats.PodStats, target *sources.PodMetricsPoint) []error {
	// completely overwrite data in the target
	*target = sources.PodMetricsPoint{
		Name:       podStats.PodRef.Name,
		Namespace:  podStats.PodRef.Namespace,
		Containers: make([]sources.ContainerMetricsPoint, len(podStats.Containers)),
	}

	var errs []error
	for i, container := range podStats.Containers {
		timestamp, err := getScrapeTime(container.CPU, container.Memory)
		if err != nil {
			// if we can't get a timestamp, assume bad data in general
			errs = append(errs, fmt.Errorf("unable to get a valid timestamp for metric point for container %q in pod %s/%s on node %q, discarding data: %v", container.Name, target.Namespace, target.Name, src.node.ConnectAddress, err))
			continue
		}
		point := sources.ContainerMetricsPoint{
			Name: container.Name,
			MetricsPoint: sources.MetricsPoint{
				Timestamp: timestamp,
			},
		}
		if err := decodeCPU(&point.CpuUsage, container.CPU); err != nil {
			errs = append(errs, fmt.Errorf("unable to get CPU for container %q in pod %s/%s on node %q, discarding data: %v", container.Name, target.Namespace, target.Name, src.node.ConnectAddress, err))
		}
		if err := decodeMemory(&point.MemoryUsage, container.Memory); err != nil {
			errs = append(errs, fmt.Errorf("unable to get memory for container %q in pod %s/%s on node %q, discarding data: %v", container.Name, target.Namespace, target.Name, src.node.ConnectAddress, err))
		}

		target.Containers[i] = point
	}

	return errs
}

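// decodeCPU sets target to the Kubelet-reported instantaneous CPU usage,
// expressed in nanocores (scale -9 relative to whole cores).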
func decodeCPU(target *resource.Quantity, cpuStats *stats.CPUStats) error {
	if cpuStats == nil || cpuStats.UsageNanoCores == nil {
		return fmt.Errorf("missing cpu usage metric")
	}

	*target = *uint64Quantity(*cpuStats.UsageNanoCores, -9)
	return nil
}

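// decodeMemory sets target to the Kubelet-reported working-set size in
// bytes, formatted as a binary-SI (Ki/Mi/Gi) quantity.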
func decodeMemory(target *resource.Quantity, memStats *stats.MemoryStats) error {
	if memStats == nil || memStats.WorkingSetBytes == nil {
		return fmt.Errorf("missing memory usage metric")
	}

	*target = *uint64Quantity(*memStats.WorkingSetBytes, 0)
	target.Format = resource.BinarySI

	return nil
}

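// getScrapeTime returns the earlier of the CPU and memory timestamps, or an
// error if neither stat carries a non-zero timestamp.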
func getScrapeTime(cpu *stats.CPUStats, memory *stats.MemoryStats) (time.Time, error) {
	// Ensure we get the earlier timestamp so that we can tell if a given data
	// point was tainted by pod initialization.

	var earliest *time.Time
	if cpu != nil && !cpu.Time.IsZero() && (earliest == nil || earliest.After(cpu.Time.Time)) {
		earliest = &cpu.Time.Time
	}

	if memory != nil && !memory.Time.IsZero() && (earliest == nil || earliest.After(memory.Time.Time)) {
		earliest = &memory.Time.Time
	}

	if earliest == nil {
		return time.Time{}, fmt.Errorf("no non-zero timestamp on either CPU or memory")
	}

	return *earliest, nil
}

// uint64Quantity converts a uint64 into a Quantity, which only has constructors
// that work with int64 (except for parse, which requires costly round-trips to string).
// Values larger than the maximum int64 lose one decimal order of magnitude of
// precision so that they fit into a scaled quantity.
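//
// For example, a CPU usage of 2500000000 nanocores decodes as
//
//	uint64Quantity(2500000000, -9) // 2500000000 * 10^-9 cores, i.e. "2500m"
//
// while a value above math.MaxInt64 is first divided by 10 and its scale
// bumped by one before conversion.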
func uint64Quantity(val uint64, scale resource.Scale) *resource.Quantity {
	// easy path -- we can safely fit val into an int64
	if val <= math.MaxInt64 {
		return resource.NewScaledQuantity(int64(val), scale)
	}

	glog.V(1).Infof("unexpectedly large resource value %v, losing precision to fit in scaled resource.Quantity", val)

	// otherwise, lose a decimal order of magnitude of precision,
	// so we can fit into a scaled quantity
	return resource.NewScaledQuantity(int64(val/10), resource.Scale(1)+scale)
}

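// summaryProvider produces one summary-based metrics source per node known
// to the node lister.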
type summaryProvider struct {
	nodeLister    v1listers.NodeLister
	kubeletClient KubeletInterface
	addrResolver  NodeAddressResolver
}

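// GetMetricSources lists all nodes and returns a metrics source for each node
// whose connection information could be resolved, aggregating any per-node
// resolution errors.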
func (p *summaryProvider) GetMetricSources() ([]sources.MetricSource, error) {
	// use a name that doesn't shadow the imported "sources" package
	srcs := []sources.MetricSource{}
	nodes, err := p.nodeLister.List(labels.Everything())
	if err != nil {
		return nil, fmt.Errorf("unable to list nodes: %v", err)
	}

	var errs []error
	for _, node := range nodes {
		info, err := p.getNodeInfo(node)
		if err != nil {
			errs = append(errs, fmt.Errorf("unable to extract connection information for node %q: %v", node.Name, err))
			continue
		}
		srcs = append(srcs, NewSummaryMetricsSource(info, p.kubeletClient))
	}
	return srcs, utilerrors.NewAggregate(errs)
}

func (p *summaryProvider) getNodeInfo(node *corev1.Node) (NodeInfo, error) {
	addr, err := p.addrResolver.NodeAddress(node)
	if err != nil {
		return NodeInfo{}, err
	}
	info := NodeInfo{
		Name:           node.Name,
		ConnectAddress: addr,
	}

	return info, nil
}

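// NewSummaryProvider returns a MetricSourceProvider that discovers nodes via
// the given lister and scrapes each one's Summary API with the given client.
//
// A minimal wiring sketch (assuming a shared informer factory and a
// configured Kubelet client are already in hand):
//
//	nodeLister := informerFactory.Core().V1().Nodes().Lister()
//	provider := NewSummaryProvider(nodeLister, kubeletClient, addrResolver)
//	srcs, err := provider.GetMetricSources()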
func NewSummaryProvider(nodeLister v1listers.NodeLister, kubeletClient KubeletInterface, addrResolver NodeAddressResolver) sources.MetricSourceProvider {
	return &summaryProvider{
		nodeLister:    nodeLister,
		kubeletClient: kubeletClient,
		addrResolver:  addrResolver,
	}
}