diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1aae1fff986..41530d46542 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,6 +1,6 @@ /web/ui @juliusv /web/ui/module @juliusv @nexucis -/storage/remote @csmarchbanks @cstyan @bwplotka @tomwilkie +/storage/remote @cstyan @bwplotka @tomwilkie /storage/remote/otlptranslator @gouthamve @jesusvazquez /discovery/kubernetes @brancz /tsdb @jesusvazquez diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index f6d5c9191a3..dc0694394bf 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -23,7 +23,7 @@ jobs: with: input: 'prompb' against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD~1,subdir=prompb' - - uses: bufbuild/buf-push-action@342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 # v1.1.1 + - uses: bufbuild/buf-push-action@a654ff18effe4641ebea4a4ce242c49800728459 # v1.1.1 with: input: 'prompb' buf_token: ${{ secrets.BUF_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 22d3f8ad56c..f1e2b66bf1c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,6 +151,7 @@ jobs: uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0 with: args: --verbose + # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. version: v1.55.2 fuzzing: uses: ./.github/workflows/fuzzing.yml @@ -196,7 +197,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - name: Install nodejs - uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d # v3.8.1 + uses: actions/setup-node@b39b52d1213e96004bfcb1c61a8a6fa8ab84f3e8 # v4.0.1 with: node-version-file: "web/ui/.nvmrc" registry-url: "https://registry.npmjs.org" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 5e14936a95c..fd1ef19ef32 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -30,12 +30,12 @@ jobs: go-version: 1.21.x - name: Initialize CodeQL - uses: github/codeql-action/init@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8 + uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8 + uses: github/codeql-action/autobuild@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8 + uses: github/codeql-action/analyze@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 13f04f772ed..59975706071 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index f71e1331b0b..a668a4ceb0c 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -37,7 +37,7 @@ jobs: # Upload the results as artifacts (optional). 
Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3.1.3 + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # tag=v4.0.0 with: name: SARIF file path: results.sarif @@ -45,6 +45,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@407ffafae6a767df3e0230c3df91b6443ae8df75 # tag=v2.22.8 + uses: github/codeql-action/upload-sarif@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # tag=v3.22.12 with: sarif_file: results.sarif diff --git a/.golangci.yml b/.golangci.yml index 01ba9deb048..166b2e0d480 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -36,13 +36,9 @@ issues: - path: _test.go linters: - errcheck - - path: tsdb/ + - path: "tsdb/head_wal.go" linters: - errorlint - - path: tsdb/ - text: "import 'github.com/pkg/errors' is not allowed" - linters: - - depguard - linters: - godot source: "^// ===" diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f71eb49ba7..0b8a42c8ff1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +## 2.50.0 / 2024-02-22 + +* [CHANGE] Remote Write: Error `storage.ErrTooOldSample` now generates HTTP error 400 instead of HTTP error 500. #13335 +* [FEATURE] Remote Write: Drop old in-memory samples. Activated using the config entry `sample_age_limit`. #13002 +* [FEATURE] **Experimental**: Add support for ingesting zeros as created timestamps (enabled under the feature flag `created-timestamp-zero-ingestion`). #12733 #13279 +* [FEATURE] Promtool: Add `analyze` histograms command. #12331 +* [FEATURE] TSDB/compaction: Add a way to enable overlapping compaction. #13282 #13393 #13398 +* [FEATURE] Add automatic memory limit handling. Activated using the feature flag `auto-gomemlimit`. #13395 +* [ENHANCEMENT] Promtool: Allow specifying multiple matchers in `promtool tsdb dump`. #13296 +* [ENHANCEMENT] PromQL: Restore more efficient version of `NewPossibleNonCounterInfo` annotation. #13022 +* [ENHANCEMENT] Kuma SD: Extend configuration to allow users to specify client ID. #13278 +* [ENHANCEMENT] PromQL: Use natural sort in `sort_by_label` and `sort_by_label_desc`. This is **experimental**. #13411 +* [ENHANCEMENT] Native Histograms: Support `native_histogram_min_bucket_factor` in scrape_config. #13222 +* [ENHANCEMENT] Native Histograms: Issue warning if histogramRate is applied to the wrong kind of histogram. #13392 +* [ENHANCEMENT] TSDB: Make transaction isolation data structures smaller. #13015 +* [ENHANCEMENT] TSDB/postings: Optimize merge using Loser Tree. #12878 +* [ENHANCEMENT] TSDB: Simplify internal series delete function. #13261 +* [ENHANCEMENT] Agent: Performance improvement by making the global hash lookup table smaller. #13262 +* [ENHANCEMENT] PromQL: Faster execution of metric functions, e.g. `abs()`, `rate()`. #13446 +* [ENHANCEMENT] TSDB: Optimize label values with matchers by taking shortcuts. #13426 +* [ENHANCEMENT] Kubernetes SD: Check preconditions earlier and avoid unnecessary checks or iterations in kube_sd. #13408 +* [ENHANCEMENT] Promtool: Improve visibility for `promtool test rules` with JSON colored formatting. #13342 +* [ENHANCEMENT] Consoles: Exclude iowait and steal from CPU Utilisation. #9593 +* [ENHANCEMENT] Various improvements and optimizations on Native Histograms. #13267 #13215 #13276 #13289 #13340 +* [BUGFIX] Scraping: Fix quality value in HTTP Accept header.
#13313 +* [BUGFIX] UI: Fix a crash when using the `time()` function. #13371 +* [BUGFIX] Azure SD: Fix SD crashing when it finds a VM scale set. #13578 ## 2.49.1 / 2024-01-15 * [BUGFIX] TSDB: Fixed a wrong `q=` value in scrape accept header. #13313 diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 902e9a6e949..e2fa9c29491 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -9,7 +9,7 @@ Julien Pivotto ( / @roidelapluie) and Levi Harrison * `documentation` * `prometheus-mixin`: Matthias Loibl ( / @metalmatze) * `storage` - * `remote`: Chris Marchbanks ( / @csmarchbanks), Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie ( / @tomwilkie) + * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie ( / @tomwilkie) * `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez) * `agent`: Robert Fratto ( / @rfratto) * `web` diff --git a/README.md b/README.md index 5fa6cc49e5b..0042793ff64 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ examples and guides.

[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486) [![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/prometheus/prometheus) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/prometheus.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:prometheus) -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus) diff --git a/RELEASE.md b/RELEASE.md index 6ab2f638996..6815308f477 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -54,7 +54,8 @@ Release cadence of first pre-releases being cut is 6 weeks. | v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) | | v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) | | v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) | -| v2.50 | 2024-01-16 | **searching for volunteer** | +| v2.50 | 2024-01-16 | Augustin Husson (GitHub: @nexucis) | +| v2.51 | 2024-02-13 | **searching for volunteer** | If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice. diff --git a/VERSION b/VERSION index f5518081bd1..9e29315acba 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.49.1 +2.50.0 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index dfafe66c6e5..f7244646e23 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -33,6 +33,7 @@ import ( "syscall" "time" + "github.com/KimMachineGun/automemlimit/memlimit" "github.com/alecthomas/kingpin/v2" "github.com/alecthomas/units" "github.com/go-kit/log" @@ -147,13 +148,15 @@ type flagConfig struct { queryMaxSamples int RemoteFlushDeadline model.Duration - featureList []string + featureList []string + memlimitRatio float64 // These options are extracted from featureList // for ease of use. enableExpandExternalLabels bool enableNewSDManager bool enablePerStepStats bool enableAutoGOMAXPROCS bool + enableAutoGOMEMLIMIT bool prometheusURL string corsRegexString string @@ -197,6 +200,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "auto-gomaxprocs": c.enableAutoGOMAXPROCS = true level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota") + case "auto-gomemlimit": + c.enableAutoGOMEMLIMIT = true + level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit") case "no-default-scrape-port": c.scrape.NoDefaultPort = true level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.") @@ -206,9 +212,15 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "native-histograms": c.tsdb.EnableNativeHistograms = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. 
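// Both this branch and the created-timestamp-zero-ingestion branch below now
// share config.DefaultProtoFirstScrapeProtocols, which lists PrometheusProto
// ahead of the text-based scrape formats.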
- config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols - config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols + config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + case "created-timestamp-zero-ingestion": + c.scrape.EnableCreatedTimestampZeroIngestion = true + // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. + config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols + level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) case "": continue case "promql-at-modifier", "promql-negative-offset": @@ -256,6 +268,9 @@ func main() { a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry."). Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress) + a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory"). + Default("0.9").FloatVar(&cfg.memlimitRatio) + webConfig := a.Flag( "web.config.file", "[EXPERIMENTAL] Path to configuration file that can enable TLS or authentication.", @@ -423,7 +438,7 @@ func main() { a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). 
Default("").StringsVar(&cfg.featureList) promlogflag.AddFlags(a, &cfg.promlogConfig) @@ -461,6 +476,11 @@ func main() { os.Exit(3) } + if cfg.memlimitRatio <= 0.0 || cfg.memlimitRatio > 1.0 { + fmt.Fprintf(os.Stderr, "--auto-gomemlimit.ratio must be greater than 0 and less than or equal to 1.") + os.Exit(1) + } + localStoragePath := cfg.serverStoragePath if agentMode { localStoragePath = cfg.agentStoragePath @@ -614,14 +634,59 @@ func main() { discoveryManagerNotify discoveryManager ) + // Kubernetes client metrics are used by Kubernetes SD. + // They are registered here in the main function, because SD mechanisms + // can only register metrics specific to a SD instance. + // Kubernetes client metrics are the same for the whole process - + // they are not specific to an SD instance. + err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer) + if err != nil { + level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err) + os.Exit(1) + } + + sdMetrics, err := discovery.CreateAndRegisterSDMetrics(prometheus.DefaultRegisterer) + if err != nil { + level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err) + os.Exit(1) + } + if cfg.enableNewSDManager { - discovery.RegisterMetrics() - discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape")) - discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify")) + { + discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager scrape") + os.Exit(1) + } + discoveryManagerScrape = discMgr + } + + { + discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager notify") + os.Exit(1) + } + discoveryManagerNotify = discMgr + } } else { - legacymanager.RegisterMetrics() - discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), legacymanager.Name("scrape")) - discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify")) + { + discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("scrape")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager scrape") + os.Exit(1) + } + discoveryManagerScrape = discMgr + } + + { + discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("notify")) + if discMgr == nil { + level.Error(logger).Log("msg", "failed to create a discovery manager notify") + os.Exit(1) + } + discoveryManagerNotify = discMgr + } } scrapeManager, err := scrape.NewManager( @@ -651,6 +716,20 @@ func main() { } } + if cfg.enableAutoGOMEMLIMIT { + if _, err := memlimit.SetGoMemLimitWithOpts( + memlimit.WithRatio(cfg.memlimitRatio), + memlimit.WithProvider( + memlimit.ApplyFallback( + memlimit.FromCgroup, + 
memlimit.FromSystem, + ), + ), + ); err != nil { + level.Warn(logger).Log("component", "automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err) + } + } + if !agentMode { opts := promql.EngineOpts{ Logger: log.With(logger, "component", "query engine"), @@ -1449,6 +1528,10 @@ func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, return 0, tsdb.ErrNotReady } +func (n notReadyAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + return 0, tsdb.ErrNotReady +} + func (n notReadyAppender) Commit() error { return tsdb.ErrNotReady } func (n notReadyAppender) Rollback() error { return tsdb.ErrNotReady } @@ -1587,7 +1670,6 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond), MaxBytes: int64(opts.MaxBytes), NoLockfile: opts.NoLockfile, - AllowOverlappingCompaction: true, WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType), HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize, SamplesPerChunk: opts.SamplesPerChunk, @@ -1599,6 +1681,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown, EnableNativeHistograms: opts.EnableNativeHistograms, OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow, + EnableOverlappingCompaction: true, } } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 7224e25d708..417d062d66a 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -12,7 +12,6 @@ // limitations under the License. // //go:build !windows -// +build !windows package main diff --git a/cmd/promtool/analyze.go b/cmd/promtool/analyze.go new file mode 100644 index 00000000000..c1f523de525 --- /dev/null +++ b/cmd/promtool/analyze.go @@ -0,0 +1,370 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
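// This file implements the subcommand wired up as `promtool query analyze`
// in cmd/promtool/main.go. A sketch of an invocation, assuming a Prometheus
// server at localhost:9090 and an illustrative bucket metric name:
//
//	promtool query analyze \
//	  --server=http://localhost:9090 \
//	  --type=histogram \
//	  --duration=1h \
//	  --match='http_request_duration_seconds_bucket'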
+ +package main + +import ( + "context" + "errors" + "fmt" + "io" + "math" + "net/http" + "net/url" + "os" + "sort" + "strconv" + "strings" + "time" + + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/labels" +) + +var ( + errNotNativeHistogram = fmt.Errorf("not a native histogram") + errNotEnoughData = fmt.Errorf("not enough data") + + outputHeader = `Bucket stats for each histogram series over time +------------------------------------------------ +First the min, avg, and max number of populated buckets, followed by the total +number of buckets (only if different from the max number of populated buckets +which is typical for classic but not native histograms).` + outputFooter = `Aggregated bucket stats +----------------------- +Each line shows min/avg/max over the series above.` +) + +type QueryAnalyzeConfig struct { + metricType string + duration time.Duration + time string + matchers []string +} + +// run retrieves metrics that look like conventional histograms (i.e. have _bucket +// suffixes) or native histograms, depending on metricType flag. +func (c *QueryAnalyzeConfig) run(url *url.URL, roundtripper http.RoundTripper) error { + if c.metricType != "histogram" { + return fmt.Errorf("analyze type is %s, must be 'histogram'", c.metricType) + } + + ctx := context.Background() + + api, err := newAPI(url, roundtripper, nil) + if err != nil { + return err + } + + var endTime time.Time + if c.time != "" { + endTime, err = parseTime(c.time) + if err != nil { + return fmt.Errorf("error parsing time '%s': %w", c.time, err) + } + } else { + endTime = time.Now() + } + + return c.getStatsFromMetrics(ctx, api, endTime, os.Stdout, c.matchers) +} + +func (c *QueryAnalyzeConfig) getStatsFromMetrics(ctx context.Context, api v1.API, endTime time.Time, out io.Writer, matchers []string) error { + fmt.Fprintf(out, "%s\n\n", outputHeader) + metastatsNative := newMetaStatistics() + metastatsClassic := newMetaStatistics() + for _, matcher := range matchers { + seriesSel := seriesSelector(matcher, c.duration) + matrix, err := querySamples(ctx, api, seriesSel, endTime) + if err != nil { + return err + } + + matrices := make(map[string]model.Matrix) + for _, series := range matrix { + // We do not handle mixed types. If there are float values, we assume it is a + // classic histogram, otherwise we assume it is a native histogram, and we + // ignore series with errors if they do not match the expected type. 
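// Concretely: a series with no float samples is treated as a candidate
// native histogram and handed to calcNativeBucketStatistics, while series
// carrying float samples are grouped by their labels (minus the metric name
// and `le`) and analyzed as classic histogram buckets in the loop further below.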
+ if len(series.Values) == 0 { + stats, err := calcNativeBucketStatistics(series) + if err != nil { + if errors.Is(err, errNotNativeHistogram) || errors.Is(err, errNotEnoughData) { + continue + } + return err + } + fmt.Fprintf(out, "- %s (native): %v\n", series.Metric, *stats) + metastatsNative.update(stats) + } else { + lbs := model.LabelSet(series.Metric).Clone() + if _, ok := lbs["le"]; !ok { + continue + } + metricName := string(lbs[labels.MetricName]) + if !strings.HasSuffix(metricName, "_bucket") { + continue + } + delete(lbs, labels.MetricName) + delete(lbs, "le") + key := formatSeriesName(metricName, lbs) + matrices[key] = append(matrices[key], series) + } + } + + for key, matrix := range matrices { + stats, err := calcClassicBucketStatistics(matrix) + if err != nil { + if errors.Is(err, errNotEnoughData) { + continue + } + return err + } + fmt.Fprintf(out, "- %s (classic): %v\n", key, *stats) + metastatsClassic.update(stats) + } + } + fmt.Fprintf(out, "\n%s\n", outputFooter) + if metastatsNative.Count() > 0 { + fmt.Fprintf(out, "\nNative %s\n", metastatsNative) + } + if metastatsClassic.Count() > 0 { + fmt.Fprintf(out, "\nClassic %s\n", metastatsClassic) + } + return nil +} + +func seriesSelector(metricName string, duration time.Duration) string { + builder := strings.Builder{} + builder.WriteString(metricName) + builder.WriteRune('[') + builder.WriteString(duration.String()) + builder.WriteRune(']') + return builder.String() +} + +func formatSeriesName(metricName string, lbs model.LabelSet) string { + builder := strings.Builder{} + builder.WriteString(metricName) + builder.WriteString(lbs.String()) + return builder.String() +} + +func querySamples(ctx context.Context, api v1.API, query string, end time.Time) (model.Matrix, error) { + values, _, err := api.Query(ctx, query, end) + if err != nil { + return nil, err + } + + matrix, ok := values.(model.Matrix) + if !ok { + return nil, fmt.Errorf("query of buckets resulted in non-Matrix") + } + + return matrix, nil +} + +// minPop/avgPop/maxPop is for the number of populated (non-zero) buckets. +// total is the total number of buckets across all samples in the series, +// populated or not. 
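// For example, a classic histogram with 10 buckets of which between 2 and 6
// are populated at any one scrape would have minPop=2, maxPop=6, and
// total=10 (the concrete numbers here are illustrative only).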
+type statistics struct { + minPop, maxPop, total int + avgPop float64 +} + +func (s statistics) String() string { + if s.maxPop == s.total { + return fmt.Sprintf("%d/%.3f/%d", s.minPop, s.avgPop, s.maxPop) + } + return fmt.Sprintf("%d/%.3f/%d/%d", s.minPop, s.avgPop, s.maxPop, s.total) +} + +func calcClassicBucketStatistics(matrix model.Matrix) (*statistics, error) { + numBuckets := len(matrix) + + stats := &statistics{ + minPop: math.MaxInt, + total: numBuckets, + } + + if numBuckets == 0 || len(matrix[0].Values) < 2 { + return stats, errNotEnoughData + } + + numSamples := len(matrix[0].Values) + + sortMatrix(matrix) + + totalPop := 0 + for timeIdx := 0; timeIdx < numSamples; timeIdx++ { + curr, err := getBucketCountsAtTime(matrix, numBuckets, timeIdx) + if err != nil { + return stats, err + } + countPop := 0 + for _, b := range curr { + if b != 0 { + countPop++ + } + } + + totalPop += countPop + if stats.minPop > countPop { + stats.minPop = countPop + } + if stats.maxPop < countPop { + stats.maxPop = countPop + } + } + stats.avgPop = float64(totalPop) / float64(numSamples) + return stats, nil +} + +func sortMatrix(matrix model.Matrix) { + sort.SliceStable(matrix, func(i, j int) bool { + return getLe(matrix[i]) < getLe(matrix[j]) + }) +} + +func getLe(series *model.SampleStream) float64 { + lbs := model.LabelSet(series.Metric) + le, _ := strconv.ParseFloat(string(lbs["le"]), 64) + return le +} + +func getBucketCountsAtTime(matrix model.Matrix, numBuckets, timeIdx int) ([]int, error) { + counts := make([]int, numBuckets) + if timeIdx >= len(matrix[0].Values) { + // Just return zeroes instead of erroring out so we can get partial results. + return counts, nil + } + counts[0] = int(matrix[0].Values[timeIdx].Value) + for i, bucket := range matrix[1:] { + if timeIdx >= len(bucket.Values) { + // Just return zeroes instead of erroring out so we can get partial results. + return counts, nil + } + curr := bucket.Values[timeIdx] + prev := matrix[i].Values[timeIdx] + // Assume the results are nicely aligned. 
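// If neighbouring buckets carried samples from different scrapes, the
// subtraction below would mix unrelated counts, so error out instead.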
+ if curr.Timestamp != prev.Timestamp { + return counts, fmt.Errorf("matrix result is not time aligned") + } + counts[i+1] = int(curr.Value - prev.Value) + } + return counts, nil +} + +type bucketBounds struct { + boundaries int32 + upper, lower float64 +} + +func makeBucketBounds(b *model.HistogramBucket) bucketBounds { + return bucketBounds{ + boundaries: b.Boundaries, + upper: float64(b.Upper), + lower: float64(b.Lower), + } +} + +func calcNativeBucketStatistics(series *model.SampleStream) (*statistics, error) { + stats := &statistics{ + minPop: math.MaxInt, + } + + overall := make(map[bucketBounds]struct{}) + totalPop := 0 + if len(series.Histograms) == 0 { + return nil, errNotNativeHistogram + } + if len(series.Histograms) == 1 { + return nil, errNotEnoughData + } + for _, histogram := range series.Histograms { + for _, bucket := range histogram.Histogram.Buckets { + bb := makeBucketBounds(bucket) + overall[bb] = struct{}{} + } + countPop := len(histogram.Histogram.Buckets) + + totalPop += countPop + if stats.minPop > countPop { + stats.minPop = countPop + } + if stats.maxPop < countPop { + stats.maxPop = countPop + } + } + stats.avgPop = float64(totalPop) / float64(len(series.Histograms)) + stats.total = len(overall) + return stats, nil +} + +type distribution struct { + min, max, count int + avg float64 +} + +func newDistribution() distribution { + return distribution{ + min: math.MaxInt, + } +} + +func (d *distribution) update(num int) { + if d.min > num { + d.min = num + } + if d.max < num { + d.max = num + } + d.count++ + d.avg += float64(num)/float64(d.count) - d.avg/float64(d.count) +} + +func (d distribution) String() string { + return fmt.Sprintf("%d/%.3f/%d", d.min, d.avg, d.max) +} + +type metaStatistics struct { + minPop, avgPop, maxPop, total distribution +} + +func newMetaStatistics() *metaStatistics { + return &metaStatistics{ + minPop: newDistribution(), + avgPop: newDistribution(), + maxPop: newDistribution(), + total: newDistribution(), + } +} + +func (ms metaStatistics) Count() int { + return ms.minPop.count +} + +func (ms metaStatistics) String() string { + if ms.maxPop == ms.total { + return fmt.Sprintf("histogram series (%d in total):\n- min populated: %v\n- avg populated: %v\n- max populated: %v", ms.Count(), ms.minPop, ms.avgPop, ms.maxPop) + } + return fmt.Sprintf("histogram series (%d in total):\n- min populated: %v\n- avg populated: %v\n- max populated: %v\n- total: %v", ms.Count(), ms.minPop, ms.avgPop, ms.maxPop, ms.total) +} + +func (ms *metaStatistics) update(s *statistics) { + ms.minPop.update(s.minPop) + ms.avgPop.update(int(s.avgPop)) + ms.maxPop.update(s.maxPop) + ms.total.update(s.total) +} diff --git a/cmd/promtool/analyze_test.go b/cmd/promtool/analyze_test.go new file mode 100644 index 00000000000..83d2ac4a3db --- /dev/null +++ b/cmd/promtool/analyze_test.go @@ -0,0 +1,170 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
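// The fixture below models a classic histogram with buckets le=0.5/2/10/+Inf.
// At the first timestamp the cumulative counts are 10/25/30/31, which
// getBucketCountsAtTime de-accumulates into the per-bucket expectation
// {10, 15, 5, 1} asserted in TestGetBucketCountsAtTime.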
+ +package main + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/common/model" +) + +var ( + exampleMatrix = model.Matrix{ + &model.SampleStream{ + Metric: model.Metric{ + "le": "+Inf", + }, + Values: []model.SamplePair{ + { + Value: 31, + Timestamp: 100, + }, + { + Value: 32, + Timestamp: 200, + }, + { + Value: 40, + Timestamp: 300, + }, + }, + }, + &model.SampleStream{ + Metric: model.Metric{ + "le": "0.5", + }, + Values: []model.SamplePair{ + { + Value: 10, + Timestamp: 100, + }, + { + Value: 11, + Timestamp: 200, + }, + { + Value: 11, + Timestamp: 300, + }, + }, + }, + &model.SampleStream{ + Metric: model.Metric{ + "le": "10", + }, + Values: []model.SamplePair{ + { + Value: 30, + Timestamp: 100, + }, + { + Value: 31, + Timestamp: 200, + }, + { + Value: 37, + Timestamp: 300, + }, + }, + }, + &model.SampleStream{ + Metric: model.Metric{ + "le": "2", + }, + Values: []model.SamplePair{ + { + Value: 25, + Timestamp: 100, + }, + { + Value: 26, + Timestamp: 200, + }, + { + Value: 27, + Timestamp: 300, + }, + }, + }, + } + exampleMatrixLength = len(exampleMatrix) +) + +func init() { + sortMatrix(exampleMatrix) +} + +func TestGetBucketCountsAtTime(t *testing.T) { + cases := []struct { + matrix model.Matrix + length int + timeIdx int + expected []int + }{ + { + exampleMatrix, + exampleMatrixLength, + 0, + []int{10, 15, 5, 1}, + }, + { + exampleMatrix, + exampleMatrixLength, + 1, + []int{11, 15, 5, 1}, + }, + { + exampleMatrix, + exampleMatrixLength, + 2, + []int{11, 16, 10, 3}, + }, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("exampleMatrix@%d", c.timeIdx), func(t *testing.T) { + res, err := getBucketCountsAtTime(c.matrix, c.length, c.timeIdx) + require.NoError(t, err) + require.Equal(t, c.expected, res) + }) + } +} + +func TestCalcClassicBucketStatistics(t *testing.T) { + cases := []struct { + matrix model.Matrix + expected *statistics + }{ + { + exampleMatrix, + &statistics{ + minPop: 4, + avgPop: 4, + maxPop: 4, + total: 4, + }, + }, + } + + for i, c := range cases { + t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { + res, err := calcClassicBucketStatistics(c.matrix) + require.NoError(t, err) + require.Equal(t, c.expected, res) + }) + } +} diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index f0b2719c90f..0332c33eaa3 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -35,8 +35,7 @@ import ( "github.com/go-kit/log" "github.com/google/pprof/profile" "github.com/prometheus/client_golang/api" - v1 "github.com/prometheus/client_golang/api/prometheus/v1" - "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil/promlint" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -184,6 +183,14 @@ func main() { queryLabelsEnd := queryLabelsCmd.Flag("end", "End time (RFC3339 or Unix timestamp).").String() queryLabelsMatch := queryLabelsCmd.Flag("match", "Series selector. 
Can be specified multiple times.").Strings() + queryAnalyzeCfg := &QueryAnalyzeConfig{} + queryAnalyzeCmd := queryCmd.Command("analyze", "Run queries against your Prometheus to analyze the usage pattern of certain metrics.") + queryAnalyzeCmd.Flag("server", "Prometheus server to query.").Required().URLVar(&serverURL) + queryAnalyzeCmd.Flag("type", "Type of metric: histogram.").Required().StringVar(&queryAnalyzeCfg.metricType) + queryAnalyzeCmd.Flag("duration", "Time frame to analyze.").Default("1h").DurationVar(&queryAnalyzeCfg.duration) + queryAnalyzeCmd.Flag("time", "Query time (RFC3339 or Unix timestamp), defaults to now.").StringVar(&queryAnalyzeCfg.time) + queryAnalyzeCmd.Flag("match", "Series selector. Can be specified multiple times.").Required().StringsVar(&queryAnalyzeCfg.matchers) + pushCmd := app.Command("push", "Push to a Prometheus server.") pushCmd.Flag("http.config.file", "HTTP client configuration file for promtool to connect to Prometheus.").PlaceHolder("").ExistingFileVar(&httpConfigFilePath) pushMetricsCmd := pushCmd.Command("metrics", "Push metrics to a prometheus remote write (for testing purpose only).") @@ -203,6 +210,7 @@ func main() { "test-rule-file", "The unit test file.", ).Required().ExistingFiles() + testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() defaultDBPath := "data/" tsdbCmd := app.Command("tsdb", "Run tsdb commands.") @@ -229,7 +237,7 @@ func main() { dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() - dumpMatch := tsdbDumpCmd.Flag("match", "Series selector.").Default("{__name__=~'(?s:.*)'}").String() + dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. 
Please refer to the storage docs for more details.") importHumanReadable := importCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool() @@ -317,7 +325,7 @@ func main() { switch parsedCmd { case sdCheckCmd.FullCommand(): - os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort)) + os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) @@ -368,6 +376,7 @@ func main() { EnableNegativeOffset: true, }, *testRulesRun, + *testRulesDiff, *testRulesFiles...), ) @@ -389,6 +398,9 @@ func main() { case importRulesCmd.FullCommand(): os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...))) + case queryAnalyzeCmd.FullCommand(): + os.Exit(checkErr(queryAnalyzeCfg.run(serverURL, httpRoundTripper))) + case documentationCmd.FullCommand(): os.Exit(checkErr(documentcli.GenerateMarkdown(app.Model(), os.Stdout))) @@ -996,246 +1008,6 @@ func checkMetricsExtended(r io.Reader) ([]metricStat, int, error) { return stats, total, nil } -// QueryInstant performs an instant query against a Prometheus server. -func QueryInstant(url *url.URL, roundTripper http.RoundTripper, query, evalTime string, p printer) int { - if url.Scheme == "" { - url.Scheme = "http" - } - config := api.Config{ - Address: url.String(), - RoundTripper: roundTripper, - } - - // Create new client. - c, err := api.NewClient(config) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating API client:", err) - return failureExitCode - } - - eTime := time.Now() - if evalTime != "" { - eTime, err = parseTime(evalTime) - if err != nil { - fmt.Fprintln(os.Stderr, "error parsing evaluation time:", err) - return failureExitCode - } - } - - // Run query against client. - api := v1.NewAPI(c) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, _, err := api.Query(ctx, query, eTime) // Ignoring warnings for now. - cancel() - if err != nil { - return handleAPIError(err) - } - - p.printValue(val) - - return successExitCode -} - -// QueryRange performs a range query against a Prometheus server. -func QueryRange(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, query, start, end string, step time.Duration, p printer) int { - if url.Scheme == "" { - url.Scheme = "http" - } - config := api.Config{ - Address: url.String(), - RoundTripper: roundTripper, - } - - if len(headers) > 0 { - config.RoundTripper = promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { - for key, value := range headers { - req.Header.Add(key, value) - } - return roundTripper.RoundTrip(req) - }) - } - - // Create new client. 
- c, err := api.NewClient(config) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating API client:", err) - return failureExitCode - } - - var stime, etime time.Time - - if end == "" { - etime = time.Now() - } else { - etime, err = parseTime(end) - if err != nil { - fmt.Fprintln(os.Stderr, "error parsing end time:", err) - return failureExitCode - } - } - - if start == "" { - stime = etime.Add(-5 * time.Minute) - } else { - stime, err = parseTime(start) - if err != nil { - fmt.Fprintln(os.Stderr, "error parsing start time:", err) - return failureExitCode - } - } - - if !stime.Before(etime) { - fmt.Fprintln(os.Stderr, "start time is not before end time") - return failureExitCode - } - - if step == 0 { - resolution := math.Max(math.Floor(etime.Sub(stime).Seconds()/250), 1) - // Convert seconds to nanoseconds such that time.Duration parses correctly. - step = time.Duration(resolution) * time.Second - } - - // Run query against client. - api := v1.NewAPI(c) - r := v1.Range{Start: stime, End: etime, Step: step} - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, _, err := api.QueryRange(ctx, query, r) // Ignoring warnings for now. - cancel() - - if err != nil { - return handleAPIError(err) - } - - p.printValue(val) - return successExitCode -} - -// QuerySeries queries for a series against a Prometheus server. -func QuerySeries(url *url.URL, roundTripper http.RoundTripper, matchers []string, start, end string, p printer) int { - if url.Scheme == "" { - url.Scheme = "http" - } - config := api.Config{ - Address: url.String(), - RoundTripper: roundTripper, - } - - // Create new client. - c, err := api.NewClient(config) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating API client:", err) - return failureExitCode - } - - stime, etime, err := parseStartTimeAndEndTime(start, end) - if err != nil { - fmt.Fprintln(os.Stderr, err) - return failureExitCode - } - - // Run query against client. - api := v1.NewAPI(c) - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, _, err := api.Series(ctx, matchers, stime, etime) // Ignoring warnings for now. - cancel() - - if err != nil { - return handleAPIError(err) - } - - p.printSeries(val) - return successExitCode -} - -// QueryLabels queries for label values against a Prometheus server. -func QueryLabels(url *url.URL, roundTripper http.RoundTripper, matchers []string, name, start, end string, p printer) int { - if url.Scheme == "" { - url.Scheme = "http" - } - config := api.Config{ - Address: url.String(), - RoundTripper: roundTripper, - } - - // Create new client. - c, err := api.NewClient(config) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating API client:", err) - return failureExitCode - } - - stime, etime, err := parseStartTimeAndEndTime(start, end) - if err != nil { - fmt.Fprintln(os.Stderr, err) - return failureExitCode - } - - // Run query against client. 
- api := v1.NewAPI(c) - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, warn, err := api.LabelValues(ctx, name, matchers, stime, etime) - cancel() - - for _, v := range warn { - fmt.Fprintln(os.Stderr, "query warning:", v) - } - if err != nil { - return handleAPIError(err) - } - - p.printLabelValues(val) - return successExitCode -} - -func handleAPIError(err error) int { - var apiErr *v1.Error - if errors.As(err, &apiErr) && apiErr.Detail != "" { - fmt.Fprintf(os.Stderr, "query error: %v (detail: %s)\n", apiErr, strings.TrimSpace(apiErr.Detail)) - } else { - fmt.Fprintln(os.Stderr, "query error:", err) - } - - return failureExitCode -} - -func parseStartTimeAndEndTime(start, end string) (time.Time, time.Time, error) { - var ( - minTime = time.Now().Add(-9999 * time.Hour) - maxTime = time.Now().Add(9999 * time.Hour) - err error - ) - - stime := minTime - etime := maxTime - - if start != "" { - stime, err = parseTime(start) - if err != nil { - return stime, etime, fmt.Errorf("error parsing start time: %w", err) - } - } - - if end != "" { - etime, err = parseTime(end) - if err != nil { - return stime, etime, fmt.Errorf("error parsing end time: %w", err) - } - } - return stime, etime, nil -} - -func parseTime(s string) (time.Time, error) { - if t, err := strconv.ParseFloat(s, 64); err == nil { - s, ns := math.Modf(t) - return time.Unix(int64(s), int64(ns*float64(time.Second))).UTC(), nil - } - if t, err := time.Parse(time.RFC3339Nano, s); err == nil { - return t, nil - } - return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s) -} - type endpointsGroup struct { urlToFilename map[string]string postProcess func(b []byte) ([]byte, error) @@ -1389,15 +1161,12 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu evalInterval: evalInterval, maxBlockDuration: maxBlockDuration, } - client, err := api.NewClient(api.Config{ - Address: url.String(), - RoundTripper: roundTripper, - }) + api, err := newAPI(url, roundTripper, nil) if err != nil { return fmt.Errorf("new api client error: %w", err) } - ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, v1.NewAPI(client)) + ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api) errs := ruleImporter.loadGroups(ctx, files) for _, err := range errs { if err != nil { diff --git a/cmd/promtool/query.go b/cmd/promtool/query.go new file mode 100644 index 00000000000..0d7cb12cf42 --- /dev/null +++ b/cmd/promtool/query.go @@ -0,0 +1,251 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "errors" + "fmt" + "math" + "net/http" + "net/url" + "os" + "strconv" + "strings" + "time" + + "github.com/prometheus/client_golang/api" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus/promhttp" + + _ "github.com/prometheus/prometheus/plugins" // Register plugins. 
+) + +func newAPI(url *url.URL, roundTripper http.RoundTripper, headers map[string]string) (v1.API, error) { + if url.Scheme == "" { + url.Scheme = "http" + } + config := api.Config{ + Address: url.String(), + RoundTripper: roundTripper, + } + + if len(headers) > 0 { + config.RoundTripper = promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + for key, value := range headers { + req.Header.Add(key, value) + } + return roundTripper.RoundTrip(req) + }) + } + + // Create new client. + client, err := api.NewClient(config) + if err != nil { + return nil, err + } + + api := v1.NewAPI(client) + return api, nil +} + +// QueryInstant performs an instant query against a Prometheus server. +func QueryInstant(url *url.URL, roundTripper http.RoundTripper, query, evalTime string, p printer) int { + api, err := newAPI(url, roundTripper, nil) + if err != nil { + fmt.Fprintln(os.Stderr, "error creating API client:", err) + return failureExitCode + } + + eTime := time.Now() + if evalTime != "" { + eTime, err = parseTime(evalTime) + if err != nil { + fmt.Fprintln(os.Stderr, "error parsing evaluation time:", err) + return failureExitCode + } + } + + // Run query against client. + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + val, _, err := api.Query(ctx, query, eTime) // Ignoring warnings for now. + cancel() + if err != nil { + return handleAPIError(err) + } + + p.printValue(val) + + return successExitCode +} + +// QueryRange performs a range query against a Prometheus server. +func QueryRange(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, query, start, end string, step time.Duration, p printer) int { + api, err := newAPI(url, roundTripper, headers) + if err != nil { + fmt.Fprintln(os.Stderr, "error creating API client:", err) + return failureExitCode + } + + var stime, etime time.Time + + if end == "" { + etime = time.Now() + } else { + etime, err = parseTime(end) + if err != nil { + fmt.Fprintln(os.Stderr, "error parsing end time:", err) + return failureExitCode + } + } + + if start == "" { + stime = etime.Add(-5 * time.Minute) + } else { + stime, err = parseTime(start) + if err != nil { + fmt.Fprintln(os.Stderr, "error parsing start time:", err) + return failureExitCode + } + } + + if !stime.Before(etime) { + fmt.Fprintln(os.Stderr, "start time is not before end time") + return failureExitCode + } + + if step == 0 { + resolution := math.Max(math.Floor(etime.Sub(stime).Seconds()/250), 1) + // Convert seconds to nanoseconds such that time.Duration parses correctly. + step = time.Duration(resolution) * time.Second + } + + // Run query against client. + r := v1.Range{Start: stime, End: etime, Step: step} + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + val, _, err := api.QueryRange(ctx, query, r) // Ignoring warnings for now. + cancel() + + if err != nil { + return handleAPIError(err) + } + + p.printValue(val) + return successExitCode +} + +// QuerySeries queries for a series against a Prometheus server. +func QuerySeries(url *url.URL, roundTripper http.RoundTripper, matchers []string, start, end string, p printer) int { + api, err := newAPI(url, roundTripper, nil) + if err != nil { + fmt.Fprintln(os.Stderr, "error creating API client:", err) + return failureExitCode + } + + stime, etime, err := parseStartTimeAndEndTime(start, end) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return failureExitCode + } + + // Run query against client. 
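// Under the hood, api.Series calls Prometheus's /api/v1/series endpoint
// with the given matchers and time range.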
+ ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + val, _, err := api.Series(ctx, matchers, stime, etime) // Ignoring warnings for now. + cancel() + + if err != nil { + return handleAPIError(err) + } + + p.printSeries(val) + return successExitCode +} + +// QueryLabels queries for label values against a Prometheus server. +func QueryLabels(url *url.URL, roundTripper http.RoundTripper, matchers []string, name, start, end string, p printer) int { + api, err := newAPI(url, roundTripper, nil) + if err != nil { + fmt.Fprintln(os.Stderr, "error creating API client:", err) + return failureExitCode + } + + stime, etime, err := parseStartTimeAndEndTime(start, end) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return failureExitCode + } + + // Run query against client. + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + val, warn, err := api.LabelValues(ctx, name, matchers, stime, etime) + cancel() + + for _, v := range warn { + fmt.Fprintln(os.Stderr, "query warning:", v) + } + if err != nil { + return handleAPIError(err) + } + + p.printLabelValues(val) + return successExitCode +} + +func handleAPIError(err error) int { + var apiErr *v1.Error + if errors.As(err, &apiErr) && apiErr.Detail != "" { + fmt.Fprintf(os.Stderr, "query error: %v (detail: %s)\n", apiErr, strings.TrimSpace(apiErr.Detail)) + } else { + fmt.Fprintln(os.Stderr, "query error:", err) + } + + return failureExitCode +} + +func parseStartTimeAndEndTime(start, end string) (time.Time, time.Time, error) { + var ( + minTime = time.Now().Add(-9999 * time.Hour) + maxTime = time.Now().Add(9999 * time.Hour) + err error + ) + + stime := minTime + etime := maxTime + + if start != "" { + stime, err = parseTime(start) + if err != nil { + return stime, etime, fmt.Errorf("error parsing start time: %w", err) + } + } + + if end != "" { + etime, err = parseTime(end) + if err != nil { + return stime, etime, fmt.Errorf("error parsing end time: %w", err) + } + } + return stime, etime, nil +} + +func parseTime(s string) (time.Time, error) { + if t, err := strconv.ParseFloat(s, 64); err == nil { + s, ns := math.Modf(t) + return time.Unix(int64(s), int64(ns*float64(time.Second))).UTC(), nil + } + if t, err := time.Parse(time.RFC3339Nano, s); err == nil { + return t, nil + } + return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s) +} diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index 7c5ae703653..4892743fc06 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -22,6 +22,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" @@ -37,7 +38,7 @@ type sdCheckResult struct { } // CheckSD performs service discovery for the given job name and reports the results. 
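// The prometheus.Registerer parameter follows the SD-metrics changes in this
// PR: each discoverer config now registers its discovery metrics (plus shared
// refresh metrics) on a registry before Run, and unregisters them once Run
// returns.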
-func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool) int { +func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int { logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) @@ -77,12 +78,25 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefault defer cancel() for _, cfg := range scrapeConfig.ServiceDiscoveryConfigs { - d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger}) + reg := prometheus.NewRegistry() + refreshMetrics := discovery.NewRefreshMetrics(reg) + metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) + err := metrics.Register() + if err != nil { + fmt.Fprintln(os.Stderr, "Could not register service discovery metrics", err) + return failureExitCode + } + + d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger, Metrics: metrics}) if err != nil { fmt.Fprintln(os.Stderr, "Could not create new discoverer", err) return failureExitCode } - go d.Run(ctx, targetGroupChan) + go func() { + d.Run(ctx, targetGroupChan) + metrics.Unregister() + refreshMetrics.Unregister() + }() } var targetGroups []*targetgroup.Group diff --git a/cmd/promtool/testdata/dump-test-1.prom b/cmd/promtool/testdata/dump-test-1.prom new file mode 100644 index 00000000000..878cdecab8a --- /dev/null +++ b/cmd/promtool/testdata/dump-test-1.prom @@ -0,0 +1,15 @@ +{__name__="heavy_metric", foo="bar"} 5 0 +{__name__="heavy_metric", foo="bar"} 4 60000 +{__name__="heavy_metric", foo="bar"} 3 120000 +{__name__="heavy_metric", foo="bar"} 2 180000 +{__name__="heavy_metric", foo="bar"} 1 240000 +{__name__="heavy_metric", foo="foo"} 5 0 +{__name__="heavy_metric", foo="foo"} 4 60000 +{__name__="heavy_metric", foo="foo"} 3 120000 +{__name__="heavy_metric", foo="foo"} 2 180000 +{__name__="heavy_metric", foo="foo"} 1 240000 +{__name__="metric", baz="abc", foo="bar"} 1 0 +{__name__="metric", baz="abc", foo="bar"} 2 60000 +{__name__="metric", baz="abc", foo="bar"} 3 120000 +{__name__="metric", baz="abc", foo="bar"} 4 180000 +{__name__="metric", baz="abc", foo="bar"} 5 240000 diff --git a/cmd/promtool/testdata/dump-test-2.prom b/cmd/promtool/testdata/dump-test-2.prom new file mode 100644 index 00000000000..4ac2ffa5aec --- /dev/null +++ b/cmd/promtool/testdata/dump-test-2.prom @@ -0,0 +1,10 @@ +{__name__="heavy_metric", foo="foo"} 5 0 +{__name__="heavy_metric", foo="foo"} 4 60000 +{__name__="heavy_metric", foo="foo"} 3 120000 +{__name__="heavy_metric", foo="foo"} 2 180000 +{__name__="heavy_metric", foo="foo"} 1 240000 +{__name__="metric", baz="abc", foo="bar"} 1 0 +{__name__="metric", baz="abc", foo="bar"} 2 60000 +{__name__="metric", baz="abc", foo="bar"} 3 120000 +{__name__="metric", baz="abc", foo="bar"} 4 180000 +{__name__="metric", baz="abc", foo="bar"} 5 240000 diff --git a/cmd/promtool/testdata/dump-test-3.prom b/cmd/promtool/testdata/dump-test-3.prom new file mode 100644 index 00000000000..faa278101ed --- /dev/null +++ b/cmd/promtool/testdata/dump-test-3.prom @@ -0,0 +1,2 @@ +{__name__="metric", baz="abc", foo="bar"} 2 60000 +{__name__="metric", baz="abc", foo="bar"} 3 120000 diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index e6df9b78cf2..4bba8421c2d 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -667,7 +667,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. 
it := fhchk.Iterator(nil) bucketCount := 0 for it.Next() == chunkenc.ValFloatHistogram { - _, f := it.AtFloatHistogram() + _, f := it.AtFloatHistogram(nil) bucketCount += len(f.PositiveBuckets) bucketCount += len(f.NegativeBuckets) } @@ -682,7 +682,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. it := hchk.Iterator(nil) bucketCount := 0 for it.Next() == chunkenc.ValHistogram { - _, f := it.AtHistogram() + _, f := it.AtHistogram(nil) bucketCount += len(f.PositiveBuckets) bucketCount += len(f.NegativeBuckets) } @@ -706,7 +706,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. return nil } -func dumpSamples(ctx context.Context, path string, mint, maxt int64, match string) (err error) { +func dumpSamples(ctx context.Context, path string, mint, maxt int64, match []string) (err error) { db, err := tsdb.OpenDBReadOnly(path, nil) if err != nil { return err @@ -720,11 +720,21 @@ func dumpSamples(ctx context.Context, path string, mint, maxt int64, match strin } defer q.Close() - matchers, err := parser.ParseMetricSelector(match) + matcherSets, err := parser.ParseMetricSelectors(match) if err != nil { return err } - ss := q.Select(ctx, false, nil, matchers...) + + var ss storage.SeriesSet + if len(matcherSets) > 1 { + var sets []storage.SeriesSet + for _, mset := range matcherSets { + sets = append(sets, q.Select(ctx, true, nil, mset...)) + } + ss = storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) + } else { + ss = q.Select(ctx, false, nil, matcherSets[0]...) + } for ss.Next() { series := ss.At() @@ -735,11 +745,11 @@ func dumpSamples(ctx context.Context, path string, mint, maxt int64, match strin fmt.Printf("%s %g %d\n", lbs, val, ts) } for it.Next() == chunkenc.ValFloatHistogram { - ts, fh := it.AtFloatHistogram() + ts, fh := it.AtFloatHistogram(nil) fmt.Printf("%s %s %d\n", lbs, fh.String(), ts) } for it.Next() == chunkenc.ValHistogram { - ts, h := it.AtHistogram() + ts, h := it.AtHistogram(nil) fmt.Printf("%s %s %d\n", lbs, h.String(), ts) } if it.Err() != nil { diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index 0f0040cd3dc..aeb51a07e03 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -14,9 +14,18 @@ package main import ( + "bytes" + "context" + "io" + "math" + "os" + "runtime" + "strings" "testing" "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/promql" ) func TestGenerateBucket(t *testing.T) { @@ -41,3 +50,101 @@ func TestGenerateBucket(t *testing.T) { require.Equal(t, tc.step, step) } } + +// getDumpedSamples dumps samples and returns them. 
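// It temporarily swaps os.Stdout for a pipe because dumpSamples writes
// straight to standard output instead of taking an io.Writer.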
+func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []string) string { + t.Helper() + + oldStdout := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + err := dumpSamples( + context.Background(), + path, + mint, + maxt, + match, + ) + require.NoError(t, err) + + w.Close() + os.Stdout = oldStdout + + var buf bytes.Buffer + io.Copy(&buf, r) + return buf.String() +} + +func TestTSDBDump(t *testing.T) { + storage := promql.LoadedStorage(t, ` + load 1m + metric{foo="bar", baz="abc"} 1 2 3 4 5 + heavy_metric{foo="bar"} 5 4 3 2 1 + heavy_metric{foo="foo"} 5 4 3 2 1 + `) + + tests := []struct { + name string + mint int64 + maxt int64 + match []string + expectedDump string + }{ + { + name: "default match", + mint: math.MinInt64, + maxt: math.MaxInt64, + match: []string{"{__name__=~'(?s:.*)'}"}, + expectedDump: "testdata/dump-test-1.prom", + }, + { + name: "same matcher twice", + mint: math.MinInt64, + maxt: math.MaxInt64, + match: []string{"{foo=~'.+'}", "{foo=~'.+'}"}, + expectedDump: "testdata/dump-test-1.prom", + }, + { + name: "no duplication", + mint: math.MinInt64, + maxt: math.MaxInt64, + match: []string{"{__name__=~'(?s:.*)'}", "{baz='abc'}"}, + expectedDump: "testdata/dump-test-1.prom", + }, + { + name: "well merged", + mint: math.MinInt64, + maxt: math.MaxInt64, + match: []string{"{__name__='heavy_metric'}", "{baz='abc'}"}, + expectedDump: "testdata/dump-test-1.prom", + }, + { + name: "multi matchers", + mint: math.MinInt64, + maxt: math.MaxInt64, + match: []string{"{__name__='heavy_metric',foo='foo'}", "{__name__='metric'}"}, + expectedDump: "testdata/dump-test-2.prom", + }, + { + name: "with reduced mint and maxt", + mint: int64(60000), + maxt: int64(120000), + match: []string{"{__name__='metric'}"}, + expectedDump: "testdata/dump-test-3.prom", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.mint, tt.maxt, tt.match) + expectedMetrics, err := os.ReadFile(tt.expectedDump) + require.NoError(t, err) + if strings.Contains(runtime.GOOS, "windows") { + // We use "\n" line endings while dumping on Windows as well. + expectedMetrics = bytes.ReplaceAll(expectedMetrics, []byte("\r\n"), []byte("\n")) + } + // Even though samples are not sorted when only a single matcher is given, the order in the cases above should stay the same. + require.Equal(t, string(expectedMetrics), dumpedMetrics) + }) + } +} diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index a25a8596d42..a89288c44a4 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -15,6 +15,7 @@ package main import ( "context" + "encoding/json" "errors" "fmt" "os" @@ -27,6 +28,7 @@ import ( "github.com/go-kit/log" "github.com/grafana/regexp" + "github.com/nsf/jsondiff" "github.com/prometheus/common/model" "gopkg.in/yaml.v2" @@ -40,7 +42,7 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files ...string) int { +func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { failed := false var run *regexp.Regexp @@ -49,7 +51,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files .
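The getDumpedSamples helper above captures whatever dumpSamples prints by swapping os.Stdout for the write end of a pipe. A generalized variant of the same trick, as a hypothetical test helper; it would reuse the bytes, io, os, and testing imports already added in that file. One caveat worth noting in a comment: a pipe's buffer is finite, so very large outputs would need the reader to run concurrently.

```go
// captureStdout runs f while os.Stdout is redirected into a pipe and returns
// everything f printed. Hypothetical helper in the spirit of the test above;
// it belongs in a _test.go file alongside the code under test.
func captureStdout(t *testing.T, f func() error) string {
	t.Helper()

	old := os.Stdout
	r, w, err := os.Pipe()
	require.NoError(t, err)
	os.Stdout = w

	ferr := f()

	// Close the write end before draining; io.Copy would block otherwise.
	require.NoError(t, w.Close())
	os.Stdout = old
	require.NoError(t, ferr)

	var buf bytes.Buffer
	_, err = io.Copy(&buf, r)
	require.NoError(t, err)
	return buf.String()
}
```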
} for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -67,7 +69,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files . return successExitCode } -func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp) []error { +func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error { fmt.Println("Unit Testing: ", filename) b, err := os.ReadFile(filename) @@ -109,7 +111,7 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp. if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(evalInterval, groupOrderMap, queryOpts, unitTestInp.RuleFiles...) + ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...) if ers != nil { errs = append(errs, ers...) } @@ -173,7 +175,7 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promql.LazyLoaderOpts, ruleFiles ...string) []error { +func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promql.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) []error { // Setup testing suite. suite, err := promql.NewLazyLoader(nil, tg.seriesLoadingString(), queryOpts) if err != nil { @@ -345,8 +347,44 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i } expString := indentLines(expAlerts.String(), " ") gotString := indentLines(gotAlerts.String(), " ") - errs = append(errs, fmt.Errorf("%s alertname: %s, time: %s, \n exp:%v, \n got:%v", - testName, testcase.Alertname, testcase.EvalTime.String(), expString, gotString)) + if diffFlag { + // If there are no received alerts, populate an empty value. + if gotAlerts.Len() == 0 { + gotAlerts = append(gotAlerts, labelAndAnnotation{ + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + }) + } + // If there are no expected alerts, populate an empty value. + if expAlerts.Len() == 0 { + expAlerts = append(expAlerts, labelAndAnnotation{ + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + }) + } + + diffOpts := jsondiff.DefaultConsoleOptions() + expAlertsJSON, err := json.Marshal(expAlerts) + if err != nil { + errs = append(errs, fmt.Errorf("error marshaling expected %s alert: [%s]", tg.TestGroupName, err.Error())) + continue + } + + gotAlertsJSON, err := json.Marshal(gotAlerts) + if err != nil { + errs = append(errs, fmt.Errorf("error marshaling received %s alert: [%s]", tg.TestGroupName, err.Error())) + continue + } + + res, diff := jsondiff.Compare(expAlertsJSON, gotAlertsJSON, &diffOpts) + if res != jsondiff.FullMatch { + errs = append(errs, fmt.Errorf("%s alertname: %s, time: %s, \n diff: %v", + testName, testcase.Alertname, testcase.EvalTime.String(), indentLines(diff, " "))) + } + } else { + errs = append(errs, fmt.Errorf("%s alertname: %s, time: %s, \n exp:%v, \n got:%v", + testName, testcase.Alertname, testcase.EvalTime.String(), expString, gotString)) + } } } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index fb4012e3c14..b8170d784e4 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -125,7 +125,7 @@ func TestRulesUnitTest(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := 
RulesUnitTest(tt.queryOpts, nil, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) @@ -178,7 +178,7 @@ func TestRulesUnitTestRun(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := RulesUnitTest(tt.queryOpts, tt.args.run, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) diff --git a/config/config.go b/config/config.go index b832ac9a172..7fa03a14450 100644 --- a/config/config.go +++ b/config/config.go @@ -454,12 +454,19 @@ var ( OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0", } + // DefaultScrapeProtocols is the set of scrape protocols that will be proposed + // to the scrape target, ordered by priority. DefaultScrapeProtocols = []ScrapeProtocol{ OpenMetricsText1_0_0, OpenMetricsText0_0_1, PrometheusText0_0_4, } - DefaultNativeHistogramScrapeProtocols = []ScrapeProtocol{ + + // DefaultProtoFirstScrapeProtocols is like DefaultScrapeProtocols, but it + // favors the protobuf Prometheus exposition format. + // Used by default for certain feature flags like + // "native-histograms" and "created-timestamp-zero-ingestion". + DefaultProtoFirstScrapeProtocols = []ScrapeProtocol{ PrometheusProto, OpenMetricsText1_0_0, OpenMetricsText0_0_1, @@ -603,9 +610,12 @@ type ScrapeConfig struct { // More than this label value length post metric-relabeling will cause the // scrape to fail. 0 means no limit. LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"` - // More than this many buckets in a native histogram will cause the scrape to - // fail. + // If there are more than this many buckets in a native histogram, + // buckets will be merged to stay within the limit. NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"` + // If the growth factor of one bucket to the next is smaller than this, + // buckets will be merged to increase the factor sufficiently. + NativeHistogramMinBucketFactor float64 `yaml:"native_histogram_min_bucket_factor,omitempty"` // Keep no more than this many dropped targets per job. // 0 means no limit. KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"` @@ -1117,6 +1127,9 @@ type QueueConfig struct { MinBackoff model.Duration `yaml:"min_backoff,omitempty"` MaxBackoff model.Duration `yaml:"max_backoff,omitempty"` RetryOnRateLimit bool `yaml:"retry_on_http_429,omitempty"` + + // Samples older than the limit will be dropped. + SampleAgeLimit model.Duration `yaml:"sample_age_limit,omitempty"` } // MetadataConfig is the configuration for sending metadata to remote diff --git a/config/config_default_test.go b/config/config_default_test.go index f5333f4c883..26623590d96 100644 --- a/config/config_default_test.go +++ b/config/config_default_test.go @@ -12,7 +12,6 @@ // limitations under the License.
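Looping back to cmd/promtool/unittest.go above: the new diff output is produced by github.com/nsf/jsondiff. A self-contained sketch of that comparison, with invented alert payloads standing in for the marshaled labelAndAnnotation slices:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/nsf/jsondiff"
)

func main() {
	// Stand-ins for the expected and received alerts; values are made up.
	exp, err := json.Marshal(map[string]string{"alertname": "HighLatency", "severity": "page"})
	if err != nil {
		log.Fatal(err)
	}
	got, err := json.Marshal(map[string]string{"alertname": "HighLatency", "severity": "warn"})
	if err != nil {
		log.Fatal(err)
	}

	// FullMatch means no difference; anything else yields a console-colored diff.
	opts := jsondiff.DefaultConsoleOptions()
	if res, diff := jsondiff.Compare(exp, got, &opts); res != jsondiff.FullMatch {
		fmt.Println(diff)
	}
}
```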
//go:build !windows -// +build !windows package config diff --git a/config/config_test.go b/config/config_test.go index 5d753a0f73e..e614a44637e 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -568,6 +568,7 @@ var expectedConf = &Config{ ServiceDiscoveryConfigs: discovery.Configs{ &xds.KumaSDConfig{ Server: "http://kuma-control-plane.kuma-system.svc:5676", + ClientID: "main-prometheus", HTTPClientConfig: config.DefaultHTTPClientConfig, RefreshInterval: model.Duration(15 * time.Second), FetchTimeout: model.Duration(2 * time.Minute), diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index e034eff431c..b584301649e 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -221,6 +221,7 @@ scrape_configs: kuma_sd_configs: - server: http://kuma-control-plane.kuma-system.svc:5676 + client_id: main-prometheus - job_name: service-marathon marathon_sd_configs: diff --git a/config/testdata/roundtrip.good.yml b/config/testdata/roundtrip.good.yml index f2634d257a0..24ab7d25927 100644 --- a/config/testdata/roundtrip.good.yml +++ b/config/testdata/roundtrip.good.yml @@ -108,6 +108,7 @@ scrape_configs: kuma_sd_configs: - server: http://kuma-control-plane.kuma-system.svc:5676 + client_id: main-prometheus marathon_sd_configs: - servers: diff --git a/consoles/node-cpu.html b/consoles/node-cpu.html index d6c515d2dd0..284ad738f2b 100644 --- a/consoles/node-cpu.html +++ b/consoles/node-cpu.html @@ -47,7 +47,7 @@
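The configuration surface added in this patch (sample_age_limit, native_histogram_min_bucket_factor, and the Kuma client_id seen in the test data above) can also be set programmatically. A hedged sketch with illustrative values only; in practice these fields come from prometheus.yml, under remote_write queue_config, scrape_configs, and kuma_sd_configs respectively:

```go
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/discovery/xds"
)

func main() {
	// Remote write: buffered samples older than ten minutes are dropped
	// instead of being retried indefinitely.
	qc := config.DefaultQueueConfig
	qc.SampleAgeLimit = model.Duration(10 * time.Minute)

	// Scraping: merge native histogram buckets until the growth factor from
	// one bucket to the next is at least 1.1.
	sc := config.DefaultScrapeConfig
	sc.NativeHistogramMinBucketFactor = 1.1

	// Kuma SD: identify this Prometheus instance to the control plane.
	kuma := &xds.KumaSDConfig{
		Server:   "http://kuma-control-plane.kuma-system.svc:5676",
		ClientID: "main-prometheus",
	}

	fmt.Println(qc.SampleAgeLimit, sc.NativeHistogramMinBucketFactor, kuma.ClientID)
}
```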

CPU Usage