diff --git a/float/.gitrepo b/float/.gitrepo index 7d7f18b880bba9997e3ab1d91b050ffe65bd1ea9..3e1509f342368a5b11f2323da84d1df03fb0c6bf 100644 --- a/float/.gitrepo +++ b/float/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://git.autistici.org/ai3/float.git branch = master - commit = 61177d5be20b4f84efaa307e2de26b9bc225b091 - parent = ed7b87e3a8bae8689e21021c6247ad067a848360 + commit = 2cbbc5b2199d1a659e9ea177a7cbc5b59e8fbaef + parent = 94c22c4abe4a4277a48f0b71af23a70b173798b8 cmdver = 0.4.1 method = merge diff --git a/float/README.md b/float/README.md index 9b329d9a85fa41fc16f8a811cd17535b5ddc50b7..a5d5b5e4b6025a696a3fcc32618d0fad2b91f235 100644 --- a/float/README.md +++ b/float/README.md @@ -65,7 +65,7 @@ and in README files for individual Ansible roles: ### General Documentation * [Quick start guide](docs/quickstart.md) -* [Reference](docs/reference.md) +* [Reference](docs/reference.md) ([PDF](docs/reference.pdf)) * [Testing](docs/testing.md) # Requirements diff --git a/float/docs/Makefile b/float/docs/Makefile index e325c82b9bfea26dc379fc8bce9bd59f022b4af5..2fb8384361f454a2f5cf211476e38fb96b9d6bcf 100644 --- a/float/docs/Makefile +++ b/float/docs/Makefile @@ -2,7 +2,7 @@ DOTS = $(wildcard *.dot) SVGS = $(DOTS:%.dot=%.svg) PNGS = $(DOTS:%.dot=%.png) -all: $(SVGS) $(PNGS) +all: $(SVGS) $(PNGS) reference.pdf %.svg: %.dot dot -Tsvg -o$@ $< @@ -10,3 +10,6 @@ all: $(SVGS) $(PNGS) %.png: %.dot dot -Tpng -o$@ $< +%.pdf: %.md + awk '/^# Services/,/EOF/ {print}' $< \ + | pandoc -V 'title:Float Reference' --from=gfm --output=$@ --toc diff --git a/float/docs/reference.md b/float/docs/reference.md index 10151c062dfd97f36ee984b4e75a95dca0423129..7b723678af9ded198c15fc94cfa3db2608c761a7 100644 --- a/float/docs/reference.md +++ b/float/docs/reference.md @@ -405,12 +405,14 @@ myservice: - template: src: myservice.conf.j2 dest: /etc/myservice.conf + group: docker-myservice + mode: 0640 ``` *roles/myservice/templates/myservice.conf.j2* ```yaml -# just an example +# Just an 
example of an Ansible template, with no particular meaning. domain={{ domain }} ``` @@ -425,15 +427,65 @@ float can't automatically generate this association itself): ``` This takes advantage of the fact that float defines an Ansible group -for each service, which includes the hosts that the service instances -have been scheduled on. +for each service (with the same name as the service itself), which +includes the hosts that the service instances have been scheduled +on. **Note** that since Ansible 2.9, the group names will be +"normalized" according to the rules for Python identifiers, +i.e. dashes will be turned into underscores. + +### On the Ansible requirement + +Does the above mean you have to learn Ansible in order to use float? +Should you be concerned about investing effort into writing a +configuration for your service in yet another configuration management +system's language? The answer is *yes*, but to a very limited extent: + +* You do need knowledge of how to set up an Ansible environment: the + role of `ansible.cfg`, how to structure `group_vars` etc. Writing a + dedicated configuration push system for float was surely an option, + but we preferred relying on a popular existing ecosystem for this, + both for convenience of implementation and also to allow a migration + path of co-existence for legacy systems. To counter-balance, float + tries to keep its usage of Ansible as limited as possible, to allow + eventual replacement. + +* Most services will only need an extremely simple Ansible role to + generate the service configuration, normally a mix of *template* and + *copy* tasks, which are possibly the most basic functionality of any + configuration management system. This should guarantee a certain + *ease of portability* to other mechanisms, should one decide to + migrate away from float. 
Besides, it is a good sanity check: if your + service requires complicated setup steps, perhaps it might be + possible to move some of that complexity *inside* the service + containers. + +To emphasize portability, it might be wise to adhere to the following +rules when writing Ansible roles: + +* Try to use only *copy*, *file* and *template* tasks, rather than + complex Ansible modules; +* avoid using complex conditional logic or loops in your Ansible tasks; +* keep the configuration "local" to the service: do not reference + other services except using the proper service discovery APIs (DNS), + do not try to look up configuration attributes for other services + (instead make those into global configuration variables); +* do not use facts from other hosts that need to be discovered (these + break if you are not using a fact cache when doing partial runs): + instead, define whatever host attributes you need, explicitly, in + the inventory. + +More generally, the integration with Ansible as the underlying +configuration management engine is the "escape hatch" that allows the +implementation of setups that are not explicitly modeled by float +itself. + # Infrastructure Part 1: Base Layer We can subdivide what is done by float in two separate sections: -operations and services done on each host, the so-called "base" layer -of infrastructure, and then the fundamental services that are part of -the "cluster-level" infrastructure (logging, monitoring, +operations and services affecting every host, the so-called "base" +layer of infrastructure, and then the fundamental services that are +part of the "cluster-level" infrastructure (logging, monitoring, authentication, etc): the latter are part of float but run on the base layer itself as proper services, with their own descriptions and Ansible roles to configure them. @@ -923,7 +975,7 @@ delegated to the external automation. 
## Authentication and Identity -The fkiat infrastructure provides a full AAA solution that is used by +The float infrastructure provides a full AAA solution that is used by all the built-in services, and that can be easily integrated with your services (or at least that would be the intention). It aims to implement modern solutions, and support moderately complex scenarios, @@ -1184,11 +1236,8 @@ infrastructure: * it is possible to separate short-term and long-term metrics storage by using the *prometheus-lts* service to scrape the other Prometheus instances and retain metrics long term. The Thanos layer will again - transparently support this configuration. - -To enable long-term metrics storage, include -*services.prometheus-lts.yml* in your service definitions, and add the -corresponding *playbooks/prometheus-lts.yml* playbook to your own. + transparently support this configuration. See the *Scaling up the + monitoring infrastructure* section below for details. Monitoring dashboards are provided by Grafana. @@ -1234,6 +1283,42 @@ alerting less noisy): * `scope` should be one of *host* (for prober-based alerts), *instance* (for all other targets), or *global*. +### Scaling up the monitoring infrastructure + +Float upholds the philosophy that collecting lots and lots of metrics +is actually a good thing, because it enables post-facto diagnosis of +issues. However, even with relatively small numbers of services and +machines, the amount of timeseries data that needs to be stored will +grow very quickly. + +Float allows you to split the monitoring data collection into two +logical "parts" (which themselves can consist of multiple identical +instances for redundancy purposes), let's call them *environments* to +avoid overloading the term *instance*: + +* A *short-term* Prometheus environment that scrapes all the service + targets with high frequency, evaluates alerts, but has a short + retention time (hours / days, depending on storage + requirements). 
Storage requirements for this environment are + bounded, for a given set of services and targets. + +* A *long-term* Prometheus environment that scrapes data from the + short-term environment, with a lower frequency, and discarding + high-cardinality metrics for which we have aggregates. The storage + requirement grows much more slowly over time than the short-term + environment. Float calls this service *prometheus-lts* (long-term + storage). + +This effectively implements a two-tiered (high-resolution / +low-resolution) timeseries database, which is then reconciled +transparently when querying through the Thanos service layer. + +To enable long-term metrics storage, include +*services.prometheus-lts.yml* in your service definitions, and add the +corresponding *playbooks/prometheus-lts.yml* playbook to your own. + +You will also need to set *prometheus_tsdb_retention* and +*prometheus_lts_tsdb_retention* variables appropriately. ## Log Collection and Analysis @@ -1264,10 +1349,12 @@ added: ### Metric extraction -It is often useful to extract real-time metrics from logs, for -instance this is how we compute real-time HTTP statistics. Float runs -an instance of [mtail](https://github.com/google/mtail) on every host -to process the local logs and compute metrics based on them. +It is often useful to extract real-time metrics from logs, most often +when dealing with software that does not export its own metrics. An +example is NGINX, where logs are parsed in order to compute real-time +access metrics. Float runs an instance of +[mtail](https://github.com/google/mtail) on every host to process the +local logs and compute metrics based on them. Custom rules can be added simply by dropping mtail programs in */etc/mtail*. This would generally be done by the relevant @@ -1277,14 +1364,33 @@ service-specific Ansible role. Syslog logs received by the log-collector will be subject to further processing in order to extract metadata fields that will be stored and -indexed. 
- -The implementation uses -the -[mmnormalize](https://www.rsyslog.com/doc/v8-stable/configuration/modules/mmnormalize.html) rsyslog -module, which parses logs using -the [liblognorm](http://www.liblognorm.com/files/manual/index.html) -engine to extract metadata fields. +indexed. Metadata extracted from logs is useful for searching and +filtering, even though those cases are already well served by +full-text search (or *grep*), and most importantly for aggregation +purposes: these can be either used for visualizations (dashboards), or +for analytical queries, that would be difficult to answer using the +coarse view provided by monitoring metrics. + +Perhaps it's best to make an example to better illustrate the relation +between metadata-annotated logs and monitoring metrics, especially +log-derived ones, which are obviously related being derived from the +same source. Let's consider the canonical example of the HTTP access +logs of a website which is having problems: the monitoring system can +tell which fraction of the incoming requests is returning, say, an +error 500, while properly annotated logs can answer more detailed +queries such as "the list of top 10 URLs that have returned an error +500 in the last day". The extremely large cardinality of the URL field +(which is user-controlled) makes it too impractical to use for +monitoring purposes, but the monitoring metric is cheap to compute and +easy to alert on in real-time, while the metadata-annotated logs +provide us with the (detailed, but more expensive to compute) +analytical view. + +The implementation uses the +[mmnormalize](https://www.rsyslog.com/doc/v8-stable/configuration/modules/mmnormalize.html) +rsyslog module, which parses logs with the +[liblognorm](http://www.liblognorm.com/files/manual/index.html) engine +to extract metadata fields. Liblognorm rulebase files are a bit verbose but relatively simple to write. 
Rules can be manually tested using the *lognormalizer* utility, @@ -1354,7 +1460,12 @@ sane replication options. # Configuration Float is an Ansible plugin with its own configuration, that replaces -the native Ansible inventory configuration. +the native Ansible inventory configuration. You will still be running +Ansible (`ansible-playbook` or whatever frontend you prefer) in order +to apply your configuration to your production environment. Float only +provides its own roles and plugins, but it does not interfere with the +rest of the Ansible configuration: playbooks, host and group +variables, etc. which will have to be present for a functional setup. The toolkit configuration is split into two parts, the *service description metadata*, containing definitions of the known services, @@ -1363,7 +1474,8 @@ same information you would have in a normal Ansible inventory). A number of global Ansible variables are also required to customize the infrastructure for your application. -All files are YAML-encoded and should usually have a *.yml* extension. +All configuration files are YAML-encoded and should usually have a +*.yml* extension. Float is controlled by a top-level configuration file, which you should pass to the ansible command-line tool as the inventory with the @@ -1378,6 +1490,9 @@ credentials_dir: credentials/ plugin: float ``` +This file **must** exist and it must contain at the very least the +"plugin: float" directive. + The attributes supported are: `services_file` points at the location of the file containing the @@ -1695,6 +1810,9 @@ publicly exported (at least in the current implementation), which unfortunately means that the service itself shouldn't be running on *frontend* nodes. +`use_proxy_protocol`: When true, enable the HAProxy proxy protocol for +the service, to propagate the original client IP to the backends. 
+ #### Other endpoints Other endpoints are used when the service runs their own reverse @@ -1806,8 +1924,10 @@ attempt to restore it on new servers: the idea is that for sharded datasets, the application layer is responsible for data management. This attribute is false by default. -`owner`: For filesystem paths, the user that will own the files upon -restore. +`owner`, `group`, `mode`: For filesystem-backed datasets, float will +create the associated directory if it does not exist; these parameters +specify ownership and permissions. These permissions will also be +reset upon restore. ### Volumes @@ -2051,6 +2171,16 @@ associated with the alerts. Prometheus instances (default 90d). Set it to a shorter value when enabling long-term storage mode. +`prometheus_lts_tsdb_retention` controls the time horizon of the +long-term Prometheus instances (default 1 year), when they are +enabled. + +`prometheus_scrape_interval` sets how often the primary Prometheus +instances should scrape their targets (default 10s). + +`prometheus_lts_scrape_interval` sets how often the long-term +Prometheus instances should scrape the primary ones (default 1m). + `prometheus_external_targets` allows adding additional targets to Prometheus beyond those that are described by the service metadata. It is a list of entries with *name* and *targets* attributes, where diff --git a/float/docs/reference.pdf b/float/docs/reference.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bd147fbe28efebcd9d71368151adc8725b3cc14d Binary files /dev/null and b/float/docs/reference.pdf differ diff --git a/float/roles/README.md b/float/roles/README.md index 412b10ae2863f981cd5374b050f0feee82f57246..21588be678b7f6956be529931fcbe0bf993e04d7 100644 --- a/float/roles/README.md +++ b/float/roles/README.md @@ -10,7 +10,7 @@ with user-defined roles. They are roughly grouped into sections: top of the *base* layer, i.e. within containers etc). 
* *util* for internal roles that are included by other roles, either to - expose common functionality to user roles (geoip, mariadb instances), - or to handle Ansible-related logic shared by multiple roles. + expose common functionality to user roles (geoip), or to handle + Ansible-related logic shared by multiple roles. diff --git a/float/roles/float-base-docker/files/containers.conf b/float/roles/float-base-docker/files/containers.conf new file mode 100644 index 0000000000000000000000000000000000000000..06c0f8c96a519958efd60ecf108e67cb68b70bed --- /dev/null +++ b/float/roles/float-base-docker/files/containers.conf @@ -0,0 +1,407 @@ +# The containers configuration file specifies all of the available configuration +# command-line options/flags for container engine tools like Podman & Buildah, +# but in a TOML format that can be easily modified and versioned. + +# Please refer to containers.conf(5) for details of all configuration options. +# Not all container engines implement all of the options. +# All of the options have hard coded defaults and these options will override +# the built in defaults. Users can then override these options via the command +# line. Container engines will read containers.conf files in up to three +# locations in the following order: +# 1. /usr/share/containers/containers.conf +# 2. /etc/containers/containers.conf +# 3. $HOME/.config/containers/containers.conf (Rootless containers ONLY) +# Items specified in the latter containers.conf, if they exist, override the +# previous containers.conf settings, or the default settings. + +[containers] + +# List of devices. Specified as +# "<device-on-host>:<device-on-container>:<permissions>", for example: +# "/dev/sdc:/dev/xvdc:rwm". +# If it is empty or commented out, only the default devices will be used +# +# devices = [] + +# List of volumes. Specified as +# "<directory-on-host>:<directory-in-container>:<options>", for example: +# "/db:/var/lib/db:ro". 
+# If it is empty or commented out, no volumes will be added +# +# volumes = [] + +# Used to change the name of the default AppArmor profile of container engine. +# +# apparmor_profile = "container-default" + +# List of annotation. Specified as +# "key=value" +# If it is empty or commented out, no annotations will be added +# +# annotations = [] + +# Default way to to create a cgroup namespace for the container +# Options are: +# `private` Create private Cgroup Namespace for the container. +# `host` Share host Cgroup Namespace with the container. +# +# cgroupns = "private" + +# Control container cgroup configuration +# Determines whether the container will create CGroups. +# Options are: +# `enabled` Enable cgroup support within container +# `disabled` Disable cgroup support, will inherit cgroups from parent +# `no-conmon` Container engine runs run without conmon +# +# cgroups = "enabled" + +# List of default capabilities for containers. If it is empty or commented out, +# the default capabilities defined in the container engine will be added. +# +# default_capabilities = [ +# "AUDIT_WRITE", +# "CHOWN", +# "DAC_OVERRIDE", +# "FOWNER", +# "FSETID", +# "KILL", +# "MKNOD", +# "NET_BIND_SERVICE", +# "NET_RAW", +# "SETGID", +# "SETPCAP", +# "SETUID", +# "SYS_CHROOT", +# ] + +# A list of sysctls to be set in containers by default, +# specified as "name=value", +# for example:"net.ipv4.ping_group_range = 0 1000". +# +# default_sysctls = [ +# "net.ipv4.ping_group_range=0 1000", +# ] + +# A list of ulimits to be set in containers by default, specified as +# "<ulimit name>=<soft limit>:<hard limit>", for example: +# "nofile=1024:2048" +# See setrlimit(2) for a list of resource names. +# Any limit not specified here will be inherited from the process launching the +# container engine. +# Ulimits has limits for non privileged container engines. 
+# +# default_ulimits = [ +# "nofile=1280:2560", +# ] + +# List of default DNS options to be added to /etc/resolv.conf inside of the container. +# +# dns_options = [] + +# List of default DNS search domains to be added to /etc/resolv.conf inside of the container. +# +# dns_searches = [] + +# Set default DNS servers. +# This option can be used to override the DNS configuration passed to the +# container. The special value "none" can be specified to disable creation of +# /etc/resolv.conf in the container. +# The /etc/resolv.conf file in the image will be used without changes. +# +# dns_servers = [] + +# Environment variable list for the conmon process; used for passing necessary +# environment variables to conmon or the runtime. +# +# env = [ +# "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", +# ] + +# Pass all host environment variables into the container. +# +# env_host = false + +# Path to OCI hooks directories for automatically executed hooks. +# +# hooks_dir = [ +# "/usr/share/containers/oci/hooks.d", +# ] + +# Default proxy environment variables passed into the container. +# The environment variables passed in include: +# http_proxy, https_proxy, ftp_proxy, no_proxy, and the upper case versions of +# these. This option is needed when host system uses a proxy but container +# should not use proxy. Proxy environment variables specified for the container +# in any other way will override the values passed from the host. +# +# http_proxy = true + +# Run an init inside the container that forwards signals and reaps processes. +# +# init = false + +# Container init binary, if init=true, this is the init binary to be used for containers. +# +# init_path = "/usr/libexec/podman/catatonit" + +# Default way to create an IPC namespace (POSIX SysV IPC) for the container +# Options are: +# `private` Create private IPC Namespace for the container. +# `host` Share host IPC Namespace with the container. 
+# +# ipcns = "private" + +# Flag tells container engine to whether to use container separation using +# MAC(SELinux)labeling or not. +# Flag is ignored on label disabled systems. +# +# label = true + +# Logging driver for the container. Available options: k8s-file and journald. +# +# log_driver = "k8s-file" + +# Maximum size allowed for the container log file. Negative numbers indicate +# that no size limit is imposed. If positive, it must be >= 8192 to match or +# exceed conmon's read buffer. The file is truncated and re-opened so the +# limit is never exceeded. +# +log_size_max = 65536 + +# Default way to to create a Network namespace for the container +# Options are: +# `private` Create private Network Namespace for the container. +# `host` Share host Network Namespace with the container. +# `none` Containers do not use the network +# +# netns = "private" + +# Create /etc/hosts for the container. By default, container engine manage +# /etc/hosts, automatically adding the container's own IP address. +# +# no_hosts = false + +# Maximum number of processes allowed in a container. +# +# pids_limit = 2048 + +# Default way to to create a PID namespace for the container +# Options are: +# `private` Create private PID Namespace for the container. +# `host` Share host PID Namespace with the container. +# +# pidns = "private" + +# Path to the seccomp.json profile which is used as the default seccomp profile +# for the runtime. +# +# seccomp_profile = "/usr/share/containers/seccomp.json" + +# Size of /dev/shm. Specified as <number><unit>. +# Unit is optional, values: +# b (bytes), k (kilobytes), m (megabytes), or g (gigabytes). +# If the unit is omitted, the system uses bytes. +# +# shm_size = "65536k" + +# Default way to to create a UTS namespace for the container +# Options are: +# `private` Create private UTS Namespace for the container. +# `host` Share host UTS Namespace with the container. 
+# +# utsns = "private" + +# Default way to to create a User namespace for the container +# Options are: +# `auto` Create unique User Namespace for the container. +# `host` Share host User Namespace with the container. +# +# userns = "host" + +# Number of UIDs to allocate for the automatic container creation. +# UIDs are allocated from the “container” UIDs listed in +# /etc/subuid & /etc/subgid +# +# userns_size=65536 + +# The network table contains settings pertaining to the management of +# CNI plugins. + +[network] + +# Path to directory where CNI plugin binaries are located. +# +# cni_plugin_dirs = ["/usr/libexec/cni"] + +# Path to the directory where CNI configuration files are located. +# +# network_config_dir = "/etc/cni/net.d/" + +[engine] + +# Cgroup management implementation used for the runtime. +# Valid options “systemd” or “cgroupfs” +# +# cgroup_manager = "systemd" + +# Environment variables to pass into conmon +# +# conmon_env_vars = [ +# "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" +# ] + +# Paths to look for the conmon container manager binary +# +# conmon_path = [ +# "/usr/libexec/podman/conmon", +# "/usr/local/libexec/podman/conmon", +# "/usr/local/lib/podman/conmon", +# "/usr/bin/conmon", +# "/usr/sbin/conmon", +# "/usr/local/bin/conmon", +# "/usr/local/sbin/conmon" +# ] + +# Specify the keys sequence used to detach a container. +# Format is a single character [a-Z] or a comma separated sequence of +# `ctrl-<value>`, where `<value>` is one of: +# `a-z`, `@`, `^`, `[`, `\`, `]`, `^` or `_` +# +# detach_keys = "ctrl-p,ctrl-q" + +# Determines whether engine will reserve ports on the host when they are +# forwarded to containers. When enabled, when ports are forwarded to containers, +# ports are held open by as long as the container is running, ensuring that +# they cannot be reused by other programs on the host. However, this can cause +# significant memory usage if a container has many ports forwarded to it. 
+# Disabling this can save memory. +# +# enable_port_reservation = true + +# Selects which logging mechanism to use for container engine events. +# Valid values are `journald`, `file` and `none`. +# +# events_logger = "journald" + +# Default transport method for pulling and pushing for images +# +# image_default_transport = "docker://" + +# Default command to run the infra container +# +# infra_command = "/pause" + +# Infra (pause) container image name for pod infra containers. When running a +# pod, we start a `pause` process in a container to hold open the namespaces +# associated with the pod. This container does nothing other than sleep, +# reserving the pod's resources for the lifetime of the pod. +# +# infra_image = "k8s.gcr.io/pause:3.2" + +# Specify the locking mechanism to use; valid values are "shm" and "file". +# Change the default only if you are sure of what you are doing, in general +# "file" is useful only on platforms where cgo is not available for using the +# faster "shm" lock type. You may need to run "podman system renumber" after +# you change the lock type. +# +# lock_type = "shm" + +# Default engine namespace +# If engine is joined to a namespace, it will see only containers and pods +# that were created in the same namespace, and will create new containers and +# pods in that namespace. +# The default namespace is "", which corresponds to no namespace. When no +# namespace is set, all containers and pods are visible. +# +# namespace = "" + +# Whether to use chroot instead of pivot_root in the runtime +# +# no_pivot_root = false + +# Number of locks available for containers and pods. +# If this is changed, a lock renumber must be performed (e.g. with the +# 'podman system renumber' command). 
+# +# num_locks = 2048 + +# Whether to pull new image before running a container +# pull_policy = "missing" + +# Directory for persistent engine files (database, etc) +# By default, this will be configured relative to where the containers/storage +# stores containers +# Uncomment to change location from this default +# +# static_dir = "/var/lib/containers/storage/libpod" + +# Directory for temporary files. Must be tmpfs (wiped after reboot) +# +# tmp_dir = "/var/run/libpod" + +# Directory for libpod named volumes. +# By default, this will be configured relative to where containers/storage +# stores containers. +# Uncomment to change location from this default. +# +# volume_path = "/var/lib/containers/storage/volumes" + +# Default OCI runtime +# +# runtime = "runc" + +# List of the OCI runtimes that support --format=json. When json is supported +# engine will use it for reporting nicer errors. +# +# runtime_supports_json = ["crun", "runc", "kata"] + +# List of the OCI runtimes that supports running containers without cgroups. +# +# runtime_supports_nocgroups = ["crun"] + +# List of the OCI runtimes that supports running containers with KVM Separation. +# +# runtime_supports_kvm = ["kata"] + +# Paths to look for a valid OCI runtime (runc, runv, kata, etc) +[engine.runtimes] +runc = [ + "/usr/lib/cri-o-runc/sbin/runc", + "/usr/sbin/runc", + "/usr/bin/runc", + "/usr/local/bin/runc", + "/usr/local/sbin/runc", + "/sbin/runc", + "/bin/runc", +] + +# crun = [ +# "/usr/bin/crun", +# "/usr/sbin/crun", +# "/usr/local/bin/crun", +# "/usr/local/sbin/crun", +# "/sbin/crun", +# "/bin/crun", +# "/run/current-system/sw/bin/crun", +# ] + +# kata = [ +# "/usr/bin/kata-runtime", +# "/usr/sbin/kata-runtime", +# "/usr/local/bin/kata-runtime", +# "/usr/local/sbin/kata-runtime", +# "/sbin/kata-runtime", +# "/bin/kata-runtime", +# "/usr/bin/kata-qemu", +# "/usr/bin/kata-fc", +# ] + +# Number of seconds to wait for container to exit before sending kill signal. 
+#stop_timeout = 10 + +# The [engine.runtimes] table MUST be the last entry in this file. +# (Unless another table is added) +# TOML does not provide a way to end a table other than a further table being +# defined, so every key hereafter will be part of [runtimes] and not the main +# config. diff --git a/float/roles/float-base-docker/tasks/main.yml b/float/roles/float-base-docker/tasks/main.yml index d4493153b0be3f9c6cf536c0ac39eb6866ebca07..40df517c7112a2b63a040dadcf76a8b4b128613c 100644 --- a/float/roles/float-base-docker/tasks/main.yml +++ b/float/roles/float-base-docker/tasks/main.yml @@ -32,9 +32,10 @@ - name: Get list of running containers shell: "{{ container_runtime }} ps --format={% raw %}'{{.Names}}'{% endraw %}" - changed_when: False + changed_when: false check_mode: no register: docker_running_containers + ignore_errors: true - set_fact: enabled_container_tags: "{{ float_enabled_containers | map(attribute='tag') | list }}" @@ -53,7 +54,7 @@ state: absent with_items: - "{{ docker_running_containers.stdout_lines }}" - when: "item not in enabled_container_tags" + when: "docker_running_containers is succeeded and item not in enabled_container_tags" ignore_errors: true - name: Install docker cleanup script diff --git a/float/roles/float-base-docker/tasks/podman.yml b/float/roles/float-base-docker/tasks/podman.yml index 0eddc8b5b179142d7b5dcbbaed2aba223e2103d6..141dd089c8bc2af5cc450cb6d7ad90ea28a2d261 100644 --- a/float/roles/float-base-docker/tasks/podman.yml +++ b/float/roles/float-base-docker/tasks/podman.yml @@ -40,6 +40,11 @@ state: link force: true +- name: Install containers.conf + copy: + src: containers.conf + dest: "/etc/containers/containers.conf" + # TODO: remove this once the podman packaging issues are fixed. 
- name: Install a working seccomp.json copy: diff --git a/float/roles/float-base-docker/tasks/start.yml b/float/roles/float-base-docker/tasks/start.yml index a2f6019f0c96d459606a48ae5ecbd128a766ff32..5a2dffa93c770eac106421b1e39f6c535bec341b 100644 --- a/float/roles/float-base-docker/tasks/start.yml +++ b/float/roles/float-base-docker/tasks/start.yml @@ -26,13 +26,6 @@ append: true loop: "{{ float_enabled_containers }}" -# TODO: get rid of this once the existing environments have been updated! -- name: Remove obsolete container environment files - file: - path: "/etc/default/{{ item.service }}-{{ item.container.name }}" - state: absent - loop: "{{ float_enabled_containers }}" - - name: Create run scripts template: src: run.sh.j2 diff --git a/float/roles/float-base/files/mtail/kernel.mtail b/float/roles/float-base/files/mtail/kernel.mtail index 90bb867f0409d41726d9ce9d874db4ed69472e8d..4d60bf920518ef82122b418a4d46c82326194527 100644 --- a/float/roles/float-base/files/mtail/kernel.mtail +++ b/float/roles/float-base/files/mtail/kernel.mtail @@ -23,11 +23,11 @@ def syslog { kernel_ooms_total++ } - /CPU(?P<cpu>\d+): (?:Core|Package) temperature above threshold, cpu clock throttled/ { + /CPU(?P<cpu>\d+): \w+ temperature above threshold, cpu clock throttled/ { kernel_cpu_throttled[$cpu] = 1 } - /CPU(?P<cpu>\d+): (?:Core|Package) temperature\/speed normal/ { + /CPU(?P<cpu>\d+): \w+ temperature\/speed normal/ { kernel_cpu_throttled[$cpu] = 0 } } diff --git a/float/roles/float-base/files/node-exporter-scripts/podman.sh b/float/roles/float-base/files/node-exporter-scripts/podman.sh new file mode 100644 index 0000000000000000000000000000000000000000..4b12787524dcfb89387a8f0b792ef0e0ca4b8b86 --- /dev/null +++ b/float/roles/float-base/files/node-exporter-scripts/podman.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# Dump a map of container name -> image digest, to track +# container "versions". 
+ +podman ps --format='{{.Names}} {{.ImageID}}' 2>/dev/null \ +| while read name image_id; do + digest=$(podman image inspect --format='{{.Digest}}' ${image_id} 2>/dev/null | cut -d: -f2 | cut -c1-7) + [ -z "$digest" ] && continue + echo "container_digest{service=\"docker-${name}\",digest=\"${digest}\"} 1" + done + +exit 0 diff --git a/float/roles/float-base/files/run-node-exporter-script.sh b/float/roles/float-base/files/run-node-exporter-script.sh index 79f9f5d1b3a9c068fd8cb62cf5165895e01e490f..bacc64563741481588f52bf12126712404c9e9c6 100644 --- a/float/roles/float-base/files/run-node-exporter-script.sh +++ b/float/roles/float-base/files/run-node-exporter-script.sh @@ -24,7 +24,7 @@ output_file="${output_dir}/${script_name}.prom" tmp_file="${output_file}.$$" trap "rm -f $tmp_file 2>/dev/null" EXIT INT TERM -runcron --no-metrics --splay 60 --name "node-exporter-$script_name" -- \ +runcron --no-syslog --no-metrics --splay 60 --name "node-exporter-$script_name" -- \ "$script_path" > "$tmp_file" if [ $? 
-gt 0 ]; then rm -f "$tmp_file" 2>/dev/null diff --git a/float/roles/float-base/tasks/backup_dataset.yml b/float/roles/float-base/tasks/backup_dataset.yml index 1311c801384e63a59ed2cb58ce25d1944230722d..10589bedf30adb81aecaab03e4b5789e1b72658b 100644 --- a/float/roles/float-base/tasks/backup_dataset.yml +++ b/float/roles/float-base/tasks/backup_dataset.yml @@ -9,14 +9,35 @@ dataset_name: "{{ item.0.name }}/{{ item.1.name }}" dataset_filename: "{{ item.0.name }}_{{ item.1.name }}" dataset_owner: "{{ item.1.get('owner', '') }}" + dataset_group: "{{ item.1.get('group', 'root') }}" + dataset_mode: "{{ item.1.get('mode', '0700') }}" dataset_path: "{{ item.1.get('path', '') }}" dataset_type: "{% if 'backup_command' in item.1 %}pipe{% else %}file{% endif %}" + dataset_is_present: "{{ (item.0.name in float_enabled_services) }}" dataset_should_backup: "{{ (item.0.name in float_enabled_services) and ((not item.1.get('on_master_only', False)) or (item.0.get('master_host') == inventory_hostname)) }}" - set_fact: dataset_should_restore: "{{ dataset_should_backup and not item.1.get('sharded', False) }}" -- name: Set up configuration for dataset {{ dataset.name }} (source) +- name: "Create path for dataset {{ dataset_name }}" + file: + path: "{{ dataset_path }}" + mode: "{{ dataset_mode | default('0700') }}" + state: directory + when: "(dataset_is_present) and (dataset_path) and (dataset_mode)" + +# Try to set permissions. Ignore errors because it is possible that +# the user does not (yet) exist. 
+- name: "Set permissions for dataset directory of {{ dataset_name }}" + file: + path: "{{ dataset_path }}" + state: directory + owner: "{{ dataset_owner }}" + group: "{{ dataset_group | default('root') }}" + when: "(dataset_is_present) and (dataset_path) and (dataset_owner)" + ignore_errors: true + +- name: Set up configuration for dataset {{ dataset_name }} (source) template: src: "tabacco/sources/source.yml.j2" dest: "/etc/tabacco/sources/{{ dataset_filename }}.yml" @@ -25,7 +46,7 @@ notify: - reload backup agent -- name: Set up configuration for dataset {{ dataset.name }} (handler) +- name: Set up configuration for dataset {{ dataset_name }} (handler) template: src: "tabacco/handlers/{{ dataset_type }}.yml.j2" dest: "/etc/tabacco/handlers/{{ dataset_filename }}.yml" @@ -34,7 +55,7 @@ notify: - reload backup agent -- name: Clear configuration for dataset {{ dataset.name }} +- name: Clear configuration for dataset {{ dataset_name }} file: path: "/etc/tabacco/{{ diritem }}/{{ dataset_filename }}.yml" state: absent diff --git a/float/roles/float-base/templates/sources.list.j2 b/float/roles/float-base/templates/sources.list.j2 index 05120c7d9f3c24aa7f882d13a874bce6387bfa08..ee880120c9f26a53ab5389ef325dba8eeb7d64f6 100644 --- a/float/roles/float-base/templates/sources.list.j2 +++ b/float/roles/float-base/templates/sources.list.j2 @@ -1,4 +1,6 @@ +{% if apt_sources_list_override is defined %}{{ apt_sources_list_override }}{% else %} deb http://deb.debian.org/debian {{ float_debian_dist }} main contrib non-free deb http://deb.debian.org/debian {{ float_debian_dist }}-backports main deb http://deb.debian.org/debian {{ float_debian_dist }}-updates main contrib non-free deb http://security.debian.org/ {{ float_debian_dist }}/updates main contrib non-free +{% endif %} diff --git a/float/roles/float-base/templates/tabacco/restore-script.j2 b/float/roles/float-base/templates/tabacco/restore-script.j2 index 
5539817dbd552dbdcd7e6db96c6dc8a62802814f..3d05f23aa3970c2466edc2cc3404b3869005ab65 100644 --- a/float/roles/float-base/templates/tabacco/restore-script.j2 +++ b/float/roles/float-base/templates/tabacco/restore-script.j2 @@ -34,8 +34,8 @@ else fi fi -{% if dataset_owner %} -chown -R "{{ dataset_owner }}":"{{ dataset_owner }}" "{{ dataset_path }}" +{% if dataset_path and dataset_owner %} +chown -R "{{ dataset_owner }}":"{{ dataset_group }}" "{{ dataset_path }}" {% endif %} echo "marking restore successful" >&2 diff --git a/float/roles/float-infra-dns/tasks/main.yml b/float/roles/float-infra-dns/tasks/main.yml index 07e07bcae3be6c89e631657a6a5dafd937d3b429..fc65958d230c1a218fbb2e470994b414004c3dab 100644 --- a/float/roles/float-infra-dns/tasks/main.yml +++ b/float/roles/float-infra-dns/tasks/main.yml @@ -9,6 +9,7 @@ - bind9 - dnsutils - python-zonetool + - prometheus-bind-exporter - name: Install bind9 configuration (dirs) file: diff --git a/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 b/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 index 703e4b289cfbf5d8ca94075377780b0f47aea41a..61db7a64de81818b538b73954c5fbda02120f5df 100644 --- a/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 +++ b/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 @@ -34,7 +34,7 @@ backend be_{{ service_name }}_{{ ep.name }}_{{ port }} balance leastconn option independant-streams {% for s in groups[service_name]|sort %} - server task{{ loop.index -1 }} {{ s }}.{{ service_name }}.{{ domain }}:{{ port }} check fall 3 id {{ loop.index + 999 }} inter 5000 rise 3 slowstart 60000 weight 50 + server task{{ loop.index -1 }} {{ s }}.{{ service_name }}.{{ domain }}:{{ port }} check fall 3 id {{ loop.index + 999 }} inter 5000 rise 3 slowstart 60000 weight 50{% if ep.get('use_proxy_protocol') %} send-proxy-v2{% endif %} {% endfor %} {% endfor %} # ep.ports @@ -50,7 +50,7 @@ backend be_{{ service_name }}_{{ ep.name }}_{{ ep.port }} balance leastconn option 
independant-streams {% for s in groups[service_name]|sort %} - server task{{ loop.index -1 }} {{ s }}.{{ service_name }}.{{ domain }}:{{ ep.port }} check fall 3 id {{ loop.index + 999 }} inter 5000 rise 3 slowstart 60000 weight 50 + server task{{ loop.index -1 }} {{ s }}.{{ service_name }}.{{ domain }}:{{ ep.port }} check fall 3 id {{ loop.index + 999 }} inter 5000 rise 3 slowstart 60000 weight 50{% if ep.get('use_proxy_protocol') %} send-proxy-v2{% endif %} {% endfor %} {% endif %} # ep.get('ports') diff --git a/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf b/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf index 89a6a86b57aff3aab950b3680d3e60b471615759..68bbc39c60e881ff449e51c2939496eacca6f1ec 100644 --- a/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf +++ b/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf @@ -31,6 +31,3 @@ proxy_cache_min_uses 2; # Show our own error pages, not the remote ones. proxy_intercept_errors on; -# Add a X-Cache-Status header. -add_header X-Cache-Status $upstream_cache_status; - diff --git a/float/roles/float-infra-nginx/templates/config/snippets/site-common.conf b/float/roles/float-infra-nginx/templates/config/snippets/site-common.conf index 071eb7efb7477e9de5461810a4d3d6b992c2a099..9a50d8cffd7b068aee95508b036fc916f164e800 100644 --- a/float/roles/float-infra-nginx/templates/config/snippets/site-common.conf +++ b/float/roles/float-infra-nginx/templates/config/snippets/site-common.conf @@ -23,6 +23,9 @@ error_page 451 /__errors/451.html; limit_req zone=perip burst={{ nginx_limit_perip_burst }}; limit_req zone=perserver burst={{ nginx_limit_perserver_burst }}; +# Add a X-Cache-Status header. +add_header X-Cache-Status $upstream_cache_status; + # Enable HSTS. 
add_header Strict-Transport-Security "max-age=31556926" always; diff --git a/float/roles/float-infra-prometheus-lts/defaults/main.yml b/float/roles/float-infra-prometheus-lts/defaults/main.yml index e2c1c60632659edb7c59cd271928eac965f8a6fc..6853fa4950e47a9f3f8256cd798f6a87f1bf8406 100644 --- a/float/roles/float-infra-prometheus-lts/defaults/main.yml +++ b/float/roles/float-infra-prometheus-lts/defaults/main.yml @@ -1,4 +1,4 @@ -prometheus_lts_scrape_interval: '5m' +prometheus_lts_scrape_interval: "1m" prometheus_lts_drop_metrics: - node_systemd_unit_state - nginx_http_requests_ms diff --git a/float/roles/float-infra-prometheus/defaults/main.yml b/float/roles/float-infra-prometheus/defaults/main.yml index ee9043219bcdbbc5f1dfd33b597191c360313564..e93a89c0964f2306a46f3f1152843e7eab5f22d6 100644 --- a/float/roles/float-infra-prometheus/defaults/main.yml +++ b/float/roles/float-infra-prometheus/defaults/main.yml @@ -21,3 +21,7 @@ alert_playbook_url: "https://playbooks" prometheus_custom_blackbox_probes: {} thanos_query_frontend_cache_size: '50MB' + +# Scrape interval for the primary Prometheus instances. 
+prometheus_scrape_interval: "10s" + diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/apache.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/apache.json index 015f6bb219bddaf5e4f7632589c906df0d31204b..c4da0f1d01af1197b480b9fd422fc2bd33b2b014 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/apache.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/apache.json @@ -273,7 +273,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(apache_sent_kilobytes_total{instance=~\"^$host:$port\"}[5m])", + "expr": "rate(apache_sent_kilobytes_total{instance=~\"^$host:$port\"}[$__rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -371,7 +371,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(apache_accesses_total{instance=~\"^$host:$port\"}[5m])", + "expr": "rate(apache_accesses_total{instance=~\"^$host:$port\"}[$__rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -847,4 +847,4 @@ "title": "Apache", "uid": "a_mnQfrik", "version": 4 -} \ No newline at end of file +} diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json new file mode 100644 index 0000000000000000000000000000000000000000..b3bf2e599537f52330996d5d6ae3bd36840df23b --- /dev/null +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json @@ -0,0 +1,1825 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Bind9 DNS Service Statistics.", + "editable": true, + "gnetId": 12309, + "graphTooltip": 0, + "id": 27, + "iteration": 1614356044266, + "links": [ + { + "icon": "external link", + "tags": [], + "title": "Dashboard Source", + 
"tooltip": "", + "type": "link", + "url": "https://github.com/pecastro/grafana-dashboards/blob/master/prometheus/bind9-exporter-dns.json" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 19, + "panels": [], + "repeat": null, + "title": "System", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 1 + }, + "height": "150", + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "s ago", + "postfixFontSize": "80%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(node_time_seconds{instance=~\"$node:.*\"}) - max(bind_boot_time_seconds{instance=~\"$node:.*\"}) ", + "interval": "5m", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 600, + "target": "" + } + ], + "thresholds": "", + "title": "Restarted", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 1 + }, + "height": "150px", + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "s ago", + "postfixFontSize": "80%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(node_time_seconds{instance=~\"$node:.*\"}) - max(bind_config_time_seconds{instance=~\"$node:.*\"})", + "interval": "5m", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 600, + "target": "" + } + ], + "thresholds": "", + "title": "Reconfigured", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": 
false + }, + "lines": true, + "linewidth": 3, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(process_cpu_seconds_total{instance=~\"$node:.*\", job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Named CPU Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Max File Descriptors", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, 
+ "steppedLine": false, + "targets": [ + { + "expr": "process_max_fds{instance=\"$node:$port\",job=\"$job\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Max", + "refId": "A", + "step": 10, + "target": "" + }, + { + "expr": "process_open_fds{instance=\"$node:$port\",job=\"$job\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Open", + "refId": "B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Descriptors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 32, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Resident": "#890F02", + "Virtual": "#0A437C", + "Virtual Memory": "#0A437C" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 5 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 3, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"process_virtual_memory_bytes{instance=\"$node:$port\",job=\"$job\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Virtual", + "refId": "A", + "step": 10, + "target": "" + }, + { + "expr": "process_resident_memory_bytes{instance=\"$node:$port\",job=\"$job\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Resident", + "refId": "B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(bind_query_duplicates_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "intervalFactor": 2, + "legendFormat": "Duplicates", + "refId": "A", + "step": 
4, + "target": "" + }, + { + "expr": "increase(bind_query_errors_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "intervalFactor": 2, + "legendFormat": "{{ error }}", + "refId": "B", + "step": 4, + "target": "" + }, + { + "expr": "increase(bind_query_recursions_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "intervalFactor": 2, + "legendFormat": "Recursions", + "refId": "C", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 21, + "panels": [], + "repeat": null, + "title": "Incoming", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ + {} + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_incoming_queries_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ type }}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Incoming Queries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": -1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 20 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_incoming_requests_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ 
opcode }}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Incoming Request Opcodes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_responses_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ result }}", + "refId": "A", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Response Results", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 
null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 23, + "panels": [], + "repeat": null, + "title": "Resolver", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_resolver_response_errors_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / {{ error }}", + "refId": "A", + "step": 4, + "target": "" + }, + { + "expr": "irate(bind_resolver_response_lame_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / LAME", + "refId": "B", + "step": 4, + "target": "" + }, + { + "expr": "irate(bind_resolver_response_mismatch_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{ view }} / MISMATCH", + "refId": "C", + "step": 4, + "target": "" + }, + { + "expr": "irate(bind_resolver_response_truncated_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / TRUNCATED", + "refId": "D", + "step": 4, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resolver Response Errors", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_resolver_queries_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / {{ type }}", + "refId": "A", + "step": 
10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resolver Queries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 42 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_resolver_query_errors_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / {{ error }}", + "refId": "A", + "step": 10, + "target": "" + }, + { + "expr": "irate(bind_resolver_query_edns0_errors_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / EDNS0", + "refId": "B", + "step": 10, + "target": "" + }, + { + "expr": 
"irate(bind_resolver_query_retries_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / Retry", + "refId": "C", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query Errors", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 42 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_resolver_query_duration_seconds_bucket{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / {{ le }}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query By 
Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "bind_resolver_cache_rrsets{instance=\"$node:$port\",job=\"$job\"}", + "intervalFactor": 2, + "legendFormat": "{{ view }} / {{ type }}", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resolver Cache RR Sets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(bind_resolver_dnssec_validation_errors_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / ValErr", + "refId": "A", + "step": 10, + "target": "" + }, + { + "expr": "irate(bind_resolver_dnssec_validation_success_total{instance=\"$node:$port\",job=\"$job\"}[120s])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ view }} / {{ result }}", + "refId": "B", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "DNSSEC Validation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "bind", + "dns", + "bind-exporter", + "prometheus" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "localhost", + "value": "localhost" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "frontend", + "value": "frontend" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(bind_up, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(bind_up, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "devianza.frontend.investici.org", + "value": "devianza.frontend.investici.org" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(bind_up, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Host:", + "multi": false, + "name": "node", + "options": [], + "query": { + "query": "label_values(bind_up, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/([^:]+):.*/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "9119", + "value": "9119" + }, 
+ "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(bind_up{instance=~\"$node:(.*)\"}, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Port", + "multi": false, + "name": "port", + "options": [], + "query": { + "query": "label_values(bind_up{instance=~\"$node:(.*)\"}, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/[^:]+:(.*)/", + "skipUrlSync": false, + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "DNS", + "uid": "XTqyUORMz", + "version": 1 +} \ No newline at end of file diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/elasticsearch.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/elasticsearch.json index 62b860e52fe049caec94d6a00926b843e02ec52b..c89c34b94bb52dbb3cb5ac92bc3f0d242f3b9451 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/elasticsearch.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/elasticsearch.json @@ -1163,7 +1163,7 @@ } ], "dsType": "elasticsearch", - "expr": "avg_over_time(elasticsearch_process_cpu_percent{cluster=~\"$cluster\", name=~\"$node\"}[5m])", + "expr": "avg_over_time(elasticsearch_process_cpu_percent{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -1439,7 +1439,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg_over_time(elasticsearch_jvm_memory_used_bytes{area=\"heap\", cluster=~\"$cluster\", name=~\"$node\"}[1m]) / elasticsearch_jvm_memory_max_bytes{area=\"heap\", 
cluster=~\"$cluster\", name=~\"$node\"}", + "expr": "avg_over_time(elasticsearch_jvm_memory_used_bytes{area=\"heap\", cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval]) / elasticsearch_jvm_memory_max_bytes{area=\"heap\", cluster=~\"$cluster\", name=~\"$node\"}", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -1543,7 +1543,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__interval])", + "expr": "rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -1630,7 +1630,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__interval])", + "expr": "rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -1720,7 +1720,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__interval])) by (name) / sum(rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__interval])) by (name)\nor\nsum(irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[5m])) by (name) / sum(irate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[5m])) by (name)", + "expr": "sum(rate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__rate_interval])) by (name) / sum(rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", 
gc=\"young\"}[$__rate_interval])) by (name)\nor\nsum(irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__rate_interval])) by (name) / sum(irate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"young\"}[$__rate_interval])) by (name)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -1811,7 +1811,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__interval])) by (name) / sum(rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__interval])) by (name)\nor\nsum(irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[5m])) by (name) / sum(irate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[5m])) by (name)", + "expr": "sum(rate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__rate_interval])) by (name) / sum(rate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__rate_interval])) by (name)\nor\nsum(irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__rate_interval])) by (name) / sum(irate(elasticsearch_jvm_gc_collection_seconds_count{cluster=~\"$cluster\", name=~\"$node\", gc=\"old\"}[$__rate_interval])) by (name)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -1918,7 +1918,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_thread_pool_completed_count{cluster=~\"$cluster\", type!=\"management\", name=~\"$node\"}[$__interval])) by (type) or sum(irate(elasticsearch_thread_pool_completed_count{cluster=~\"$cluster\", type!=\"management\", name=~\"$node\"}[5m])) by (type)", + "expr": 
"sum(rate(elasticsearch_thread_pool_completed_count{cluster=~\"$cluster\", type!=\"management\", name=~\"$node\"}[$__rate_interval])) by (type) or sum(irate(elasticsearch_thread_pool_completed_count{cluster=~\"$cluster\", type!=\"management\", name=~\"$node\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -2188,7 +2188,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_thread_pool_completed_count{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])) by (type)", + "expr": "sum(rate(elasticsearch_thread_pool_completed_count{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ type }}", @@ -2279,7 +2279,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_thread_pool_rejected_count{cluster=~\"$cluster\", type!=\"management\", name=~\"$node\"}[$__interval])) by (type)", + "expr": "sum(rate(elasticsearch_thread_pool_rejected_count{cluster=~\"$cluster\", type!=\"management\", name=~\"$node\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -2385,7 +2385,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_transport_rx_packets_total{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])", + "expr": "rate(elasticsearch_transport_rx_packets_total{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} RX", @@ -2393,7 +2393,7 @@ "step": 240 }, { - "expr": "rate(elasticsearch_transport_tx_packets_total{cluster=~\"$cluster\", name=~\"$node\"}[$__interval]) * -1", + "expr": "rate(elasticsearch_transport_tx_packets_total{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval]) * -1", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} TX", @@ -2481,7 +2481,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"rate(elasticsearch_transport_rx_size_bytes_total{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])", + "expr": "rate(elasticsearch_transport_rx_size_bytes_total{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -2489,7 +2489,7 @@ "refId": "A" }, { - "expr": "rate(elasticsearch_transport_tx_size_bytes_total{cluster=~\"$cluster\", name=~\"$node\"}[$__interval]) * -1 ", + "expr": "rate(elasticsearch_transport_tx_size_bytes_total{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval]) * -1 ", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} TX", @@ -2588,7 +2588,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_search_query_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) / sum(rate(elasticsearch_indices_search_query_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval]))", + "expr": "sum(rate(elasticsearch_indices_search_query_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(rate(elasticsearch_indices_search_query_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))", "format": "time_series", "hide": false, "interval": "", @@ -2597,7 +2597,7 @@ "refId": "A" }, { - "expr": "sum(rate(elasticsearch_indices_search_fetch_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) / sum(rate(elasticsearch_indices_search_fetch_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval]))", + "expr": "sum(rate(elasticsearch_indices_search_fetch_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(rate(elasticsearch_indices_search_fetch_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -2687,7 +2687,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_get_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name) / 
sum(rate(elasticsearch_indices_get_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name) \nor\nsum(irate(elasticsearch_indices_get_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name) / sum(irate(elasticsearch_indices_get_total{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name) ", + "expr": "sum(rate(elasticsearch_indices_get_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(rate(elasticsearch_indices_get_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) \nor\nsum(irate(elasticsearch_indices_get_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(irate(elasticsearch_indices_get_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) ", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -2775,14 +2775,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) / sum(rate(elasticsearch_indices_indexing_index_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval]))\nor\nsum(irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[5m])) / sum(irate(elasticsearch_indices_indexing_index_total{cluster=\"$cluster\", name=~\"$node\"}[5m]))", + "expr": "sum(rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(rate(elasticsearch_indices_indexing_index_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))\nor\nsum(irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(irate(elasticsearch_indices_indexing_index_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "index", "refId": "A" }, { - "expr": 
"sum(rate(elasticsearch_indices_indexing_delete_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) / sum(rate(elasticsearch_indices_indexing_delete_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval]))\nor\nsum(irate(elasticsearch_indices_indexing_delete_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[5m])) / sum(irate(elasticsearch_indices_indexing_delete_total{cluster=\"$cluster\", name=~\"$node\"}[5m]))", + "expr": "sum(rate(elasticsearch_indices_indexing_delete_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(rate(elasticsearch_indices_indexing_delete_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))\nor\nsum(irate(elasticsearch_indices_indexing_delete_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(irate(elasticsearch_indices_indexing_delete_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "delete", @@ -2876,14 +2876,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_merges_total_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name) / sum(rate(elasticsearch_indices_merges_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name)\nor\nsum(irate(elasticsearch_indices_merges_total_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name) / sum(irate(elasticsearch_indices_merges_total\t{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name)", + "expr": "sum(rate(elasticsearch_indices_merges_total_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(rate(elasticsearch_indices_merges_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)\nor\nsum(irate(elasticsearch_indices_merges_total_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / 
sum(irate(elasticsearch_indices_merges_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} time", "refId": "A" }, { - "expr": "sum(rate(elasticsearch_indices_merges_total_size_bytes_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name) / sum(rate(elasticsearch_indices_merges_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name)\nor\nsum(irate(elasticsearch_indices_merges_total_size_bytes_total{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name) / sum(irate(elasticsearch_indices_merges_total\t{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name)", + "expr": "sum(rate(elasticsearch_indices_merges_total_size_bytes_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(rate(elasticsearch_indices_merges_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)\nor\nsum(irate(elasticsearch_indices_merges_total_size_bytes_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(irate(elasticsearch_indices_merges_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} size", @@ -2968,7 +2968,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_flush_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) / sum(rate(elasticsearch_indices_flush_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__interval]))\nor\nsum(irate(elasticsearch_indices_flush_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[5m])) / sum(irate(elasticsearch_indices_flush_total\t{cluster=\"$cluster\", name=~\"$node\"}[5m]))", + "expr": "sum(rate(elasticsearch_indices_flush_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(rate(elasticsearch_indices_flush_total\t{cluster=\"$cluster\", 
name=~\"$node\"}[$__rate_interval]))\nor\nsum(irate(elasticsearch_indices_flush_time_seconds{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) / sum(irate(elasticsearch_indices_flush_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "flush", @@ -3056,7 +3056,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_refresh_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name) / sum(rate(elasticsearch_indices_refresh_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__interval])) by (name)\nor\nsum(irate(elasticsearch_indices_refresh_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name) / sum(irate(elasticsearch_indices_refresh_total\t{cluster=\"$cluster\", name=~\"$node\"}[5m])) by (name)", + "expr": "sum(rate(elasticsearch_indices_refresh_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(rate(elasticsearch_indices_refresh_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)\nor\nsum(irate(elasticsearch_indices_refresh_time_seconds_total{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name) / sum(irate(elasticsearch_indices_refresh_total\t{cluster=\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "refresh", @@ -3161,7 +3161,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_fielddata_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])) by (name)", + "expr": "sum(rate(elasticsearch_indices_fielddata_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} evictions", @@ -3355,7 +3355,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_query_cache_count{cluster=~\"$cluster\", 
name=~\"$node\"}[$__interval])) by (name, cache)", + "expr": "sum(rate(elasticsearch_indices_query_cache_count{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name, cache)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} {{ cache }}", @@ -3443,7 +3443,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_indices_query_cache_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])\nor\nirate(elasticsearch_indices_query_cache_evictions{cluster=~\"$cluster\", name=~\"$node\"}[5m])", + "expr": "rate(elasticsearch_indices_query_cache_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])\nor\nirate(elasticsearch_indices_query_cache_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -3721,7 +3721,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_request_cache_count{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])) by (name, cache)", + "expr": "sum(rate(elasticsearch_indices_request_cache_count{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name, cache)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} {{ cache }}", @@ -3809,7 +3809,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_indices_request_cache_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])) by (name)", + "expr": "sum(rate(elasticsearch_indices_request_cache_evictions{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -4202,7 +4202,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_indices_store_throttle_time_seconds_total{cluster=~\"$cluster\"}[$__interval])", + "expr": "rate(elasticsearch_indices_store_throttle_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval])", "format": 
"time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -4681,7 +4681,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_indices_translog_operations{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])", + "expr": "rate(elasticsearch_indices_translog_operations{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -4769,7 +4769,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(elasticsearch_indices_translog_size_in_bytes{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])", + "expr": "rate(elasticsearch_indices_translog_size_in_bytes{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -4959,7 +4959,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_breakers_tripped{cluster=~\"$cluster\", name=~\"$node\"}[$__interval])) by (name, breaker)", + "expr": "sum(rate(elasticsearch_breakers_tripped{cluster=~\"$cluster\", name=~\"$node\"}[$__rate_interval])) by (name, breaker)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }} {{ breaker }}", diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/go.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/go.json index f8f3be1e99aeecc0f1f651d226aba57e8c00d363..3aa32e679275b356f0432690ffb69532c63d3818 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/go.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/go.json @@ -204,7 +204,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(process_resident_memory_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "rate(process_resident_memory_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}@{{host}} - resident", @@ -213,7 +213,7 
@@ "step": 4 }, { - "expr": "deriv(process_virtual_memory_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "deriv(process_virtual_memory_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}@{{host}} - virtual", @@ -435,7 +435,7 @@ "steppedLine": false, "targets": [ { - "expr": "deriv(go_memstats_alloc_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "deriv(go_memstats_alloc_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}@{{host}} - bytes allocated", @@ -444,7 +444,7 @@ "step": 4 }, { - "expr": "rate(go_memstats_alloc_bytes_total{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "rate(go_memstats_alloc_bytes_total{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}@{{host}} - alloc rate", @@ -453,7 +453,7 @@ "step": 4 }, { - "expr": "deriv(go_memstats_stack_inuse_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "deriv(go_memstats_stack_inuse_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}@{{host}} - stack inuse", @@ -462,7 +462,7 @@ "step": 4 }, { - "expr": "deriv(go_memstats_heap_inuse_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "deriv(go_memstats_heap_inuse_bytes{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -647,7 +647,7 @@ "steppedLine": false, "targets": [ { - "expr": "deriv(process_open_fds{job=~\"^($job)$\",host=~\"^($host)$\"}[$interval])", + "expr": "deriv(process_open_fds{job=~\"^($job)$\",host=~\"^($host)$\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{job}}@{{host}}", @@ -937,7 +937,7 @@ "value": "5m" }, 
"datasource": null, - "hide": 0, + "hide": 2, "includeAll": false, "label": "", "multi": false, diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json index 00ccdce7b53d1b6459055dff4d39773a930ee92b..f0dd62489b9bd47d78354db731de26f5bc72e2a2 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json @@ -85,7 +85,7 @@ "steppedLine": false, "targets": [ { - "expr": "100 - (avg by (cpu) (irate(node_cpu_seconds_total{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)", + "expr": "100 - (avg by (cpu) (irate(node_cpu_seconds_total{mode=\"idle\", host=~\"$server\"}[$__rate_interval])) * 100)", "format": "time_series", "hide": true, "intervalFactor": 3, @@ -94,7 +94,7 @@ "step": 200 }, { - "expr": "sum by (mode) (irate(node_cpu_seconds_total{mode!=\"idle\",instance=~\"$server\"}[5m])) / scalar(count(node_cpu_seconds_total{mode=\"idle\",instance=~\"$server\"}))", + "expr": "sum by (mode) (irate(node_cpu_seconds_total{mode!=\"idle\",host=~\"$server\"}[$__rate_interval])) / scalar(count(node_cpu_seconds_total{mode=\"idle\",host=~\"$server\"}))", "format": "time_series", "hide": false, "intervalFactor": 3, @@ -203,7 +203,7 @@ "steppedLine": false, "targets": [ { - "expr": "node_load1{instance=~\"$server\"}", + "expr": "node_load1{host=~\"$server\"}", "format": "time_series", "intervalFactor": 4, "legendFormat": "load 1m", @@ -212,7 +212,7 @@ "target": "" }, { - "expr": "node_load5{instance=~\"$server\"}", + "expr": "node_load5{host=~\"$server\"}", "format": "time_series", "intervalFactor": 4, "legendFormat": "load 5m", @@ -221,7 +221,7 @@ "target": "" }, { - "expr": "node_load15{instance=~\"$server\"}", + "expr": "node_load15{host=~\"$server\"}", "format": "time_series", "intervalFactor": 4, "legendFormat": "load 15m", @@ -230,7 +230,7 @@ "target": "" }, { - "expr": 
"sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",instance=~\"$server\"}) by (cpu))", + "expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",host=~\"$server\"}) by (cpu))", "format": "time_series", "intervalFactor": 4, "legendFormat": "CPUs", @@ -239,7 +239,7 @@ "target": "" }, { - "expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",instance=~\"$server\"}) by (cpu))/2", + "expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",host=~\"$server\"}) by (cpu))/2", "format": "time_series", "hide": true, "intervalFactor": 4, @@ -348,7 +348,7 @@ "steppedLine": false, "targets": [ { - "expr": "node_memory_MemTotal_bytes{instance=~\"$server\"}", + "expr": "node_memory_MemTotal_bytes{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Total", @@ -358,7 +358,7 @@ "target": "" }, { - "expr": "node_memory_Cached_bytes{instance=~\"$server\"}", + "expr": "node_memory_Cached_bytes{host=~\"$server\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -369,7 +369,7 @@ "target": "" }, { - "expr": "node_memory_MemTotal_bytes{instance=~\"$server\"} - node_memory_Writeback_bytes{instance=~\"$server\"} - node_memory_Cached_bytes{instance=~\"$server\"} - node_memory_Buffers_bytes{instance=~\"$server\"} - node_memory_MemFree_bytes{instance=~\"$server\"}", + "expr": "node_memory_MemTotal_bytes{host=~\"$server\"} - node_memory_Writeback_bytes{host=~\"$server\"} - node_memory_Cached_bytes{host=~\"$server\"} - node_memory_Buffers_bytes{host=~\"$server\"} - node_memory_MemFree_bytes{host=~\"$server\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -380,7 +380,7 @@ "target": "" }, { - "expr": "node_memory_MemFree_bytes{instance=~\"$server\"}", + "expr": "node_memory_MemFree_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -491,7 +491,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"node_memory_MemTotal_bytes{instance=~\"$server\"}", + "expr": "node_memory_MemTotal_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -502,7 +502,7 @@ "target": "" }, { - "expr": "1 - (node_memory_SwapFree_bytes{instance=~\"$server\"} / node_memory_SwapTotal_bytes{instance=~\"$server\"})", + "expr": "1 - (node_memory_SwapFree_bytes{host=~\"$server\"} / node_memory_SwapTotal_bytes{host=~\"$server\"})", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -513,7 +513,7 @@ "target": "" }, { - "expr": "node_memory_Dirty_bytes{instance=~\"$server\"}", + "expr": "node_memory_Dirty_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -524,7 +524,7 @@ "target": "" }, { - "expr": "node_memory_WritebackTmp_bytes{instance=~\"$server\"}", + "expr": "node_memory_WritebackTmp_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -535,7 +535,7 @@ "target": "" }, { - "expr": "rate(node_vmstat_pswpin{instance=~\"$server\"}[5m])", + "expr": "rate(node_vmstat_pswpin{host=~\"$server\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -546,7 +546,7 @@ "target": "" }, { - "expr": "node_memory_Writeback_bytes{instance=~\"$server\"}", + "expr": "node_memory_Writeback_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -557,7 +557,7 @@ "target": "" }, { - "expr": "rate(node_vmstat_pswpout{instance=~\"$server\"}[5m])", + "expr": "rate(node_vmstat_pswpout{host=~\"$server\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -661,7 +661,7 @@ "steppedLine": false, "targets": [ { - "expr": "8*sum(irate(node_network_receive_bytes_total{instance=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[5m]) > 0)", + "expr": "8*sum(irate(node_network_receive_bytes_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": false, 
"intervalFactor": 2, @@ -671,7 +671,7 @@ "target": "" }, { - "expr": "8*sum(irate(node_network_transmit_bytes_total{instance=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[5m]) > 0)", + "expr": "8*sum(irate(node_network_transmit_bytes_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -681,7 +681,7 @@ "target": "" }, { - "expr": "8*sum(irate(node_network_transmit_packets_total{instance=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[5m]) > 0)", + "expr": "8*sum(irate(node_network_transmit_packets_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -692,7 +692,7 @@ "target": "" }, { - "expr": "8*sum(irate(node_network_receive_packets_total{instance=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[5m]) > 0)", + "expr": "8*sum(irate(node_network_receive_packets_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -785,7 +785,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(node_network_transmit_drop_total{instance=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[5m]) + irate(node_network_receive_drop_total{instance=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[5m])", + "expr": "irate(node_network_transmit_drop_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval]) + irate(node_network_receive_drop_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -796,7 +796,7 @@ "target": "" }, { - "expr": "irate(node_network_transmit_errs_total{instance=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[5m]) + irate(node_network_receive_errs_total{instance=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[5m])", + "expr": 
"irate(node_network_transmit_errs_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval]) + irate(node_network_receive_errs_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -891,7 +891,7 @@ "yaxis": 1 }, { - "alias": "{instance=\"172.17.0.1:9100\"}", + "alias": "{host=\"172.17.0.1:9100\"}", "yaxis": 2 }, { @@ -904,7 +904,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(node_disk_reads_completed_total{instance=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[5m])", + "expr": "irate(node_disk_reads_completed_total{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 4, @@ -914,7 +914,7 @@ "step": 1200 }, { - "expr": "irate(node_disk_writes_completed_total{instance=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[5m])", + "expr": "irate(node_disk_writes_completed_total{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 4, @@ -924,7 +924,7 @@ "step": 1200 }, { - "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[5m])irate(node_disk_io_time_seconds_total{instance=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[5m])", + "expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 4, @@ -934,7 +934,7 @@ "step": 20 }, { - "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[5m])", + "expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": 
"{{device}}", @@ -1025,7 +1025,7 @@ "yaxis": 1 }, { - "alias": "{instance=\"172.17.0.1:9100\"}", + "alias": "{host=\"172.17.0.1:9100\"}", "yaxis": 2 }, { @@ -1038,7 +1038,7 @@ "steppedLine": false, "targets": [ { - "expr": "node_disk_io_now{instance=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}", + "expr": "node_disk_io_now{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}", "format": "time_series", "hide": false, "intervalFactor": 4, @@ -1048,7 +1048,7 @@ "step": 60 }, { - "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[5m])", + "expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 4, @@ -1058,7 +1058,7 @@ "step": 1200 }, { - "expr": "node_md_is_active{instance=~\"$server\"} < 1", + "expr": "node_md_is_active{host=~\"$server\"} < 1", "format": "time_series", "hide": false, "intervalFactor": 4, @@ -1147,7 +1147,7 @@ "steppedLine": false, "targets": [ { - "expr": "node_sockstat_TCP_tw{instance=~\"$server\"}", + "expr": "node_sockstat_TCP_tw{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "tcp/timewait", @@ -1156,7 +1156,7 @@ "target": "" }, { - "expr": "node_sockstat_UDP_inuse{instance=~\"$server\"}", + "expr": "node_sockstat_UDP_inuse{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "udp/inuse", @@ -1165,7 +1165,7 @@ "target": "" }, { - "expr": "node_sockstat_TCP_inuse{instance=~\"$server\"}", + "expr": "node_sockstat_TCP_inuse{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "tcp/inuse", @@ -1174,7 +1174,7 @@ "target": "" }, { - "expr": "node_nf_conntrack_entries{instance=~\"$server.*\"} / node_nf_conntrack_entries_limit{instance=~\"$server.*\"}", + "expr": "node_nf_conntrack_entries{host=~\"$server.*\"} / node_nf_conntrack_entries_limit{host=~\"$server.*\"}", "format": 
"time_series", "hide": true, "intervalFactor": 2, @@ -1261,7 +1261,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(node_netstat_Tcp_InErrs{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Tcp_InErrs{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "tcp/inerrs", @@ -1270,7 +1270,7 @@ "target": "" }, { - "expr": "rate(node_netstat_Tcp_AttemptFails{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Tcp_AttemptFails{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "tcp/attemptfails", @@ -1279,7 +1279,7 @@ "target": "" }, { - "expr": "rate(node_netstat_Tcp_EstabResets{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Tcp_EstabResets{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1289,7 +1289,7 @@ "target": "" }, { - "expr": "rate(node_netstat_Udp_RcvbufErrors{instance=~\"$server.*\"}[5m]) + rate(node_netstat_Udp_SndbufErrors{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Udp_RcvbufErrors{host=~\"$server.*\"}[$__rate_interval]) + rate(node_netstat_Udp_SndbufErrors{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "udp/buferr", @@ -1299,7 +1299,7 @@ "target": "" }, { - "expr": "rate(node_netstat_Udp_InErrors{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Udp_InErrors{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "udp/inerrs", @@ -1308,7 +1308,7 @@ "target": "" }, { - "expr": "rate(node_netstat_TcpExt_RcvPruned{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_TcpExt_RcvPruned{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1318,7 +1318,7 @@ "step": 30 }, { - "expr": "rate(node_netstat_TcpExt_SyncookiesFailed{instance=~\"$server.*\"}[5m])", + "expr": 
"rate(node_netstat_TcpExt_SyncookiesFailed{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1328,7 +1328,7 @@ "step": 30 }, { - "expr": "rate(node_netstat_TcpExt_ListenDrops{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_TcpExt_ListenDrops{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1338,7 +1338,7 @@ "step": 30 }, { - "expr": "rate(node_netstat_Icmp_InErrors{instance=~\"$server.*\"}[5m]) + rate(node_netstat_Icmp_OutErrors{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Icmp_InErrors{host=~\"$server.*\"}[$__rate_interval]) + rate(node_netstat_Icmp_OutErrors{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1348,7 +1348,7 @@ "step": 30 }, { - "expr": "rate(node_netstat_Tcp_OutRsts{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_netstat_Tcp_OutRsts{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1437,7 +1437,7 @@ "steppedLine": false, "targets": [ { - "expr": "node_filefd_allocated{instance=~\"$server.*\"} / node_filefd_maximum{instance=~\"$server.*\"}", + "expr": "node_filefd_allocated{host=~\"$server.*\"} / node_filefd_maximum{host=~\"$server.*\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1448,7 +1448,7 @@ "target": "isNonNull()" }, { - "expr": "node_procs_running{instance=~\"$server.*\"}", + "expr": "node_procs_running{host=~\"$server.*\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1459,7 +1459,7 @@ "target": "isNonNull()" }, { - "expr": "node_nf_conntrack_entries{instance=~\"$server.*\"} / node_nf_conntrack_entries_limit{instance=~\"$server.*\"}", + "expr": "node_nf_conntrack_entries{host=~\"$server.*\"} / node_nf_conntrack_entries_limit{host=~\"$server.*\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1560,7 +1560,7 @@ 
"steppedLine": false, "targets": [ { - "expr": "node_procs_blocked{instance=~\"$server.*\"}", + "expr": "node_procs_blocked{host=~\"$server.*\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1571,7 +1571,7 @@ "target": "isNonNull()" }, { - "expr": "node_entropy_available_bits{instance=~\"$server.*\"}", + "expr": "node_entropy_available_bits{host=~\"$server.*\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "entropy bytes available", @@ -1581,7 +1581,7 @@ "target": "isNonNull()" }, { - "expr": "rate(node_forks_total{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_forks_total{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1592,7 +1592,7 @@ "target": "isNonNull()" }, { - "expr": "rate(node_intr_total{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_intr_total{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1681,7 +1681,7 @@ "steppedLine": false, "targets": [ { - "expr": "rate(node_edac_uncorrectable_errors_total{instance=\"$server.*\"}[5m])", + "expr": "rate(node_edac_uncorrectable_errors_total{host=\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1692,7 +1692,7 @@ "target": "isNonNull()" }, { - "expr": "node_textfile_scrape_error{instance=\"$server.*\"}", + "expr": "node_textfile_scrape_error{host=\"$server.*\"}", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1703,7 +1703,7 @@ "target": "isNonNull()" }, { - "expr": "rate(node_intr_total{instance=~\"$server.*\"}[5m])", + "expr": "rate(node_intr_total{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -1714,7 +1714,7 @@ "target": "isNonNull()" }, { - "expr": "rate(node_edac_correctable_errors_total{instance=\"$server.*\"}[5m])", + "expr": "rate(node_edac_correctable_errors_total{host=\"$server.*\"}[$__rate_interval])", "format": 
"time_series", "hide": false, "intervalFactor": 2, @@ -1808,7 +1808,7 @@ "steppedLine": false, "targets": [ { - "expr": "1- (node_filesystem_avail_bytes{instance=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_size_bytes{instance=~\"$server\"})", + "expr": "1- (node_filesystem_avail_bytes{host=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_size_bytes{host=~\"$server\"})", "format": "time_series", "hide": false, "intervalFactor": 4, @@ -1818,7 +1818,7 @@ "target": "" }, { - "expr": "1- (node_filesystem_files_free{instance=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_files{instance=~\"$server\"})", + "expr": "1- (node_filesystem_files_free{host=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_files{host=~\"$server\"})", "format": "time_series", "hide": false, "intervalFactor": 4, @@ -1900,7 +1900,7 @@ "multi": false, "name": "server", "options": [], - "query": "label_values(node_boot_time_seconds, instance)", + "query": "label_values(node_boot_time_seconds, host)", "refresh": 1, "regex": "", "skipUrlSync": false, diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/http.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/http.json index 68abd31fff3cd3c989bc6ee16216af62586f19b1..de8e73c54a505479a5f1a22e3700a32b847b7dc3 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/http.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/http.json @@ -148,7 +148,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, sum(rate(nginx_http_requests{vhost=~\"^$vhost\",code=~\"[45].*\"}[5m])) by (code))", + "expr": "topk(10, sum(rate(nginx_http_requests{vhost=~\"^$vhost\",code=~\"[45].*\"}[$__rate_interval])) by (code))", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -238,7 +238,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, 
sum(rate(nginx_http_requests{vhost=~\"^$vhost$\"}[5m])) by (backend))", + "expr": "topk(10, sum(rate(nginx_http_requests{vhost=~\"^$vhost$\"}[$__rate_interval])) by (backend))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{backend}}", @@ -417,7 +417,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.5, sum(rate(nginx_http_requests_ms{vhost=~\"^$vhost$\"}[5m])) by (vhost,le))", + "expr": "histogram_quantile(0.5, sum(rate(nginx_http_requests_ms{vhost=~\"^$vhost$\"}[$__rate_interval])) by (vhost,le))", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -425,14 +425,14 @@ "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum(rate(nginx_http_requests_ms{vhost=~\"^$vhost$\"}[5m])) by (vhost,le))", + "expr": "histogram_quantile(0.9, sum(rate(nginx_http_requests_ms{vhost=~\"^$vhost$\"}[$__rate_interval])) by (vhost,le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{vhost}}/90pct", "refId": "B" }, { - "expr": "histogram_quantile(0.99, sum(rate(nginx_http_requests_ms{vhost=~\"^$vhost$\"}[5m])) by (vhost,le))", + "expr": "histogram_quantile(0.99, sum(rate(nginx_http_requests_ms{vhost=~\"^$vhost$\"}[$__rate_interval])) by (vhost,le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{vhost}}/99pct", @@ -635,4 +635,4 @@ "title": "HTTP", "uid": "xCSUMFnmz", "version": 5 -} \ No newline at end of file +} diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/prometheus.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/prometheus.json index 0efc67abb065ac3eed403925ae38c006b2b53107..560fec6f95a3149266ef9f1f10936487a3571116 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/prometheus.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/prometheus.json @@ -28,7 +28,7 @@ { "datasource": "$datasource", "enable": true, - "expr": 
"sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)", + "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[$__rate_interval])) by (instance)", "hide": false, "iconColor": "#fceaca", "limit": 100, @@ -2306,7 +2306,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)", + "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ instance }}", @@ -2986,7 +2986,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)", + "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[$__rate_interval]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ instance }}", diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/service.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/service.json index c566fa0708766b2ed1fd1112718eb6e42a8c6856..2b77f87a03888766b596067069094dd99de64075 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/service.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/service.json @@ -15,8 +15,7 @@ "editable": true, "gnetId": null, "graphTooltip": 1, - "id": null, - "iteration": 1560209742277, + "iteration": 1614086490638, "links": [ { "icon": "external link", @@ -25,19 +24,41 @@ } ], "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "Performance", + "type": "row" + 
}, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "localhost", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 5, "w": 8, "x": 0, - "y": 0 + "y": 1 }, + "hiddenSeries": false, "id": 2, "legend": { "avg": false, @@ -52,7 +73,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.4.0", "pointradius": 5, "points": false, "renderer": "flot", @@ -62,14 +87,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(cgroup_cpu_usage{mode=\"user\",service=\"$service.service\"}[5m])", + "expr": "rate(cgroup_cpu_usage{mode=\"user\",service=\"$service.service\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{host}}/user", "refId": "A" }, { - "expr": "rate(group_cpu_usage{mode=\"system\",service=\"$service.service\"}[5m])", + "expr": "rate(group_cpu_usage{mode=\"system\",service=\"$service.service\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{host}}/sys", @@ -123,13 +148,21 @@ "dashLength": 10, "dashes": false, "datasource": "localhost", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 5, "w": 8, "x": 8, - "y": 0 + "y": 1 }, + "hiddenSeries": false, "id": 4, "legend": { "avg": false, @@ -144,7 +177,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.4.0", "pointradius": 5, "points": false, "renderer": "flot", @@ -209,13 +246,21 @@ "dashLength": 10, "dashes": false, "datasource": "localhost", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 5, "w": 8, "x": 16, - "y": 0 + "y": 1 }, + "hiddenSeries": false, "id": 6, "legend": { "avg": false, 
@@ -230,7 +275,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.4.0", "pointradius": 5, "points": false, "renderer": "flot", @@ -240,7 +289,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cgroup_blkio_bytes{service=\"$service.service\"}[5m])) by (host,mode)", + "expr": "sum(rate(cgroup_blkio_bytes{service=\"$service.service\"}[$__rate_interval])) by (host,mode)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -295,13 +344,21 @@ "dashLength": 10, "dashes": false, "datasource": "localhost", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 5, "w": 24, "x": 0, - "y": 5 + "y": 6 }, + "hiddenSeries": false, "id": 8, "legend": { "avg": false, @@ -316,12 +373,23 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.4.0", "pointradius": 5, "points": false, "renderer": "flot", "repeat": "vhosts", "repeatDirection": "h", + "scopedVars": { + "vhosts": { + "selected": false, + "text": "noblogs-cdn.autistici.org:443", + "value": "noblogs-cdn.autistici.org:443" + } + }, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -375,10 +443,128 @@ "align": false, "alignLevel": null } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 12, + "panels": [], + "repeat": "service", + "scopedVars": { + "service": { + "selected": true, + "text": "docker-noblogs-http", + "value": "docker-noblogs-http" + } + }, + "title": "Versions", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "localhost", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + 
"y": 12 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "expr": "count(container_digest{service=\"$service\"}) by (digest)", + "interval": "", + "legendFormat": "{{digest}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Container Digest", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "5s", - "schemaVersion": 18, + "schemaVersion": 27, "style": "dark", "tags": [], "templating": { @@ -388,13 +574,18 @@ "current": {}, "datasource": "localhost", "definition": "", + "description": null, + "error": null, "hide": 0, "includeAll": false, "label": "Service", "multi": false, "name": "service", "options": [], - "query": "label_values(cgroup_cpu_usage, service)", + "query": { + "query": "label_values(cgroup_cpu_usage, service)", + "refId": "localhost-service-Variable-Query" + }, "refresh": 1, "regex": "/(.*)\\.service/", "skipUrlSync": false, @@ -410,13 +601,18 @@ "current": {}, "datasource": "localhost", "definition": "", + "description": null, + "error": null, "hide": 2, 
"includeAll": false, "label": null, "multi": false, "name": "vhosts", "options": [], - "query": "query_result(sum (vhostmap{service=\"$service.service\"}) by (vhost))", + "query": { + "query": "query_result(sum (vhostmap{service=\"$service.service\"}) by (vhost))", + "refId": "localhost-vhosts-Variable-Query" + }, "refresh": 1, "regex": "/^.*vhost=\"([^\"]*)\".*$/", "skipUrlSync": false, @@ -461,5 +657,5 @@ "timezone": "", "title": "Service overview", "uid": "xfV2rd7ik", - "version": 3 + "version": 7 } diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/system_overview.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/system_overview.json index f98b1b59028b68616e5ccd4a12392dde6c7206ad..f0b37b254f0ce0ab180a488ead16202bc34f478a 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/system_overview.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/system_overview.json @@ -366,4 +366,4 @@ "list": [] }, "version": 4 -} \ No newline at end of file +} diff --git a/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 b/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 index c505d487949bf54e56c4f23790f481848c2abad9..8b6e6ae01f69ec4d89a344e9e911430230511cd6 100644 --- a/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 +++ b/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 @@ -54,7 +54,7 @@ {% endmacro %} global: - scrape_interval: "10s" + scrape_interval: "{{ prometheus_scrape_interval }}" # Set an external label unique to this host, but remove it # from the generated alerts so that they match across all diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_cpu.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_cpu.conf.yml index 7568f0f7c4b80f9159a3f1bd67220d8a7066199d..326ca00fd92e634b73ee3a30e43560fd90023102 100644 --- a/float/roles/float-infra-prometheus/templates/rules/alerts_cpu.conf.yml +++ 
b/float/roles/float-infra-prometheus/templates/rules/alerts_cpu.conf.yml @@ -24,3 +24,13 @@ groups: description: 'Load average on host {{$labels.host}} is very high ({{$value}}), the host is likely unresponsive.' runbook: '[[ alert_playbook_url ]]/HostThrashing' + - alert: ThermalEnvelopeThrottling + expr: host:kernel_cpu_throttled:max > 0 + for: 2h + labels: + scope: host + severity: warn + annotations: + summary: 'Host {{$labels.host}} is running too hot' + description: | + The CPU on {{$labels.host}} is being throttled because it is running too hot. diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_cpu.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_cpu.conf.yml index 5e6d90a1478c84541abe4d70314acfd9fc603af4..3d8f45ca2d206951067e182b7dcb800c64174c9f 100644 --- a/float/roles/float-infra-prometheus/templates/rules/rules_cpu.conf.yml +++ b/float/roles/float-infra-prometheus/templates/rules/rules_cpu.conf.yml @@ -9,3 +9,5 @@ groups: expr: sum(rate(node_cpu_seconds_total{mode!="idle"}[5m])) by (host, instance) - record: instance_utilization:rate5m expr: instance_utilization:node_cpu:rate5m / instance:node_cpus:count + - record: host:kernel_cpu_throttled:max + expr: max(kernel_cpu_throttled) by (host) diff --git a/float/roles/float-util-credentials/tasks/main.yml b/float/roles/float-util-credentials/tasks/main.yml index 9a61b5936671af7b859437126c5b48ae039bddcf..20594ccbf9b53620902f65e0591a3eb6d37e3e4c 100644 --- a/float/roles/float-util-credentials/tasks/main.yml +++ b/float/roles/float-util-credentials/tasks/main.yml @@ -96,7 +96,7 @@ # This should use the systemd module but it doesn't take lists of services. 
- name: "Restart associated services" shell: "systemctl restart {{ services[item.0.service].systemd_services | join(' ') }}" - when: "item.1.changed and item.0.service != 'LOCAL'" + when: "item.1.changed and item.0.service != 'LOCAL' and services[item.0.service].systemd_services" loop: "{{ credentials | zip(x509_sign.results) | list }}" rescue: diff --git a/float/roles/float-util-mariadb/defaults/main.yml b/float/roles/float-util-mariadb/defaults/main.yml deleted file mode 100644 index 636e270eb0d655685eccc38a180fde42665acc20..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/defaults/main.yml +++ /dev/null @@ -1,41 +0,0 @@ ---- - -mariadb_instance: default -mariadb_service: "mariadb@{{ mariadb_instance }}.service" -mariadb_socket: "/var/run/mariadb-{{ mariadb_instance }}/server.sock" -mariadb_config: "/etc/mysql/conf.d/my{{ mariadb_instance }}.cnf" -mariadb_config_dir: "/etc/mysql/{{ mariadb_instance }}.conf.d" -mariadb_data_dir: "/var/lib/mariadb/{{ mariadb_instance }}" -mariadb_client: "/usr/local/bin/mysql-{{ mariadb_instance }}" - -# Server configuration -mariadb_server_id: 1 -mariadb_port: 3306 -mariadb_bind_address: 0.0.0.0 - -# InnoDB -mariadb_innodb_log_file_size: 256M - -# User limits -mariadb_max_user_connections: 200 - -# System limits -mariadb_ulimit_nofile: 65535 - -# Monitoring configuration -mariadb_metrics_port: "{{ mariadb_port + 6000 }}" -mariadb_metrics_address: "" -mariadb_metrics_config: "/etc/prometheus/mysql-{{ mariadb_instance }}.cnf" -mariadb_metrics_service: "prometheus-mysqld-exporter@{{ mariadb_instance }}.service" - -# Replication -mariadb_enable_replication: false -mariadb_is_master: false -mariadb_master_host: "" -mariadb_master_port: "{{ mariadb_port }}" -mariadb_replication_user: replica -mariadb_replication_password: "" -mariadb_replicate_dbs: [] - -# Custom server settings. 
-mariadb_settings: {} diff --git a/float/roles/float-util-mariadb/files/mariadb-replicator b/float/roles/float-util-mariadb/files/mariadb-replicator deleted file mode 100755 index c0810a9052fd1e5358a1805dacc084951f01e153..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/files/mariadb-replicator +++ /dev/null @@ -1,234 +0,0 @@ -#!/bin/bash -# -# Manage replication state for a MariaDB (>=10) instance. -# -# Uses a state file, stored in the data directory, to detect state -# changes. The possible states are 'init' (empty database on first -# install), 'slave' and 'master'. The state file stores a fingerprint -# of the master connection parameters, to detect changes in those as -# well. -# -# The script can manage one or more databases, passed as arguments -# on the command line, or all the databases if no arguments are given. -# -# When first setting up a slave, it will pull a dump of the current -# contents from the master, using the credentials of the replication -# user: make sure that it also has SELECT permissions on all tables. -# - -set -o pipefail - -usage() { - cat <<EOF -Usage: $0 [<OPTIONS>] [<DATABASE>...] 
-Options: - --master | --slave - --defaults-file FILE - --master-host HOST - --master-port PORT - --replication-user USER - --replication-password PASSWORD - -EOF - exit 2 -} - -die() { - echo "ERROR: $*" >&2 - exit 1 -} - -get_current_state() { - if [ -e "${DATADIR}/.replication_state" ]; then - cat "${DATADIR}/.replication_state" - else - echo "init" - fi -} - -compute_new_state() { - new_master_hash=$(echo -n "${master_host}:${master_port}:${replication_user}:${replication_password}" | sha1sum | cut -d' ' -f 1) - echo "${role}:${new_master_hash}" -} - -pull_database_from_master() { - MYSQLDUMP="mysqldump --host=${master_host} --port=${master_port} --user=${replication_user} --password=${replication_password} --gtid --master-data --opt --single-transaction --quick" - if [ -z "${databases}" ]; then - ${MYSQLDUMP} --all-databases | ${MYSQL} - else - ${MYSQLDUMP} --databases ${databases} | ${MYSQL} - fi -} - -setup_slave() { - ${MYSQL} -NBe "CHANGE MASTER TO -MASTER_HOST='${master_host}', -MASTER_PORT=${master_port}, -MASTER_USER='${replication_user}', -MASTER_PASSWORD='${replication_password}', -MASTER_CONNECT_RETRY=10, -MASTER_USE_GTID=slave_pos; -START SLAVE" -} - -promote_slave_to_master() { - ${MYSQL} -NBe "STOP SLAVE; RESET MASTER" -} - -demote_master_to_slave() { - ${MYSQL} -NBe "RESET MASTER" - setup_slave -} - -change_slave_parameters() { - ${MYSQL} -NBe "STOP SLAVE" - setup_slave -} - -# Parse command-line options. 
-defaults_file=/etc/mysql/my.cnf -master_host= -master_port=3306 -replication_user= -replication_password= -role= -databases= - -while [ $# -gt 0 ]; do - case "$1" in - --defaults-file=*) - defaults_file="${1##--defaults-file=}" - ;; - --defaults-file) - defaults_file="$2" - shift - ;; - - --master-host=*) - master_host="${1##--master-host=}" - ;; - --master-host) - master_host="$2" - shift - ;; - - --master-port=*) - master_port="${1##--master-port=}" - ;; - --master-port) - master_port="$2" - shift - ;; - - --replication-user=*) - replication_user="${1##--replication-user=}" - ;; - --replication-user) - replication_user="$2" - shift - ;; - - --replication-password=*) - replication_password="${1##--replication-password=}" - ;; - --replication-password) - replication_password="$2" - shift - ;; - - --master|--slave) - if [ -n "${role}" ]; then - echo "Must specify only one of --master or --slave" >&2 - echo "Run with --help for help." >&2 - exit 2 - fi - role="${1##--}" - ;; - - -h|--help) - usage - ;; - - -*) - echo "Unknown argument: $1" >&2 - echo "Run with --help for help." >&2 - exit 2 - ;; - - *) - databases="${databases} $1" - ;; - esac - shift -done - -if [ -z "${role}" ]; then - echo "Must specify the desired role using either --master or --slave" >&2 - exit 2 -fi - -if [ "${role}" = "slave" ]; then - if [ -z "${master_host}" -o -z "${master_port}" ]; then - echo "Must specify --master-host and --master-port" >&2 - exit 2 - fi - if [ -z "${replication_user}" -o -z "${replication_password}" ]; then - echo "Must specify --replication-user and --replication-password" >&2 - exit 2 - fi -fi - -MYSQL=/usr/bin/mysql -if [ -e "${defaults_file}" ]; then - MYSQL="${MYSQL} --defaults-file=${defaults_file}" -fi - -DATADIR=$(${MYSQL} -NBe 'select @@datadir;') -[ -n "${DATADIR}" ] || die "can't connect to mysql" -[ -d "${DATADIR}" ] || die "data directory ${DATADIR} does not exist" - -# Compare current and desired state. 
-cur_state=$(get_current_state) -new_state=$(compute_new_state) -if [ "${cur_state}" = "${new_state}" ]; then - echo "Nothing to do." - exit 0 -fi - -# Handle the transition. -cur_role="${cur_state%%:*}" - -echo "replication state needs to change: ${cur_role} -> ${role}" - -case "${cur_role}->${role}" in - "init->master") - # Nothing to do in this case. - ;; - "init->slave") - # Pull the database contents from the master to initialize a slave. - pull_database_from_master - setup_slave - ;; - "slave->slave") - # Master parameters have changed. - change_slave_parameters - ;; - "slave->master") - promote_slave_to_master - ;; - "master->slave") - demote_master_to_slave - ;; - *) - die "unsupported state transition ${cur_role} -> ${role}" - ;; -esac - -if [ $? -gt 0 ]; then - die "could not complete state transition" -fi - -# Update the state file. -echo "${new_state}" > "${DATADIR}/.replication_state" - -exit 0 diff --git a/float/roles/float-util-mariadb/files/mariadb@.service b/float/roles/float-util-mariadb/files/mariadb@.service deleted file mode 100644 index adc8d5520d50f492de4c7e2a5acd48e5f9ea29aa..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/files/mariadb@.service +++ /dev/null @@ -1,173 +0,0 @@ -# Multi instance version of mariadb. For if you run multiple versions at once. -# Also used for mariadb@bootstrap to bootstrap Galera. -# -# create config file /etc/mysql/conf.d/my{instancename}.cnf -# -# start as systemctl start mariadb@{instancename}.server - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or -# (at your option) any later version. 
-# -# Thanks to: -# Daniel Black -# Erkan Yanar -# David Strauss -# and probably others -# Inspired from https://gitweb.gentoo.org/repo/gentoo.git/tree/dev-db/mysql-init-scripts/files/mysqld_at.service - -[Unit] -Description=MariaDB database server -After=network.target - -ConditionPathExists=/etc/mysql/conf.d/my%I.cnf - -[Install] -WantedBy=multi-user.target -#Alias=mysql.service -#Alias=mysqld.service - - -[Service] - -############################################################################## -## Core requirements -## - -Type=notify - -# Setting this to true can break replication and the Type=notify settings -# See also bind-address mysqld option. -PrivateNetwork=false - -############################################################################## -## Package maintainers -## - -User=mysql -Group=mysql - -# To allow memlock to be used as non-root user if set in configuration -CapabilityBoundingSet=CAP_IPC_LOCK - -# Prevent writes to /usr, /boot, and /etc -ProtectSystem=full - -# Doesn't yet work properly with SELinux enabled -NoNewPrivileges=true - -PrivateDevices=true - -# Prevent accessing /home, /root and /run/user -ProtectHome=true - -# Execute pre and post scripts as root, otherwise it does it as User= -PermissionsStartOnly=true - -ExecStartPre=/usr/bin/install -m 755 -o mysql -g root -d /var/run/mysqld - -# Perform automatic wsrep recovery. When server is started without wsrep, -# galera_recovery simply returns an empty string. In any case, however, -# the script is not expected to return with a non-zero status. -# It is always safe to unset _WSREP_START_POSITION%I environment variable. -# Do not panic if galera_recovery script is not available. (MDEV-10538) -ExecStartPre=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION%I" - -ExecStartPre=/bin/sh -c "[ ! -e /usr/bin/galera_recovery ] && VAR= || \ - VAR=`/usr/bin/galera_recovery --defaults-file=/etc/mysql/conf.d/my%I.cnf`; [ $? 
-eq 0 ] \ - && systemctl set-environment _WSREP_START_POSITION%I=$VAR || exit 1" - -# Alternate: (remove ConditionPathExists above) -# use [mysqld.INSTANCENAME] as sections in my.cnf -# -#ExecStartPre=/bin/sh -c "[ ! -e /usr/bin/galera_recovery ] && VAR= || \ -# VAR=`/usr/bin/galera_recovery --defaults-group-suffix=%I`; [ $? -eq 0 ] \ -# && systemctl set-environment _WSREP_START_POSITION%I=$VAR || exit 1" - -# Needed to create system tables etc. -# ExecStartPre=/usr/bin/mysql_install_db -u mysql - -# Start main service -# MYSQLD_OPTS here is for users to set in /etc/systemd/system/mariadb@.service.d/MY_SPECIAL.conf -# Use the [service] section and Environment="MYSQLD_OPTS=...". -# This isn't a replacement for my.cnf. -# _WSREP_NEW_CLUSTER is for the exclusive use of the script galera_new_cluster - -# Note: Place $MYSQLD_OPTS at the very end for its options to take precedence. - -ExecStart=/usr/sbin/mysqld --defaults-file=/etc/mysql/conf.d/my%I.cnf \ - $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION%I $MYSQLD_OPTS -# Alternate: (remove ConditionPathExists above) -# use [mysqld.INSTANCENAME] as sections in my.cnf -# -# ExecStart=/usr/sbin/mysqld --defaults-group-suffix=%I \ -# $_WSREP_NEW_CLUSTER $_WSREP_START_POSITION%I $MYSQLD_OPTS - -# Unset _WSREP_START_POSITION environment variable. -ExecStartPost=/bin/sh -c "systemctl unset-environment _WSREP_START_POSITION%I" - -KillMode=process -KillSignal=SIGTERM - -# Don't want to see an automated SIGKILL ever -SendSIGKILL=no - -# Restart crashed server only, on-failure would also restart, for example, when -# my.cnf contains unknown option -Restart=on-abort -RestartSec=5s - -UMask=007 - -############################################################################## -## USERs can override -## -## -## by creating a file in /etc/systemd/system/mariadb.service.d/MY_SPECIAL.conf -## and adding/setting the following will override this file's settings. 
- -# Useful options not previously available in [mysqld_safe] - -# Kernels like killing mysqld when out of memory because its big. -# Lets temper that preference a little. -# OOMScoreAdjust=-600 - -# Explicitly start with high IO priority -# BlockIOWeight=1000 - -# If you don't use the /tmp directory for SELECT ... OUTFILE and -# LOAD DATA INFILE you can enable PrivateTmp=true for a little more security. -PrivateTmp=true - -## -## Options previously available to be set via [mysqld_safe] -## that now needs to be set by systemd config files as mysqld_safe -## isn't executed. -## - -# Number of files limit. previously [mysqld_safe] open-file-limit -LimitNOFILE=16364 - -# Maximium core size. previously [mysqld_safe] core-file-size -# LimitCore= - -# Nice priority. previously [mysqld_safe] nice -# Nice=-5 - -# Timezone. previously [mysqld_safe] timezone -# Environment="TZ=UTC" - -# Library substitutions. previously [mysqld_safe] malloc-lib with explicit paths -# (in LD_LIBRARY_PATH) and library name (in LD_PRELOAD). -# Environment="LD_LIBRARY_PATH=/path1 /path2" "LD_PRELOAD= - -# Flush caches. previously [mysqld_safe] flush-caches=1 -# ExecStartPre=sync -# ExecStartPre=sysctl -q -w vm.drop_caches=3 - -# numa-interleave=1 equalivant -# Change ExecStart=numactl --interleave=all /usr/sbin/mysqld...... 
- -# crash-script equalivent -# FailureAction= diff --git a/float/roles/float-util-mariadb/handlers/main.yml b/float/roles/float-util-mariadb/handlers/main.yml deleted file mode 100644 index 89774f263dd98cb85d3e6a00a2697ed4da6b08ba..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/handlers/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- - -- name: systemctl reset-failed - command: "systemctl reset-failed" - changed_when: false - diff --git a/float/roles/float-util-mariadb/tasks/main.yml b/float/roles/float-util-mariadb/tasks/main.yml deleted file mode 100644 index 9f410f96ea9811998f646f5af8f6cd236a2241b5..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/tasks/main.yml +++ /dev/null @@ -1,139 +0,0 @@ -# Main task for mariadb. - -# A single instance is identified by these variables: -# - mariadb_instance (default) -# - mariadb_port (3306) -# - mariadb_metrics_port (9104) -# -# If you are using replication remember to set mariadb_server_id accordingly! -# -# Example usage: -# - include_role: -# name: mariadb -# vars: -# mariadb_instance: apps -# mariadb_port: 3307 -# mariadb_metrics_port: 9144 -# -# The server and client configuration can be found in {{ mariadb_config }}. -# However for convenience a mysql-{{ mariadb_instance }} wrapper is also -# installed. 
- -- file: - path: "{{ mariadb_config | dirname }}" - state: directory - -- file: - path: "{{ mariadb_config_dir }}" - state: directory - -- name: Server/Client configuration file - template: - src: templates/mariadb.cnf.j2 - dest: "{{ mariadb_config }}" - register: mariadb_server_config - -- name: Install client wrappers - template: - src: templates/{{ item }}-wrapper.j2 - dest: "/usr/local/bin/{{ item }}-{{ mariadb_instance }}" - mode: 0555 - owner: root - group: root - with_items: - - mysql - - mysqladmin - - mysqldump - -- name: Install server package - apt: - name: "{{ packages }}" - state: present - vars: - packages: - - mariadb-client - - mariadb-server - -# Running MariaDB in multi-instance mode with systemd requires some effort: -# the default "mariadb@.service" shipped with the Debian package includes -# Alias directives for "mysql.service" and "mysqld.service", and if we -# understand the problem correctly, systemd will try to enable those aliases -# (by creating those symlinks in /etc/systemd) every time we set up a new -# instance. To avoid that, we ship our own "mariadb@.service" with those -# Alias directives removed. -- name: Mask default services - systemd: - masked: yes - enabled: no - state: stopped - name: "{{ item }}" - with_items: - - mariadb.service - - mysql.service - - mysqld.service - ignore_errors: yes - notify: "systemctl reset-failed" - changed_when: false - -- name: Install mariadb multi-instance base systemd unit - copy: - src: mariadb@.service - dest: /etc/systemd/system/mariadb@.service - register: mariadb_multi_instance_systemd_unit - -- name: Install systemd override dir - file: - dest: "/etc/systemd/system/{{ mariadb_service }}.d" - state: directory - -# Fix an issue where mysql_install_db needs /usr/sbin/resolveip, but the -# Debian package installs /usr/bin/resolveip instead. 
-- stat: - path: /usr/sbin/resolveip - register: mariadb_resolveip -- file: - src: /usr/bin/resolveip - dest: /usr/sbin/resolveip - state: link - when: "not mariadb_resolveip.stat.exists" - -- name: Install tmpfiles.d for socket directories - template: - src: templates/tmpfiles.conf.j2 - dest: "/etc/tmpfiles.d/{{ mariadb_service }}.conf" - register: mariadb_tmpfiles - -- name: Create socket directories - shell: "systemd-tmpfiles --create" - when: mariadb_tmpfiles.changed - -- name: Install systemd override file - template: - src: templates/service_override.conf.j2 - dest: "/etc/systemd/system/{{ mariadb_service }}.d/role-mariadb.conf" - register: mariadb_systemd_unit - -- name: Bootstrap data directory - shell: "/usr/bin/mysql_install_db --defaults-file={{ mariadb_config }} --datadir={{ mariadb_data_dir }} --user=mysql --auth-root-authentication-method=socket && date > {{ mariadb_data_dir }}/.float-bootstrap-ok" - args: - creates: "{{ mariadb_data_dir }}/.float-bootstrap-ok" - -# Explicitly start the service after bootstrapping data directory -# instead of using a restart handler, so that the database is available -# immediately for subsequent tasks. -- set_fact: - restart_mariadb: "{{ mariadb_systemd_unit.changed or mariadb_multi_instance_systemd_unit.changed or mariadb_server_config.changed }}" - -- name: Start service {{ mariadb_service }} - systemd: - name: "{{ mariadb_service }}" - daemon_reload: "{{ restart_mariadb }}" - enabled: yes - state: "{{ 'restarted' if restart_mariadb else 'started' }}" - -# Monitoring creates a mysql user, needs the server up. -- import_tasks: monitoring.yml - -# Set up replication. 
-- include_tasks: replication.yml - when: mariadb_enable_replication diff --git a/float/roles/float-util-mariadb/tasks/monitoring.yml b/float/roles/float-util-mariadb/tasks/monitoring.yml deleted file mode 100644 index 36e9b96e37fdc22585b347c7c49068d21defbe8c..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/tasks/monitoring.yml +++ /dev/null @@ -1,56 +0,0 @@ -# TODO(godog) setup ai-metrics-exporter scraping - -- name: Install exporter and deps - apt: - name: "{{ packages }}" - state: present - vars: - packages: - - prometheus-mysqld-exporter - - python-mysqldb - -- include_tasks: user.yml - vars: - user: prometheus - mariadb_max_user_connections: 3 - -- name: Grant privileges to monitoring user - mysql_user: - name: 'prometheus' - priv: '*.*:PROCESS,REPLICATION CLIENT,SELECT' - append_privs: yes - state: present - config_file: "{{ mariadb_config }}" - -- name: Exporter config directory - file: - dest: "{{ mariadb_metrics_config | dirname }}" - state: directory - -- name: Mask default instance - systemd: - name: prometheus-mysqld-exporter - masked: yes - enabled: no - notify: "systemctl reset-failed" - -- name: Install exporter for {{ mariadb_instance }} - template: - src: templates/exporter.service.j2 - dest: /etc/systemd/system/{{ mariadb_metrics_service }} - register: mariadb_metrics_systemd_unit - -- name: Install exporter config for {{ mariadb_instance }} - template: - src: templates/exporter.cnf.j2 - dest: "{{ mariadb_metrics_config }}" - register: mariadb_metrics_systemd_unit - -# Explicitly start the service after bootstrapping data directory -- name: Start {{ mariadb_metrics_service }} - systemd: - name: "{{ mariadb_metrics_service }}" - daemon_reload: "{{ mariadb_metrics_systemd_unit is changed }}" - enabled: yes - state: "{{ 'restarted' if mariadb_metrics_systemd_unit.changed else 'started' }}" - diff --git a/float/roles/float-util-mariadb/tasks/replication.yml b/float/roles/float-util-mariadb/tasks/replication.yml deleted 
file mode 100644 index 8d337c609ae1035e82afeec14da0093d77080c4b..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/tasks/replication.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- - -- name: Grant replica permissions - command: "{{ mariadb_client }} -e \"GRANT SELECT, REPLICATION SLAVE ON *.* TO '{{ mariadb_replication_user }}'@'%' IDENTIFIED BY '{{ mariadb_replication_password }}'\"" - -- name: Install replication manager script - copy: - src: mariadb-replicator - dest: /usr/sbin/mariadb-replicator - mode: 0755 - -- name: Setup MariaDB replication - command: "/usr/sbin/mariadb-replicator --defaults-file={{ mariadb_config }} --master-host={{ mariadb_master_host }} --master-port={{ mariadb_master_port }} --replication-user={{ mariadb_replication_user }} --replication-password={{ mariadb_replication_password }} {{ '--master' if mariadb_is_master else '--slave' }}" - register: mariadb_replicator_result - changed_when: "mariadb_replicator_result.stdout != 'Nothing to do.'" diff --git a/float/roles/float-util-mariadb/tasks/user.yml b/float/roles/float-util-mariadb/tasks/user.yml deleted file mode 100644 index 84094ad493527f9ff0b885c9246e84bae729d898..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/tasks/user.yml +++ /dev/null @@ -1,11 +0,0 @@ -# Create a user named 'user' on 'mariadb_instance' with access granted via -# unix socket (i.e. no passwords, the user must be known to nss) -# -# Note the mysql_user module is not yet able to create a user identified via -# unix_socket, hence the 'command' invocation. 
- -- name: Create user - command: "{{ mariadb_client }} -e 'create or replace user \"{{ user }}\"@\"localhost\" identified via unix_socket;'" - -- name: Set user limits - command: "{{ mariadb_client }} -e 'grant usage on *.* to \"{{ user }}\"@\"localhost\" with max_user_connections {{ mariadb_max_user_connections }};'" diff --git a/float/roles/float-util-mariadb/templates/exporter.cnf.j2 b/float/roles/float-util-mariadb/templates/exporter.cnf.j2 deleted file mode 100644 index eaa15f8e58b03384a766e2f5173e50f70053dd4b..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/exporter.cnf.j2 +++ /dev/null @@ -1,6 +0,0 @@ -# Exporter client configuration, user/password are required - -[client] -user=prometheus -password=unused -socket={{ mariadb_socket }} diff --git a/float/roles/float-util-mariadb/templates/exporter.service.j2 b/float/roles/float-util-mariadb/templates/exporter.service.j2 deleted file mode 100644 index 30b7e2d058dc5a03b6f97354dd61c7299c1f460f..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/exporter.service.j2 +++ /dev/null @@ -1,10 +0,0 @@ -[Unit] -Description=Prometheus exporter for MySQL server (%I) - -[Service] -Restart=always -User=prometheus -ExecStart=/usr/bin/prometheus-mysqld-exporter --web.listen-address {{ mariadb_metrics_address }}:{{ mariadb_metrics_port }} --config.my-cnf {{ mariadb_metrics_config }} --no-collect.info_schema.tables --no-collect.info_schema.tablestats - -[Install] -WantedBy=multi-user.target diff --git a/float/roles/float-util-mariadb/templates/mariadb.cnf.j2 b/float/roles/float-util-mariadb/templates/mariadb.cnf.j2 deleted file mode 100644 index 79ac157e863631b5cef0e151365d59fff9902c8d..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/mariadb.cnf.j2 +++ /dev/null @@ -1,50 +0,0 @@ -[client] -port={{ mariadb_port }} -socket={{ mariadb_socket }} -default-character-set = utf8mb4 - -[mysql] -default-character-set = 
utf8mb4 -prompt=\u@\h({{ mariadb_instance }}) [\d]>\_ -no-auto-rehash - -[mariadb] -server_id={{ mariadb_server_id }} -bind-address={{ mariadb_bind_address }} -port={{ mariadb_port }} -socket={{ mariadb_socket }} -datadir={{ mariadb_data_dir }} -log_bin={{ mariadb_data_dir }}/binary_log -binlog-format=ROW -innodb_file_per_table=1 -innodb_flush_method=O_DIRECT -innodb_flush_log_at_trx_commit=2 -innodb_log_file_size={{ mariadb_innodb_log_file_size }} -innodb_log_buffer_size=8M -log_warnings=3 -skip-name-resolve - -# For long mysqldumps. -max_allowed_packet=1024M -net_read_timeout=3600 -net_write_timeout=3600 - -# Do not block hosts because of errors. -max_connect_errors = 4294967295 - -character-set-server = utf8mb4 -collation-server = utf8mb4_general_ci - -{% if mariadb_replicate_dbs %} -{% for db in mariadb_replicate_dbs %} -replicate_do_db={{ db }} -replicate_wild_do_table={{ db }}.% -{% endfor %} -{% endif %} - -{% for key, value in mariadb_settings | dictsort %} -{{ key }}={{ value }} -{% endfor %} - -# Include instance-specific configuration. 
-!includedir {{ mariadb_config_dir }}/ diff --git a/float/roles/float-util-mariadb/templates/mysql-wrapper.j2 b/float/roles/float-util-mariadb/templates/mysql-wrapper.j2 deleted file mode 100644 index b2dcea14b56d648664c07150cff199230a84d158..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/mysql-wrapper.j2 +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec /usr/bin/mysql --defaults-file={{ mariadb_config }} "$@" diff --git a/float/roles/float-util-mariadb/templates/mysqladmin-wrapper.j2 b/float/roles/float-util-mariadb/templates/mysqladmin-wrapper.j2 deleted file mode 100644 index c9d2b08709990c06c8e6ebf4965b964f871d065a..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/mysqladmin-wrapper.j2 +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec /usr/bin/mysqladmin --defaults-file={{ mariadb_config }} "$@" diff --git a/float/roles/float-util-mariadb/templates/mysqldump-wrapper.j2 b/float/roles/float-util-mariadb/templates/mysqldump-wrapper.j2 deleted file mode 100644 index 5e0954b1582442932600382a4f6b741404cdc133..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/mysqldump-wrapper.j2 +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec /usr/bin/mysqldump --defaults-file={{ mariadb_config }} "$@" diff --git a/float/roles/float-util-mariadb/templates/service_override.conf.j2 b/float/roles/float-util-mariadb/templates/service_override.conf.j2 deleted file mode 100644 index 59790e2b76a8c5dfaa59b61d9a35acc9344e5c1b..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/service_override.conf.j2 +++ /dev/null @@ -1,6 +0,0 @@ -[Service] -SyslogIdentifier=mariadb@%I -NoNewPrivileges=true -PrivateTmp=true -LimitNOFILE={{ mariadb_ulimit_nofile }} -TimeoutStartSec=86400 diff --git a/float/roles/float-util-mariadb/templates/tmpfiles.conf.j2 b/float/roles/float-util-mariadb/templates/tmpfiles.conf.j2 deleted file mode 100644 index 
aa2d60ff967d58b8d10bcc5f6cb18aca2eae0d92..0000000000000000000000000000000000000000 --- a/float/roles/float-util-mariadb/templates/tmpfiles.conf.j2 +++ /dev/null @@ -1 +0,0 @@ -d /run/mariadb-{{ mariadb_instance }} 0755 mysql mysql - diff --git a/float/services.yml.default b/float/services.yml.default index 4f6912fc4ef1f129d66b77047b12db93feaba227..f7bdab9f8802f75318eeeecadfc14c94a4f841ad 100644 --- a/float/services.yml.default +++ b/float/services.yml.default @@ -15,6 +15,10 @@ frontend: - replds@acme.service ports: - 5005 + monitoring_endpoints: + - name: bind + port: 9119 + scheme: http volumes: - name: cache path: /var/cache/nginx diff --git a/float/services.yml.no-elasticsearch b/float/services.yml.no-elasticsearch index 55c5c25a924a9a7b623c55d72b7bf5b1568d057f..a44a48b36b1dcbc2d4c27cf7de67cae24c23e387 100644 --- a/float/services.yml.no-elasticsearch +++ b/float/services.yml.no-elasticsearch @@ -15,6 +15,10 @@ frontend: - replds@acme.service ports: - 5005 + monitoring_endpoints: + - name: bind + port: 9119 + scheme: http volumes: - name: cache path: /var/cache/nginx