From 05426278649c625d16d7c1ac1783cfae161fbab8 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 31 May 2017 11:09:30 -0400 Subject: [PATCH] update ContainerPilot to 3.0.0-RC1 --- Dockerfile | 14 ++-- bin/sensor | 95 +++++++++++++++++++----- etc/containerpilot.json | 89 ----------------------- etc/containerpilot.json5 | 114 +++++++++++++++++++++++++++++ etc/nginx/nginx.conf | 3 +- examples/Dockerfile | 2 +- examples/containerpilot.json | 96 ------------------------ examples/containerpilot.json5 | 133 ++++++++++++++++++++++++++++++++++ 8 files changed, 331 insertions(+), 215 deletions(-) delete mode 100644 etc/containerpilot.json create mode 100644 etc/containerpilot.json5 delete mode 100644 examples/containerpilot.json create mode 100644 examples/containerpilot.json5 diff --git a/Dockerfile b/Dockerfile index a191d39..0a49217 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,11 +34,10 @@ RUN export CONSUL_TEMPLATE_VERSION=0.18.3 \ && rm /tmp/consul-template.zip # Add Containerpilot and set its configuration -# Releases at https://github.com/joyent/containerpilot/releases -ENV CONTAINERPILOT_VER 2.7.3 -ENV CONTAINERPILOT file:///etc/containerpilot.json +ENV CONTAINERPILOT_VER 3.0.0-RC1 +ENV CONTAINERPILOT /etc/containerpilot.json5 -RUN export CONTAINERPILOT_CHECKSUM=2511fdfed9c6826481a9048e8d34158e1d7728bf \ +RUN export CONTAINERPILOT_CHECKSUM=f67929d1c8567d31772085fc252338091a5f795c \ && curl -Lso /tmp/containerpilot.tar.gz \ "https://github.com/joyent/containerpilot/releases/download/${CONTAINERPILOT_VER}/containerpilot-${CONTAINERPILOT_VER}.tar.gz" \ && echo "${CONTAINERPILOT_CHECKSUM} /tmp/containerpilot.tar.gz" | sha1sum -c \ @@ -60,7 +59,7 @@ RUN export JQ_VERSION=1.5 \ # Add our configuration files and scripts RUN rm -f /etc/nginx/conf.d/default.conf COPY etc/acme /etc/acme -COPY etc/containerpilot.json /etc/ +COPY etc/containerpilot.json5 /etc/ COPY etc/nginx /etc/nginx/templates COPY bin /usr/local/bin @@ -73,7 +72,4 @@ RUN mkdir -p /var/www/acme/challenge # Consul session data written here RUN mkdir -p /var/consul -CMD [ "/usr/local/bin/containerpilot", \ - "nginx", \ - "-g", \ - "daemon off;"] +CMD ["/usr/local/bin/containerpilot"] diff --git a/bin/sensor b/bin/sensor index 50977d3..7cea180 100755 --- a/bin/sensor +++ b/bin/sensor @@ -1,28 +1,47 @@ #!/bin/bash set -e +# populated by calling scrape() +scraped= + help() { echo 'Make requests to the Nginx stub_status endpoint and pull out metrics' echo 'for the telemetry service. Refer to the Nginx docs for details:' echo 'http://nginx.org/en/docs/http/ngx_http_stub_status_module.html' } -# Cumulative number of dropped connections +scrape() { + scraped=$(curl -s --fail localhost/nginx-health) +} + +# lets us make sure we only hit the Nginx health endpoint once +assert_scraped() { + if [ -z "${scraped}" ]; then scrape; fi +} + +# Cummulative number of dropped connections unhandled() { - local scraped=$(curl -s --fail localhost/nginx-health) - local accepts=$(echo "${scraped}" | awk 'FNR == 3 {print $1}') - local handled=$(echo "${scraped}" | awk 'FNR == 3 {print $2}') - echo $(expr ${accepts} - ${handled}) + assert_scraped + local accepts handled connections_unhandled + accepts=$(echo "$scraped" | awk 'FNR == 3 {print $1}') + handled=$(echo "$scraped" | awk 'FNR == 3 {print $2}') + connections_unhandled=$(expr ${accepts} - ${handled}) + /usr/local/bin/containerpilot \ + -putmetric \ + "nginx_connections_unhandled_total=$connections_unhandled" } # ratio of connections-in-use to available workers connections_load() { - local scraped=$(curl -s --fail localhost/nginx-health) - local active=$(echo "${scraped}" | awk '/Active connections/{print $3}') - local waiting=$(echo "${scraped}" | awk '/Reading/{print $6}') - local workers=$(echo $(cat /etc/nginx/nginx.conf | perl -n -e'/worker_connections *(\d+)/ && print $1') -) - echo $(echo "scale=4; (${active} - ${waiting}) / ${workers}" | bc) + assert_scraped + local active waiting workers connections_load + active=$(echo "$scraped" | awk '/Active connections/{print $3}') + waiting=$(echo "$scraped" | awk '/Reading/{print $6}') + workers=$(cat /etc/nginx/nginx.conf | perl -n -e'/worker_connections *(\d+)/ && print $1') + connections_load=$(echo "scale=4; (${active} - ${waiting}) / ${workers}" | bc) + /usr/local/bin/containerpilot \ + -putmetric \ + "nginx_connections_load=$connections_load" } # ------------------------------------------------------- @@ -32,40 +51,75 @@ connections_load() { # The current number of active client connections including Waiting connections. connections_active() { - curl -s localhost/nginx-health | awk '/Active connections/{print $3}' + assert_scraped + local connections_active + connections_active=$(awk '/Active connections/{print $3}' scraped) + /usr/local/bin/containerpilot \ + -putmetric \ + "nginx_connections_active=$connections_active" } # The current number of connections where nginx is reading the request header. connections_reading() { - curl -s localhost/nginx-health | awk '/Reading/{print $2}' + assert_scraped + local connections_reading + connections_reading=$(awk '/Reading/{print $2}' scraped) + /usr/local/bin/containerpilot \ + -putmetric \ + "nginx_connections_reading=$connections_reading" } # The current number of connections where nginx is writing the response back # to the client. connections_writing() { - curl -s localhost/nginx-health | awk '/Reading/{print $4}' + assert_scraped + local connections_writing + connections_writing=$(awk '/Reading/{print $4}' scraped) + /usr/local/bin/containerpilot \ + -putmetric \ + "nginx_connections_writing=$connections_writing" } # The current number of idle client connections waiting for a request. connections_waiting() { - curl -s localhost/nginx-health | awk '/Reading/{print $6}' + assert_scraped + local connections_waiting + connections_waiting=$(awk '/Reading/{print $6}' scraped) + /usr/local/bin/containerpilot \ + -putmetric \ + "nginx_connections_waiting=$connections_waiting" } # The total number of accepted client connections. accepts() { - curl -s localhost/nginx-health | awk 'FNR == 3 {print $1}' + assert_scraped + local accepts + accepts=$(awk 'FNR == 3 {print $1}') + /usr/local/bin/containerpilot \ + -putmetrics \ + "nginx_accepts=$accepts" } # The total number of handled connections. Generally, the parameter value is the # same as accepts unless some resource limits have been reached (for example, the # worker_connections limit). handled() { - curl -s localhost/nginx-health | awk 'FNR == 3 {print $2}' + assert_scraped + local handled + handled=$(awk 'FNR == 3 {print $2}') + /usr/local/bin/containerpilot \ + -putmetrics \ + "nginx_handled=$handled" } # The total number of client requests. requests() { - curl -s localhost/nginx-health | awk 'FNR == 3 {print $3}' + assert_scraped + local requests + requests=$(awk 'FNR == 3 {print $3}') + /usr/local/bin/containerpilot \ + -putmetrics \ + "nginx_requests=$requests" } # ------------------------------------------------------- @@ -77,4 +131,7 @@ if [ ! -z "$cmd" ]; then exit fi -help +# default behavior +scrape +unhandled +connections_load diff --git a/etc/containerpilot.json b/etc/containerpilot.json deleted file mode 100644 index e70b660..0000000 --- a/etc/containerpilot.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "consul": "{{ if .CONSUL_AGENT }}localhost{{ else }}{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}{{ end }}:8500", - "preStart": "generate-config", - "services": [ - { - "name": "nginx", - "port": 80, - "health": "health-check http", - "poll": 10, - "ttl": 25, - "interfaces": ["eth0"] - }, - { - "name": "nginx-public", - "port": 80, - "health": "health-check http", - "poll": 10, - "ttl": 25, - "interfaces": ["eth1", "eth0"] - }{{ if .ACME_DOMAIN }}, - { - "name": "nginx-ssl", - "port": 443, - "health": "acme init && health-check https", - "poll": 10, - "ttl": 25, - "interfaces": ["eth0"] - }, - { - "name": "nginx-public-ssl", - "port": 443, - "health": "health-check https", - "poll": 10, - "ttl": 25, - "interfaces": ["eth0"] - }, - { - "name": "nginx-public-ssl", - "port": 443, - "health": "/usr/bin/curl --insecure --fail --silent --show-error --output /dev/null --header \"HOST: {{ .ACME_DOMAIN }}\" https://localhost/nginx-health", - "poll": 10, - "ttl": 25, - "interfaces": ["eth1", "eth0"] - }{{ end }} - ], - "coprocesses": [{{ if .CONSUL_AGENT }} - { - "command": ["consul", "agent", - "-data-dir=/var/lib/consul", - "-config-dir=/etc/consul", - "-rejoin", - "-retry-join", "{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}", - "-retry-max", "10", - "-retry-interval", "10s"], - "restarts": "unlimited" - }{{ end }} - {{ if and .CONSUL_AGENT .ACME_DOMAIN }},{{ end }} - {{ if .ACME_DOMAIN }} - { - "command": ["acme", "watch"], - "restarts": "unlimited" - }{{ end }}], - "telemetry": { - "port": 9090, - "sensors": [ - { - "name": "nginx_connections_unhandled_total", - "help": "Number of accepted connnections that were not handled", - "type": "gauge", - "poll": 5, - "check": ["sensor", "unhandled"] - }, - { - "name": "nginx_connections_load", - "help": "Ratio of active connections (less waiting) to the maximum worker connections", - "type": "gauge", - "poll": 5, - "check": ["sensor", "connections_load"] - } - ] - }, - "tasks": [{{ if .ACME_DOMAIN }} - { - "name": "acme-checkin", - "command": [ "acme", "checkin" ], - "frequency": "1h", - "timeout": "10s" - }{{ end }}] -} diff --git a/etc/containerpilot.json5 b/etc/containerpilot.json5 new file mode 100644 index 0000000..37cc5d5 --- /dev/null +++ b/etc/containerpilot.json5 @@ -0,0 +1,114 @@ +{ + consul: "{{ if .CONSUL_AGENT }}localhost{{ else }}{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}{{ end }}:8500", + jobs: [ + { + name: "preStart", + exec: "generate-config" + }, + { + name: "nginx", + port: 80, + interfaces: ["eth0"], + exec: "nginx", + when: { + source: "preStart", + once: "exitSuccess" + }, + health: { + exec: "health-check http", + interval: 10, + ttl: 25 + } + }, + { + name: "nginx-public", + port: 80, + interfaces: ["eth1", "eth0"], + health: { + exec: "health-check http", + interval: 10, + ttl: 25, + } + }, + {{ if .ACME_DOMAIN }}{ + name: "nginx-ssl", + port: 443, + interfaces: ["eth0"], + health: { + exec: "acme init && health-check https", + interval: 10, + ttl: 25 + } + }, + { + name: "nginx-public-ssl", + port: 443, + interfaces: ["eth0"], + health: { + exec: "health-check https", + interval: 10, + ttl: 25, + } + }, + { + name: "nginx-public-ssl", + port: 443, + interfaces: ["eth1", "eth0"], + health: { + exec: "/usr/bin/curl --insecure --fail --silent --show-error --output /dev/null --header \"HOST: {{ .ACME_DOMAIN }}\" https://localhost/nginx-health", + interval: 10, + ttl: 25 + } + },{{ end }} + {{ if .CONSUL_AGENT }}{ + name: "consul-agent", + exec: [ + "consul", "agent", + "-data-dir=/var/lib/consul", + "-config-dir=/etc/consul", + "-rejoin", + "-retry-join", "{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}", + "-retry-max", "10", + "-retry-interval", "10s" + ], + restarts: "unlimited" + },{{ end }} + {{ if .ACME_DOMAIN }}{ + name: "acme-watch", + exec: ["acme", "watch"], + restarts: "unlimited" + }, + { + name: "acme-checkin", + exec: [ "acme", "checkin" ], + timeout: "10s", + when: { + interval: "1h", + } + },{{ end }} + { + // this sensor runs -putmetric to each of the metrics described + // in the telemetry/metrics block below + name: "sensor", + exec: "/usr/local/bin/sensor", + when: { + interval: "5s" + } + } + ], + telemetry: { + port: 9090, + metrics: [ + { + name: "nginx_connections_unhandled_total", + help: "Number of accepted connnections that were not handled", + type: "gauge" + }, + { + name: "nginx_connections_load", + help: "Ratio of active connections (less waiting) to the maximum worker connections", + type: "gauge" + } + ] + } +} diff --git a/etc/nginx/nginx.conf b/etc/nginx/nginx.conf index ae385d3..04053ec 100644 --- a/etc/nginx/nginx.conf +++ b/etc/nginx/nginx.conf @@ -3,6 +3,7 @@ user nginx; worker_processes 1; +daemon off; error_log /var/log/nginx/error.log warn; pid /var/run/nginx.pid; @@ -19,7 +20,7 @@ http { ~^2 0; default 1; } - + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for" ' diff --git a/examples/Dockerfile b/examples/Dockerfile index 1b009d2..9e7c0f3 100644 --- a/examples/Dockerfile +++ b/examples/Dockerfile @@ -1,4 +1,4 @@ FROM autopilotpattern/nginx:latest COPY examples/example.conf /etc/nginx/templates/conf.d/site.conf -COPY examples/containerpilot.json /etc/containerpilot.json +COPY examples/containerpilot.json5 /etc/containerpilot.json5 diff --git a/examples/containerpilot.json b/examples/containerpilot.json deleted file mode 100644 index c20e0d6..0000000 --- a/examples/containerpilot.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "consul": "{{ if .CONSUL_AGENT }}localhost{{ else }}{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}{{ end }}:8500", - "preStart": "generate-config", - "services": [ - { - "name": "nginx", - "port": 80, - "health": "health-check http", - "poll": 10, - "ttl": 25, - "interfaces": ["eth0"] - }, - { - "name": "nginx-public", - "port": 80, - "health": "health-check http", - "poll": 10, - "ttl": 25, - "interfaces": ["eth1", "eth0"] - }{{ if .ACME_DOMAIN }}, - { - "name": "nginx-ssl", - "port": 443, - "health": "acme init && health-check https", - "poll": 10, - "ttl": 25, - "interfaces": ["eth0"] - }, - { - "name": "nginx-public-ssl", - "port": 443, - "health": "health-check https", - "poll": 10, - "ttl": 25, - "interfaces": ["eth0"] - }, - { - "name": "nginx-public-ssl", - "port": 443, - "health": "/usr/bin/curl --insecure --fail --silent --show-error --output /dev/null --header \"HOST: {{ .ACME_DOMAIN }}\" https://localhost/nginx-health", - "poll": 10, - "ttl": 25, - "interfaces": ["eth1", "eth0"] - }{{ end }} - ], - "coprocesses": [{{ if .CONSUL_AGENT }} - { - "command": ["consul", "agent", - "-data-dir=/var/lib/consul", - "-config-dir=/etc/consul", - "-rejoin", - "-retry-join", "{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}", - "-retry-max", "10", - "-retry-interval", "10s"], - "restarts": "unlimited" - }{{ end }} - {{ if and .CONSUL_AGENT .ACME_DOMAIN }},{{ end }} - {{ if .ACME_DOMAIN }} - { - "command": ["acme", "watch"], - "restarts": "unlimited" - }{{ end }}], - "backends": [ - { - "name": "backend", - "poll": 7, - "onChange": "reload" - } - ], - "telemetry": { - "port": 9090, - "sensors": [ - { - "name": "nginx_connections_unhandled_total", - "help": "Number of accepted connnections that were not handled", - "type": "gauge", - "poll": 5, - "check": ["sensor", "unhandled"] - }, - { - "name": "nginx_connections_load", - "help": "Ratio of active connections (less waiting) to the maximum worker connections", - "type": "gauge", - "poll": 5, - "check": ["sensor", "connections_load"] - } - ] - }, - "tasks": [{{ if .ACME_DOMAIN }} - { - "name": "acme-checkin", - "command": [ "acme", "checkin" ], - "frequency": "1h", - "timeout": "10s" - }{{ end }}] -} diff --git a/examples/containerpilot.json5 b/examples/containerpilot.json5 new file mode 100644 index 0000000..54cb6ce --- /dev/null +++ b/examples/containerpilot.json5 @@ -0,0 +1,133 @@ +{ + consul: "{{ if .CONSUL_AGENT }}localhost{{ else }}{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}{{ end }}:8500", + jobs: [ + { + name: "preStart", + exec: "generate-config" + }, + { + name: "nginx", + port: 80, + interfaces: ["eth0"], + exec: "nginx", + when: { + source: "preStart", + once: "exitSuccess" + }, + health: { + exec: "health-check http", + interval: 10, + ttl: 25 + } + }, + { + name: "nginx-public", + port: 80, + interfaces: ["eth1", "eth0"], + health: { + exec: "health-check http", + interval: 10, + ttl: 25, + } + }, + {{ if .ACME_DOMAIN }}{ + name: "nginx-ssl", + port: 443, + interfaces: ["eth0"], + health: { + exec: "acme init && health-check https", + interval: 10, + ttl: 25 + } + }, + { + name: "nginx-public-ssl", + port: 443, + interfaces: ["eth0"], + health: { + exec: "health-check https", + interval: 10, + ttl: 25, + } + }, + { + name: "nginx-public-ssl", + port: 443, + interfaces: ["eth1", "eth0"], + health: { + exec: "/usr/bin/curl --insecure --fail --silent --show-error --output /dev/null --header \"HOST: {{ .ACME_DOMAIN }}\" https://localhost/nginx-health", + interval: 10, + ttl: 25 + } + },{{ end }} + {{ if .CONSUL_AGENT }}{ + name: "consul-agent", + exec: [ + "consul", "agent", + "-data-dir=/var/lib/consul", + "-config-dir=/etc/consul", + "-rejoin", + "-retry-join", "{{ if .CONSUL }}{{ .CONSUL }}{{ else }}consul{{ end }}", + "-retry-max", "10", + "-retry-interval", "10s" + ], + restarts: "unlimited" + },{{ end }} + {{ if .ACME_DOMAIN }}{ + name: "acme-watch", + exec: ["acme", "watch"], + restarts: "unlimited" + }, + { + name: "acme-checkin", + exec: [ "acme", "checkin" ], + timeout: "10s", + when: { + interval: "1h", + } + },{{ end }} + { + name: "onChange", + exec: "reload", + when: { + source: "watch.backend", + each: "changed" + } + }, + { + name: "sensor-unhandled", + exec: "/usr/local/bin/sensor unhandled", + when: { + interval: "5s" + } + }, + { + name: "sensor-connections-load", + exec: "/usr/local/bin/sensor connections_load", + when: { + interval: "5s" + } + } + ], + watches: [ + { + name: "backend", + interval: 7 + } + ], + telemetry: { + port: 9090, + metrics: [ + { + name: "nginx_connections_unhandled_total", + help: "Number of accepted connnections that were not handled", + type: "gauge" + }, + { + name: "nginx_connections_load", + help: "Ratio of active connections (less waiting) to the maximum worker connections", + type: "gauge" + } + ] + } +}