Commit 530aa19b authored by Brian Brazil

Merge pull request #444 from brian-brazil/group-ignoring

Add docs for ignoring and new group_* semantics.
parents cbde8e57 54043b11
@@ -5,6 +5,7 @@ output/
 # Temporary file directory
 tmp/
+downloads/

 # Crash Log
 crash.log
...
 sudo: false
 language: ruby
 branches:
   only:
   - master
 script: make deploy
 before_install:
 - eval "$(ssh-agent -s)"
 - openssl aes-256-cbc -K $encrypted_2ba894bc7c2f_key -iv $encrypted_2ba894bc7c2f_iv -in prometheus_rsa.enc -out prometheus_rsa -d
 - chmod 600 prometheus_rsa
 - ssh-add prometheus_rsa
+env:
+  global:
+    secure: aNZoB4UDtdelUDxFTTJa5Dxm8gXorKGmQWYaFIXzuBwLvFMp1toIY+amsMrGBbnvxrJosO4764GpS8sWRT/Jbr252sxjOe8N4jfbtJUX29U2aNFQSWVc/o8VnKFW1NI5cjBAZ9OOvgpS6KUGkXZ2/PTmvgxvyqGS4pPovIM1OOo=
@@ -12,3 +12,4 @@ gem 'builder'
 gem 'rb-inotify', :require => false
 gem 'rb-fsevent', :require => false
 gem 'rb-fchange', :require => false
+gem 'nanoc-cachebuster'
@@ -42,6 +42,8 @@ GEM
     multi_json (1.11.2)
     nanoc (3.7.3)
       cri (~> 2.3)
+    nanoc-cachebuster (0.3.1)
+      nanoc (>= 3.3.0)
     nokogiri (1.6.3.1)
       mini_portile (= 0.6.0)
     posix-spawn (0.3.9)
@@ -75,6 +77,7 @@ DEPENDENCIES
   guard-nanoc
   kramdown
   nanoc
+  nanoc-cachebuster
   nokogiri
   pygments.rb
   rb-fchange
...
+DOWNLOADS := prometheus alertmanager blackbox_exporter haproxy_exporter mysqld_exporter node_exporter pushgateway statsd_exporter
+
-compile:
-	rm -rf output
+clean:
+	rm -rf output downloads
+
+compile: clean downloads
 	bundle exec nanoc

 deploy: github_pages_export github_pages_push

+downloads: $(DOWNLOADS:%=downloads/%/repo.json) $(DOWNLOADS:%=downloads/%/releases.json)
+
+downloads/%/repo.json:
+	@mkdir -p $(dir $@)
+	@echo "curl -sf -H 'Accept: application/vnd.github.v3+json' <GITHUB_AUTHENTICATION> https://api.github.com/repos/prometheus/$* > $@"
+	@curl -sf -H 'Accept: application/vnd.github.v3+json' $(GITHUB_AUTHENTICATION) https://api.github.com/repos/prometheus/$* > $@
+
+downloads/%/releases.json:
+	@mkdir -p $(dir $@)
+	@echo "curl -sf -H 'Accept: application/vnd.github.v3+json' <GITHUB_AUTHENTICATION> https://api.github.com/repos/prometheus/$*/releases > $@"
+	@curl -sf -H 'Accept: application/vnd.github.v3+json' $(GITHUB_AUTHENTICATION) https://api.github.com/repos/prometheus/$*/releases > $@
+
 github_pages_export: compile
 	cd output && \
 	echo prometheus.io > CNAME && \
...
-# Prometheus Documentation
+# Prometheus Documentation [![Build Status](https://travis-ci.org/prometheus/docs.svg?branch=master)](https://travis-ci.org/prometheus/docs)

 This repository contains both the content and the static-site generator code for the
 Prometheus documentation site.
...
@@ -17,8 +17,13 @@ compile '/assets/*' do
 end

 route '/assets/*' do
-  # /assets/foo.html/ → /foo.html
-  item.identifier[0..-2]
+  # Cachebuster currently doesn't fingerprint all needed files (SVG and font
+  # extensions are missing), so we need to differentiate here.
+  if Nanoc::Cachebuster::FILETYPES_TO_FINGERPRINT.include?(item[:extension])
+    item.identifier[0..-(3 + item[:extension].length)] + fingerprint(item[:filename]) + '.' + item[:extension]
+  else
+    item.identifier[0..-2]
+  end
 end

 route '/README/' do
@@ -40,7 +45,6 @@ compile '*' do
   if item[:title] == 'README.md'
     # Don't filter; this should propagate verbatim to the output GitHub repository.
   elsif item[:extension] == 'md'
-    #filter :kramdown
     filter :redcarpet, options: {filter_html: true, autolink: true, no_intraemphasis: true, fenced_code_blocks: true, gh_blockcode: true, tables: true}, renderer_options: {with_toc_data: true}
     filter :add_anchors
     filter :bootstrappify
@@ -61,6 +65,8 @@ compile '*' do
   else
     layout 'default'
   end
+
+  filter :cache_buster
 end

 route '/blog/' do
@@ -78,7 +84,7 @@ end
 route '*' do
   if item[:extension] == 'css'
     # Write item with identifier /foo/ to /foo.css
-    item.identifier.chop + '.css'
+    item.identifier.chop + fingerprint(item[:filename]) + '.css'
   elsif item.binary?
     # Write item with identifier /foo/ to /foo.ext
     item.identifier.chop + '.' + item[:extension]
...
@@ -16,6 +16,8 @@ catch up on anything you missed.
 In the future, we will use this blog to publish more articles and announcements
 to help you get the most out of Prometheus.

+<!-- more -->
+
 ## Using Prometheus

 Posts on how to use Prometheus comprise the majority of online content. Here
...
@@ -22,6 +22,7 @@ In this blog post, we will take a closer look at the built-in service discovery
 some practical examples. As an additional resource, see
 [Prometheus's configuration documentation](/docs/operating/configuration).

+<!-- more -->

 ## Prometheus and targets
@@ -35,7 +36,7 @@ labels set by an earlier stage:
 1. Global labels, which are assigned to every target scraped by the Prometheus instance.
 2. The `job` label, which is configured as a default value for each scrape configuration.
 3. Labels that are set per target group within a scrape configuration.
-4. Advanced label manipulation via [_relabeling_](/docs/operating/configuration/#target-relabeling-relabel_config).
+4. Advanced label manipulation via [_relabeling_](/docs/operating/configuration/#relabel_config).

 Each stage overwrites any colliding labels from the earlier stages. Eventually, we have a flat
 set of labels that describe a single target. Those labels are then attached to every time series that
@@ -76,7 +77,7 @@ scrape_configs:
       job: 'job2'
 ```

-Through a mechanism named [_relabeling_](http://prometheus.io/docs/operating/configuration/#target-relabeling-relabel_config),
+Through a mechanism named [_relabeling_](http://prometheus.io/docs/operating/configuration/#relabel_config),
 any label can be removed, created, or modified on a per-target level. This
 enables fine-grained labeling that can also take into account metadata coming
 from the service discovery. Relabeling is the last stage of label assignment
@@ -124,7 +125,7 @@ This rule transforms a target with the label set:
 You could then also remove the source labels in an additional relabeling step.

 You can read more about relabeling and how you can use it to filter targets in the
-[configuration documentation](/docs/operating/configuration#target-relabeling-relabel_config).
+[configuration documentation](/docs/operating/configuration#relabel_config).

 Over the next sections, we will see how you can leverage relabeling when using service discovery.
@@ -219,7 +220,7 @@ has the `production` or `canary` Consul tag, a respective `group` label is assigned.
 Each target's `instance` label is set to the node name provided by Consul.

 A full documentation of all configuration parameters for service discovery via Consul
-can be found on the [Prometheus website](/docs/operating/configuration#target-relabeling-relabel_config).
+can be found on the [Prometheus website](/docs/operating/configuration#relabel_config).

 ## Custom service discovery
...
@@ -19,6 +19,8 @@ detect and handle with custom-built rules. The Prometheus [query
 language](../../../../../docs/querying/basics/) gives you the tools to discover
 these anomalies while avoiding false positives.

+<!-- more -->
+
 ## Building a query

 A common problem within a service is when a small number of servers are not
@@ -83,8 +85,6 @@ ALERT InstanceLatencyOutlier
       >
     1
   FOR 30m
-  SUMMARY "{{$labels.instance}} in {{$labels.job}} is a latency outlier"
-  DESCRIPTION "{{$labels.instance}} has latency of {{humanizeDuration $value}}"
 ```

 ## Automatic actions
@@ -107,23 +107,13 @@ configuration that uses it could look like this:
 ```
 # A simple notification configuration which only sends alert notifications to
 # an external webhook.
-notification_config {
-  name: "restart_webhook"
-  webhook_config {
-    url: "http://example.org/my/hook"
-  }
-}
-
-# An aggregation rule which matches all alerts with the label
-# alertname="InstanceLatencyOutlier" and sends them using the "restart_webhook"
-# notification configuration.
-aggregation_rule {
-  filter {
-    name_re: "alertname"
-    value_re: "InstanceLatencyOutlier"
-  }
-  notification_config_name: "restart_webhook"
-}
+receivers:
+- name: restart_webhook
+  webhook_configs:
+  - url: "http://example.org/my/hook"
+
+route:
+  receiver: restart_webhook
 ```
@@ -135,4 +125,4 @@ Alertmanager's generic webhook support can trigger automatic remediations.
 This all combines to enable oncall engineers to focus on problems where they can
 have the most impact.

-When defining alerts for your services, see also our [alerting best practices](http://prometheus.io/docs/practices/alerting/).
+When defining alerts for your services, see also our [alerting best practices](/docs/practices/alerting/).
@@ -30,6 +30,8 @@ or *dhtech*. This post is going to focus on the work of dhtech and how we used
 Prometheus during DreamHack Summer 2015 to try to kick our monitoring up another
 notch.

+<!-- more -->
+
 ## The equipment

 Turns out that to build a highly performant network for 10,000+
 computers, you need at least the same number of network ports. In our case these
@@ -56,7 +58,7 @@ below.
 [![The DreamHack network core](https://c2.staticflickr.com/4/3951/18679671439_10ce7a8eb4_c.jpg)](https://www.flickr.com/photos/dreamhack/18679671439)
 <center>*The DreamHack network core*</center>

-[![Network planning map](http://i.imgur.com/ZCQa2Abl.png)](http://i.imgur.com/ZCQa2Ab.png)
+[![Network planning map](/assets/dh_network_planning_map.png)](/assets/dh_network_planning_map.png)
 <center>*The planning map for the distribution and core layers. The core is
 clearly visible in "Hall D"*</center>
@@ -96,7 +98,7 @@ snmpcollector (SNMP) and ipplan-pinger (ICMP), closely followed by dhcpinfo
 systems into [node_exporter](https://github.com/prometheus/node_exporter)'s
 textfile collector.

-[![dhmon Architecture](http://i.imgur.com/6gN3MRp.png)](http://i.imgur.com/6gN3MRp.png)
+[![dhmon Architecture](/assets/dh_dhmon_architecture.png)](/assets/dh_dhmon_architecture.png)
 <center>*The current architecture plan of dhmon as of Summer 2015*</center>

 We use Prometheus as a central timeseries storage and querying engine, but we
@@ -119,7 +121,7 @@ very short sampling intervals. In the end, we found no reason for why we can't
 use Prometheus for this data as well - we will definitely try to replace our
 memcached with Prometheus at the next DreamHack.

-[![dhmon Visualization](http://i.imgur.com/D5I0Ztbl.png)](http://i.imgur.com/D5I0Ztb.png)
+[![dhmon Visualization](/assets/dh_dhmon_visualization.png)](/assets/dh_dhmon_visualization.png)
 <center>*The overview of our access layer visualized by dhmon*</center>

 ## Prometheus setup
@@ -196,7 +198,7 @@ Let's also look at how an alert for an almost full DHCP scope looks like:
 We found the syntax to define alerts easy to read and understand even if you had
 no previous experience with Prometheus or time series databases.

-[![Prometheus alerts for DreamHack](http://i.imgur.com/RV5gM7Ol.png)](http://i.imgur.com/RV5gM7O.png)
+[![Prometheus alerts for DreamHack](/assets/dh_prometheus_alerts.png)](/assets/dh_prometheus_alerts.png)
 <center>*Oops! Turns out we have some bad uplinks, better run out and fix
 it!*</center>
@@ -208,7 +210,7 @@ Every time someone asked us something about the network, we crafted a query to
 get the answer and saved it as a dashboard widget. The most interesting ones
 were then added to an overview dashboard that we proudly displayed.

-[![dhmon Dashboard](http://i.imgur.com/yYtC8vLl.png)](http://i.imgur.com/yYtC8vL.png)
+[![dhmon Dashboard](/assets/dh_dhmon_dashboard.png)](/assets/dh_dhmon_dashboard.png)
 <center>*The DreamHack Overview dashboard powered by PromDash*</center>

 ## The future
...
@@ -24,6 +24,8 @@ In this post we will implement a small utility program that connects a custom
 service discovery approach based on [etcd](https://coreos.com/etcd/), the
 highly consistent distributed key-value store, to Prometheus.

+<!-- more -->
+
 ## Targets in etcd and Prometheus

 Our fictional service discovery system stores services and their
...
...@@ -22,20 +22,22 @@ Docker and Boexever in 2014. Over the years, Prometheus was growing more and ...@@ -22,20 +22,22 @@ Docker and Boexever in 2014. Over the years, Prometheus was growing more and
more mature and although it was already solving people's monitoring problems, more mature and although it was already solving people's monitoring problems,
it was still unknown to the wider public. it was still unknown to the wider public.
<!-- more -->
## Going public ## Going public
Everything changed for us a year ago, in January of 2015. After more than two Everything changed for us a year ago, in January of 2015. After more than two
years of development and internal usage, we felt that Prometheus was ready for years of development and internal usage, we felt that Prometheus was ready for
a wider audience and decided to go fully public with our official [announcement a wider audience and decided to go fully public with our official [announcement
blog post](https://developers.soundcloud.com/blog/prometheus-monitoring-at-soundcloud), blog post](https://developers.soundcloud.com/blog/prometheus-monitoring-at-soundcloud),
a [website](http://prometheus.io/), and a series of a [website](https://prometheus.io/), and a series of
[related](http://www.boxever.com/tags/monitoring) [related](http://www.boxever.com/tags/monitoring)
[posts](http://5pi.de/2015/01/26/monitor-docker-containers-with-prometheus/). [posts](http://5pi.de/2015/01/26/monitor-docker-containers-with-prometheus/).
We already received a good deal of attention during the first week after the We already received a good deal of attention during the first week after the
announcement, but nothing could prepare us for what happened a week later: announcement, but nothing could prepare us for what happened a week later:
someone unknown to us (hello there, someone unknown to us (hello there,
[jjwiseman](https://news.ycombinator.com/user?id=jjwiseman)!) had submitted [jjwiseman](https://news.ycombinator.com/user?id=jjwiseman)!) had submitted
[the Prometheus website](http://prometheus.io/) to Hacker News and somehow their [the Prometheus website](https://prometheus.io/) to Hacker News and somehow their
post had made it [all the way to the top](https://news.ycombinator.com/item?id=8995696). post had made it [all the way to the top](https://news.ycombinator.com/item?id=8995696).
This is when things started going slightly crazy in a good way. We saw a sharp This is when things started going slightly crazy in a good way. We saw a sharp
...@@ -105,10 +107,10 @@ or another has become too long to mention all of them: ...@@ -105,10 +107,10 @@ or another has become too long to mention all of them:
[CoreOS](https://coreos.com/), [Docker](https://docker.com), [CoreOS](https://coreos.com/), [Docker](https://docker.com),
[Boxever](http://www.boxever.com/), [Boxever](http://www.boxever.com/),
[DigitalOcean](https://www.digitalocean.com/), [Financial Times](http://www.ft.com/), [DigitalOcean](https://www.digitalocean.com/), [Financial Times](http://www.ft.com/),
[Improbable](http://improbable.io/), [KPMG](https://kpmg.com), and many more. [Improbable](http://improbable.io/), [KPMG](https://www.kpmg.com), and many more.
Even the world's largest digital festival, Even the world's largest digital festival,
[DreamHack](https://www.dreamhack.se), has [used [DreamHack](https://www.dreamhack.se), has [used
Prometheus](http://prometheus.io/blog/2015/06/24/monitoring-dreamhack/) to keep Prometheus](/blog/2015/06/24/monitoring-dreamhack/) to keep
tabs on their network infrastructure in 2015, and tabs on their network infrastructure in 2015, and
[FOSDEM](https://fosdem.org/2016/) will do so in 2016. [FOSDEM](https://fosdem.org/2016/) will do so in 2016.
......
---
title: Custom Alertmanager Templates
created_at: 2016-03-03
kind: article
author_name: Fabian Reinartz
---
The Alertmanager handles alerts sent by Prometheus servers and sends
notifications about them to different receivers based on their labels.
A receiver can be one of many different integrations such as PagerDuty, Slack,
email, or a custom integration via the generic webhook interface (for example [JIRA](https://github.com/fabxc/jiralerts)).
## Templates
The messages sent to receivers are constructed via templates.
Alertmanager comes with default templates but also allows defining custom
ones.
In this blog post, we will walk through a simple customization of Slack
notifications.
We use this simple Alertmanager configuration that sends all alerts to Slack:
```yaml
global:
  slack_api_url: '<slack_webhook_url>'

route:
  receiver: 'slack-notifications'
  # All alerts in a notification have the same value for these labels.
  group_by: [alertname, datacenter, app]

receivers:
- name: 'slack-notifications'
  slack_configs:
  - channel: '#alerts'
```
By default, a Slack message sent by Alertmanager looks like this:
![](/assets/blog/2016-03-03/slack_alert_before.png)
It shows us that there is one firing alert, followed by the label values of
the alert grouping (alertname, datacenter, app) and further label values the
alerts have in common (critical).
<!-- more -->
## Customize
If you have alerts, you should also have documentation on how to handle them –
a runbook. A good approach to that is having a wiki that has a section for
each app you are running with a page for each alert.
Suppose we have such a wiki running at `https://internal.myorg.net/wiki/alerts`.
Now we want links to these runbooks shown in our Slack messages.
In our template, we need access to the "alertname" and the "app" label. Since
these are labels we group alerts by, they are available in the `GroupLabels`
map of our templating data.
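To make this concrete, the data a notification template is executed with looks roughly like the following Go sketch. The field names follow Alertmanager's documented template data; the exact type definitions here are illustrative, not copied from the source:

```go
// Rough shape of Alertmanager's template data (illustrative sketch,
// not the literal source definition). KV maps label names to values.
type KV map[string]string

// Alert holds a single alert for its template fields.
type Alert struct {
	Status      string // "firing" or "resolved"
	Labels      KV
	Annotations KV
}

// Data is the root object a notification template is executed with.
type Data struct {
	Status            string  // "firing" if at least one alert in the group fires
	Alerts            []Alert // all alerts in this notification
	GroupLabels       KV      // the group_by labels: alertname, datacenter, app
	CommonLabels      KV      // labels shared by all alerts in the group
	CommonAnnotations KV
	ExternalURL       string // link back to this Alertmanager
}
```

Since `alertname` and `app` are part of our grouping, `.GroupLabels.alertname` and `.GroupLabels.app` are set for every notification.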
We can directly add custom templating to our Alertmanager's [Slack configuration](/docs/alerting/configuration/#slack-receiver-slack_config)
that is used for the `text` section of our Slack message.
The [templating language](https://godoc.org/text/template) is the one provided
by the Go programming language.
```yaml
global:
  slack_api_url: '<slack_webhook_url>'

route:
  receiver: 'slack-notifications'
  group_by: [alertname, datacenter, app]

receivers:
- name: 'slack-notifications'
  slack_configs:
  - channel: '#alerts'
    # Template for the text field in Slack messages.
    text: 'https://internal.myorg.net/wiki/alerts/{{ .GroupLabels.app }}/{{ .GroupLabels.alertname }}'
```
We reload our Alertmanager by sending a `SIGHUP` or restart it to load the
changed configuration. Done.
Our Slack notifications now look like this:
![](/assets/blog/2016-03-03/slack_alert_after.png)
### Template files
Alternatively, we can also provide a file containing named templates, which
are then loaded by Alertmanager. This is especially helpful for more complex
templates that span many lines.
We create a file `/etc/alertmanager/templates/myorg.tmpl` and define a
template in it named "slack.myorg.text":
```
{{ define "slack.myorg.text" }}https://internal.myorg.net/wiki/alerts/{{ .GroupLabels.app }}/{{ .GroupLabels.alertname }}{{ end }}
```
Our configuration now loads the template with the given name for the "text"
field and we provide a path to our custom template file:
```yaml
global:
  slack_api_url: '<slack_webhook_url>'

route:
  receiver: 'slack-notifications'
  group_by: [alertname, datacenter, app]

receivers:
- name: 'slack-notifications'
  slack_configs:
  - channel: '#alerts'
    text: '{{ template "slack.myorg.text" . }}'

templates:
- '/etc/alertmanager/templates/myorg.tmpl'
```
We reload our Alertmanager by sending a `SIGHUP` or restart it to load the
changed configuration and the new template file. Done.
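To sanity-check such a template file without a running Alertmanager, a few lines of Go can execute the named template the same way the `{{ template "slack.myorg.text" . }}` reference does. This is only a sketch with made-up label values, not Alertmanager's actual loading code:

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// Made-up group labels standing in for a real notification.
	data := struct{ GroupLabels map[string]string }{
		GroupLabels: map[string]string{
			"app":       "billing",
			"alertname": "LatencyHigh",
		},
	}

	// Load the template file and run the named template against the data,
	// as Alertmanager does for the configured `text` field.
	tmpl := template.Must(template.ParseFiles("/etc/alertmanager/templates/myorg.tmpl"))
	if err := tmpl.ExecuteTemplate(os.Stdout, "slack.myorg.text", data); err != nil {
		panic(err)
	}
}
```

For the values above, this prints `https://internal.myorg.net/wiki/alerts/billing/LatencyHigh`.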
---
title: Interview with Life360
created_at: 2016-03-23
kind: article
author_name: Brian Brazil
---
*This is the first in a series of interviews with users of Prometheus, allowing
them to share their experiences of evaluating and using Prometheus. Our first
interview is with Daniel from Life360.*
## Can you tell us about yourself and what Life360 does?
I’m Daniel Ben Yosef, a.k.a. dby, and I’m an Infrastructure Engineer for
[Life360](https://www.life360.com/), and before that, I’ve held systems
engineering roles for the past 9 years.
Life360 creates technology that helps families stay connected; we’re the Family
Network app for families. We’re quite busy handling these families - at peak
we serve 700k requests per minute for 70 million registered families.
[<img src="/assets/blog/2016-03-23/life360_horizontal_logo_gradient_rgb.png" style="width: 444px; height:177px"/>](https://www.life360.com/)
We manage around 20 services in production, mostly handling location requests
from mobile clients (Android, iOS, and Windows Phone), spanning over 150+
instances at peak. Redundancy and high-availability are our goals and we strive
to maintain 100% uptime whenever possible because families trust us to be
available.
We hold user data in both our MySQL multi-master cluster and in our 12-node
Cassandra ring which holds around 4TB of data at any given time. We have
services written in Go, Python, PHP, as well as plans to introduce Java to our
stack. We use Consul for service discovery, and of course our Prometheus setup
is integrated with it.
<!-- more -->
## What was your pre-Prometheus monitoring experience?
Our monitoring setup, before we switched to Prometheus, included many
components such as:
* Copperegg (now Idera)
* Graphite + Statsd + Grafana
* Sensu
* AWS Cloudwatch
We primarily use MySQL, NSQ and HAProxy and we found that all of the monitoring
solutions mentioned above were very partial, and required a lot of
customization to actually get all working together.
## Why did you decide to look at Prometheus?
We had a few reasons for switching to Prometheus, one of which is that we
simply needed better monitoring.
Prometheus has been known to us for a while, and we have been tracking it and
reading about the active development, and at a point (a few months back) we
decided to start evaluating it for production use.
The PoC results were incredible. The monitoring coverage of MySQL was amazing,
and we also loved the JMX monitoring for Cassandra, which had been sorely
lacking in the past.
[![Cassandra Client Dashboard](/assets/blog/2016-03-23/cx_client.png)](/assets/blog/2016-03-23/cx_client.png)
## How did you transition?
We started with a relatively small box (4GB of memory) as an initial point. It
was effective for a small number of services, but not for our full monitoring
needs.
We also initially deployed with Docker, but slowly transitioned it to its own
box on an r3.2xl instance (60GB RAM), and that holds all of our service monitoring
needs with 30 days of in-memory data.
We slowly started introducing all of our hosts with the Node Exporter and built
Grafana graphs, up to the point where we had total service coverage.
We were also looking at InfluxDB for long-term storage, but due to
[recent developments](https://influxdata.com/blog/update-on-influxdb-clustering-high-availability-and-monetization/),
this may no longer be a viable option.
We then added exporters for MySQL, Node, Cloudwatch, HAProxy, JMX, NSQ (with a
bit of our own code), Redis and Blackbox (with our own contribution to add
authentication headers).
[![NSQ Overview Dashboard](/assets/blog/2016-03-23/nsq_overview.png)](/assets/blog/2016-03-23/nsq_overview.png)
## What improvements have you seen since switching?
The visibility and instrumentation gain was the first thing we saw. Right
before switching, we started experiencing Graphite’s scalability issues, and
having an in-place replacement for Graphite so stakeholders can continue to use
Grafana as a monitoring tool was extremely valuable to us. Nowadays, we are
focusing on taking all that data and using it to detect anomalies, which will
eventually become alerts in the Alert Manager.
## What do you think the future holds for Life360 and Prometheus?
We currently have one of our projects instrumented directly with a Prometheus
client, a Python-based service. As we build out new services, Prometheus is
becoming our go-to for instrumentation, and will help us gain extremely
meaningful alerts and stats about our infrastructure.
We look forward to growing with the project and keep contributing.
*Thank you Daniel! The source for Life360's dashboards is shared on [GitHub](https://github.com/life360/prometheus-grafana-dashboards).*
---
title: Interview with ShowMax
created_at: 2016-05-01
kind: article
author_name: Brian Brazil
---
*This is the second in a series of interviews with users of Prometheus, allowing
them to share their experiences of evaluating and using Prometheus.*
## Can you tell us about yourself and what ShowMax does?
I’m Antonin Kral, and I’m leading research and architecture for
[ShowMax](http://www.showmax.com). Before that, I’ve held architectural and CTO
roles for the past 12 years.
ShowMax is a subscription video on demand service that launched in South Africa
in 2015. We’ve got an extensive content catalogue with more than 20,000
episodes of TV shows and movies. Our service is currently available in 65
countries worldwide. While better-known rivals are skirmishing in America and
Europe, ShowMax is battling a more difficult problem: how do you binge-watch
in a barely connected village in sub-Saharan Africa? Already 35% of video
around the world is streamed, but there are still so many places the revolution
has left untouched.
![ShowMax logo](/assets/blog/2016-05-01/showmax-logo.png)
We are managing about 50 services running mostly on private clusters built
around CoreOS. They are primarily handling API requests from our clients
(Android, iOS, AppleTV, JavaScript, Samsung TV, LG TV etc), while some of them
are used internally. One of the biggest internal pipelines is video encoding
which can occupy 400+ physical servers when handling large ingestion batches.
The majority of our back-end services are written in Ruby, Go or Python. We use
EventMachine when writing apps in Ruby (Goliath on MRI, Puma on JRuby). Go is
typically used in apps that require large throughput and don’t have so much
business logic. We’re very happy with Falcon for services written in Python.
Data is stored in PostgreSQL and ElasticSearch clusters. We use etcd and custom
tooling for configuring Varnishes for routing requests.
<!-- more -->
## What was your pre-Prometheus monitoring experience?
The primary use-cases for monitoring systems are:
* Active monitoring and probing (via Icinga)
* Metrics acquisition and creation of alerts based on these metrics (now Prometheus)
* Log acquisition from backend services
* Event and log acquisition from apps
The last two use-cases are handled via our logging infrastructure. It consists
of a collector running in the service container, which listens on a local
Unix socket. The socket is used by apps to send messages to the outside world.
Messages are transferred via RabbitMQ servers to consumers. Consumers are
custom-written or hekad-based. One of the main message flows is going towards
the service ElasticSearch cluster, which makes logs accessible for Kibana and
ad-hoc searches. We also save all processed events to GlusterFS for archival
purposes and/or further processing.
We used to run two metric acquisition pipelines in parallel. The first was based
on Collectd + StatsD + Graphite + Grafana, and the other used Collectd +
OpenTSDB. We have struggled considerably with both pipelines. We had to deal
with either the I/O hungriness of Graphite, or the complexity and inadequate
tooling around OpenTSDB.
## Why did you decide to look at Prometheus?
After learning from our problems with the previous monitoring system, we looked
for a replacement. Only a few solutions made it to our shortlist. Prometheus
was one of the first, as Jiri Brunclik, our head of Operations at the time, had
received a personal recommendation about the system from former colleagues at
Google.
The proof of concept went great. We got a working system very quickly. We also
evaluated InfluxDB as a main system as well as a long-term storage for
Prometheus. But due to recent developments, this may no longer be a viable
option for us.
## How did you transition?
We initially started with LXC containers on one of our service servers, but
quickly moved towards a dedicated server from Hetzner, where we host the
majority of our services. We’re using PX70-SSD, which is Intel® Xeon® E3-1270
v3 Quad-Core Haswell with 32GB RAM, so we have plenty of power to run
Prometheus. SSDs allow us to have retention set to 120 days. Our logging
infrastructure is built around getting logs locally (receiving them on Unix
socket) and then pushing them towards the various workers.
![Diagram of ShowMax logging infrastructure. Shows flow of log messages from the source via processors to various consumers.](/assets/blog/2016-05-01/Loggin_infrastructure.png)
Having this infrastructure available made pushing metrics a logical choice
(especially in pre-Prometheus times). On the other side, Prometheus is
primarily designed around the paradigm of scraping metrics. We wanted to stay
consistent and push all metrics towards Prometheus initially. We have created a
Go daemon called prometheus-pusher. It’s responsible for scraping metrics from
local exporters and pushing them towards the Pushgateway. Pushing metrics has
some positive aspects (e.g. simplified service discovery) but also quite a few
drawbacks (e.g. making it hard to distinguish between a network partition vs. a
crashed service). We made Prometheus-pusher available on
[GitHub](https://github.com/ShowMax/prometheus-pusher), so you can try it
yourself.
![Grafana dashboard showing April 5th 2016 log processors traffic.](/assets/blog/2016-05-01/log_processors.png)
The next step was for us to figure out what to use for managing dashboards and
graphs. We liked the Grafana integration, but didn’t really like how Grafana
manages dashboard configurations. We are running Grafana in a Docker
container, so any changes should be kept out of the container. Another problem
was the lack of change tracking in Grafana.
We have thus decided to write a generator which takes YAML maintained within
git and generates JSON configs for Grafana dashboards. It is furthermore able to
deploy dashboards to Grafana started in a fresh container without the need for
persisting changes made into the container. This provides you with automation,
repeatability, and auditing.
We are pleased to announce that this tool is also now available under an Apache
2.0 license on [GitHub](https://github.com/ShowMax/grafana-dashboards-generator).
## What improvements have you seen since switching?
An improvement which we saw immediately was the stability of Prometheus. We
were fighting with stability and scalability of Graphite prior to this, so
getting that sorted was a great win for us. Furthermore, the speed and stability
of Prometheus made access to metrics very easy for developers. Prometheus is
really helping us to embrace the DevOps culture.
Tomas Cerevka, one of our backend developers, was testing a new version of the
service using JRuby. He needed a quick peek into the heap consumption of that
particular service. He was able to get that information in a snap. For us,
this speed is essential.
![Heap size consumed by JRuby worker during troubleshooting memory issues on JVM.](/assets/blog/2016-05-01/ui_fragments-heap-zoom.png)
## What do you think the future holds for ShowMax and Prometheus?
Prometheus has become an integral part of monitoring in ShowMax and it is going
to be with us for the foreseeable future. We have replaced our whole metric
storage with Prometheus, but the ingestion chain remains push based. We are
thus thinking about following Prometheus best practices and switching to a pull
model.
We’ve also already played with alerts. We want to spend more time on this topic
and come up with increasingly sophisticated alert rules.
---
title: When (not) to use varbit chunks
created_at: 2016-05-08
kind: article
author_name: Björn “Beorn” Rabenstein
---
The embedded time series database (TSDB) of the Prometheus server organizes the
raw sample data of each time series in chunks of a constant size of 1024 bytes. In
addition to the raw sample data, a chunk contains some meta-data, which allows
the selection of a different encoding for each chunk. The most fundamental
distinction is the encoding version. You select the version for newly created
chunks via the command line flag `-storage.local.chunk-encoding-version`. Up to
now, there were only two supported versions: 0 for the original delta encoding,
and 1 for the improved double-delta encoding. With release
[0.18.0](https://github.com/prometheus/prometheus/releases/tag/0.18.0), we
added version 2, which is another variety of double-delta encoding. We call it
_varbit encoding_ because it involves a variable bit-width per sample within
the chunk. While version 1 is superior to version 0 in almost every aspect,
there is a real trade-off between version 1 and 2. This blog post will help you
to make that decision. Version 1 remains the default encoding, so if you want
to try out version 2 after reading this article, you have to select it
explicitly via the command line flag. There is no harm in switching back and
forth, but note that existing chunks will not change their encoding version
once they have been created. However, these chunks will gradually be phased out
according to the configured retention time and will thus be replaced by chunks
with the encoding specified in the command-line flag.
<!-- more -->
## What is varbit encoding?
From the beginning, we designed the chunked sample storage for easy addition of
new encodings. When Facebook published a
[paper on their in-memory TSDB Gorilla](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf),
we were intrigued by a number of similarities between the independently
developed approaches of Gorilla and Prometheus. However, there were also many
fundamental differences, which we studied in detail, wondering if we could get
some inspiration from Gorilla to improve Prometheus.
On the rare occasion of a free weekend ahead of me, I decided to give it a
try. In a coding spree, I implemented what would later (after a considerable
amount of testing and debugging) become the varbit encoding.
In a future blog post, I will describe the technical details of the
encoding. For now, you only need to know a few characteristics for your
decision between the new varbit encoding and the traditional double-delta
encoding. (I will call the latter just “double-delta encoding” from now on but
note that the varbit encoding also uses double deltas, just in a different
way.)
## What are the advantages of varbit encoding?
In short: It offers a way better compression ratio. While the double-delta
encoding needs about 3.3 bytes per sample for real-life data sets, the varbit
encoding went as far down as 1.28 bytes per sample on a typical large
production server at SoundCloud. That's almost three times more space efficient
(and even slightly better than the 1.37 bytes per sample reported for Gorilla –
but take that with a grain of salt as the typical data set at SoundCloud might
look different from the typical data set at Facebook).
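Here is the back-of-the-envelope arithmetic behind that claim, as a small Go sketch. The 15-second scrape interval is an assumption for illustration, and per-chunk metadata is ignored, so the numbers are slightly optimistic:

```go
package main

import "fmt"

func main() {
	const chunkBytes = 1024.0 // constant chunk size; metadata overhead ignored

	encodings := []struct {
		name           string
		bytesPerSample float64
	}{
		{"double-delta (v1)", 3.3},
		{"varbit (v2)", 1.28},
	}
	for _, enc := range encodings {
		samples := chunkBytes / enc.bytesPerSample
		hours := samples * 15 / 3600 // assuming one sample every 15s
		fmt.Printf("%s: ~%.0f samples/chunk, ~%.1f hours per chunk\n",
			enc.name, samples, hours)
	}
}
```

This prints roughly 310 samples per chunk for double-delta versus 800 for varbit – the same factor of almost three that shows up below as more samples in RAM and on disk.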
Now think of the implications: Three times more samples in RAM, three times
more samples on disk, only a third of disk ops, and since disk ops are
currently the bottleneck for ingestion speed, it will also allow ingestion to
be three times faster. In fact, the recently reported new ingestion record of
800,000 samples per second was only possible with varbit chunks – and with an
SSD, obviously. With spinning disks, the bottleneck is reached far earlier, and
thus the 3x gain matters even more.
All of this sounds too good to be true…
## So where is the catch?
For one, the varbit encoding is more complex. The computational cost to encode
and decode values is therefore somewhat increased, which fundamentally affects
everything that writes or reads sample data. Luckily, it is only a proportional
increase of something that usually contributes only a small part to the total
cost of an operation.
Another property of the varbit encoding is potentially way more relevant:
samples in varbit chunks can only be accessed sequentially, while samples in
double-delta encoded chunks are randomly accessible by index. Since writes in
Prometheus are append-only, the different access patterns only affect reading
of sample data. The practical impact depends heavily on the nature of the
originating PromQL query.
A pretty harmless case is the retrieval of all samples within a time
interval. This happens when evaluating a range selector or rendering a
dashboard with a resolution similar to the scrape frequency. The Prometheus
storage engine needs to find the starting point of the interval. With
double-delta chunks, it can perform a binary search, while it has to scan
sequentially through a varbit chunk. However, once the starting point is found,
all remaining samples in the interval need to be decoded sequentially anyway,
which is only slightly more expensive with the varbit encoding.
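As a toy illustration of the two access patterns (plain Go slices standing in for real chunk iterators – nothing here is Prometheus code):

```go
package main

import (
	"fmt"
	"sort"
)

type sample struct {
	t int64   // timestamp
	v float64 // value
}

func main() {
	// A toy "chunk": samples ordered by timestamp.
	chunk := []sample{{10, 1}, {20, 2}, {30, 3}, {40, 4}, {50, 5}}
	startT := int64(30)

	// Double-delta chunks are randomly accessible, so the interval start
	// can be located with a binary search: O(log n) sample accesses.
	i := sort.Search(len(chunk), func(i int) bool { return chunk[i].t >= startT })
	fmt.Println("binary search finds index", i)

	// Varbit chunks must be decoded sequentially, so the same lookup
	// touches every sample before the start: O(n) accesses.
	j := 0
	for j < len(chunk) && chunk[j].t < startT {
		j++
	}
	fmt.Println("sequential scan finds index", j)
}
```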
The trade-off is different for retrieving a small number of non-adjacent
samples from a chunk, or for plainly retrieving a single sample in a so-called
instant query. Potentially, the storage engine has to iterate through a lot of
samples to find the few samples to be returned. Fortunately, the most common
source of instant queries are rule evaluations referring to the latest sample
in each involved time series. Not completely by coincidence, I recently
improved the retrieval of the latest sample of a time series. Essentially, the
last sample added to a time series is cached now. A query that needs only the
most recent sample of a time series doesn't even hit the chunk layer anymore,
and the chunk encoding is irrelevant in that case.
Even if an instant query refers to a sample in the past and therefore has to
hit the chunk layer, most likely other parts of the query, like the index
lookup, will dominate the total query time. But there are real-life queries
where the sequential access pattern required by varbit chunks will start to
matter a lot.
## What is the worst-case query for varbit chunks?
The worst case for varbit chunks is if you need just one sample from somewhere
in the middle of _each_ chunk of a very long time series. Unfortunately, there
is a real use-case for that. Let's assume a time series compresses nicely
enough to make each chunk last for about eight hours. That's about three chunks
a day, or about 100 chunks a month. If you have a dashboard that displays the
time series in question for the last month with a resolution of 100 data
points, the dashboard will execute a query that retrieves a single sample from
100 different chunks. Even then, the differences between chunk encodings will
be dominated by other parts of the query execution time. Depending on
circumstances, my guess would be that the query might take 50ms with
double-delta encoding and 100ms with varbit encoding.
However, if your dashboard query doesn't only touch a single time series but
aggregates over thousands of time series, the number of chunks to access
multiplies accordingly, and the overhead of the sequential scan will become
dominant. (Such queries are frowned upon, and we usually recommend using a
[recording rule](https://prometheus.io/docs/querying/rules/#recording-rules)
for queries of that kind that are used frequently, e.g. in a dashboard.) But
with the double-delta encoding, the query time might still have been
acceptable, let's say around one second. After the switch to varbit encoding,
the same query might last tens of seconds, which is clearly not what you want
for a dashboard.
## What are the rules of thumb?
To put it as simply as possible: If you are neither limited on disk capacity
nor on disk ops, don't worry and stick with the default of the classical
double-delta encoding.
However, if you would like a longer retention time or if you are currently
bottlenecked on disk ops, I invite you to play with the new varbit
encoding. Start your Prometheus server with
`-storage.local.chunk-encoding-version=2` and wait for a while until you have
enough new chunks with varbit encoding to vet the effects. If you see queries
that are becoming unacceptably slow, check if you can use
[recording rules](https://prometheus.io/docs/querying/rules/#recording-rules)
to speed them up. Most likely, those queries will gain a lot from that even
with the old double-delta encoding.
If you are interested in how the varbit encoding works behind the scenes, stay
tuned for another blog post in the not too distant future.
---
title: Prometheus to Join the Cloud Native Computing Foundation
created_at: 2016-05-09
kind: article
author_name: Julius Volz on behalf of the Prometheus core developers
---
Since the inception of Prometheus, we have been looking for a sustainable
governance model for the project that is independent of any single company.
Recently, we have been in discussions with the newly formed [Cloud Native
Computing Foundation](https://cncf.io/) (CNCF), which is backed by Google,
CoreOS, Docker, Weaveworks, Mesosphere, and [other leading infrastructure
companies](https://cncf.io/about/members).
Today, we are excited to announce that the CNCF's Technical Oversight Committee
[voted unanimously](http://lists.cncf.io/pipermail/cncf-toc/2016-May/000198.html) to
accept Prometheus as a second hosted project after Kubernetes! You can find
more information about these plans in the
[official press release by the CNCF](https://cncf.io/news/news/2016/05/cloud-native-computing-foundation-accepts-prometheus-second-hosted-project).
By joining the CNCF, we hope to establish a clear and sustainable project
governance model, as well as benefit from the resources, infrastructure, and
advice that the independent foundation provides to its members.
We think that the CNCF and Prometheus are an ideal thematic match, as both
focus on bringing about a modern vision of the cloud.
In the following months, we will be working with the CNCF on finalizing the
project governance structure. We will report back when there are more details
to announce.
 <%= atom_feed :title => 'Prometheus Blog', :author_name => '© Prometheus Authors 2015',
-    :author_uri => 'http://prometheus.io/blog/', :limit => 10,
-    :logo => 'http://prometheus.io/assets/prometheus_logo.png',
-    :icon => 'http://prometheus.io/assets/favicons/favicon.ico' %>
+    :author_uri => 'https://prometheus.io/blog/', :limit => 10,
+    :logo => 'https://prometheus.io/assets/prometheus_logo.png',
+    :icon => 'https://prometheus.io/assets/favicons/favicon.ico' %>
 ---
 title: Blog
 ---
-<div class="col-md-9">
-  <% sorted_articles.each do |post| %>
-    <div class="blog doc-content">
-      <h1><%= link_to post[:title], post.path %></h1>
-      <aside>Posted at: <%= get_pretty_date(post) %> by <%= post[:author_name]%></aside>
-      <article class="doc-content">
-        <%= get_post_start(post) %>
-      </article>
-    </div>
-  <% end %>
-</div>
-<%= render 'blog_sidebar' %>
+<div class="row">
+  <div class="col-md-9">
+    <% sorted_articles.each do |post| %>
+      <div class="blog doc-content">
+        <h1><%= link_to post[:title], post.path %></h1>
+        <aside>Posted at: <%= get_pretty_date(post) %> by <%= post[:author_name]%></aside>
+        <article class="doc-content">
+          <%= get_post_start(post) %>
+        </article>
+      </div>
+    <% end %>
+  </div>
+  <%= render 'blog_sidebar' %>
+</div>
 ---
 title: Community
 ---
-<div class="col-md-8 col-md-offset-2 doc-content">
+<div class="row">
+  <div class="col-md-12 doc-content">
   <h1>Community</h1>
   <p>
     Prometheus is developed in the open. Here are some of the channels we use
     to communicate and contribute:
   </p>
   <p>
-    <strong>Mailing list:</strong>
-    <a href="https://groups.google.com/forum/#!forum/prometheus-developers">prometheus-developers</a> Google Group
+    <strong>IRC:</strong>
+    <a href="https://webchat.freenode.net/?channels=#prometheus"><code>#prometheus</code></a> on <a href="http://freenode.net/">irc.freenode.net</a>
   </p>
   <p>
-    <strong>Twitter:</strong>
-    <a href="https://twitter.com/PrometheusIO">PrometheusIO</a>
+    <strong>Mailing list:</strong>
+    <a href="https://groups.google.com/forum/#!forum/prometheus-developers">prometheus-developers</a> Google Group
   </p>
   <p>
-    <strong>IRC:</strong> <code>#prometheus</code> on <a href="http://freenode.net/">irc.freenode.net</a>
+    <strong>Twitter:</strong>
+    <a href="https://twitter.com/PrometheusIO">@PrometheusIO</a>
   </p>
   <p>
     <strong>Issue tracker:</strong> We use the GitHub issue tracker for the various <a href="http://github.com/prometheus">Prometheus repositories</a>
   </p>

   <h1>Contributing</h1>
   <p>
     We welcome community contributions! Please see the
     <code>CONTRIBUTING.md</code> file in the respective Prometheus repository
     for instructions on how to submit changes. If you are planning on making
     more elaborate or controversial changes, please discuss them on the mailing
     list before sending a pull request.
   </p>

   <h1>Commercial support</h1>
   <p>
     This is a list of third-party companies which provide support or
     consulting services for Prometheus. Prometheus is an independent open source
     project which does not endorse any company.
   </p>
   <ul>
     <li><a href="http://www.robustperception.io">Robust Perception</a></li>
   </ul>

   <h1>Acknowledgements</h1>
   <p>
     Prometheus was initially started by
     <a href="http://www.matttproud.com">Matt T. Proud</a> and
     <a href="http://juliusv.com">Julius Volz</a>. The majority of its
     development has been sponsored by <a href="https://soundcloud.com">SoundCloud</a>.
   </p>
   <p>
     We would also like to acknowledge early contributions by engineers from
     <a href="https://www.docker.com/">Docker</a> and <a href="http://www.boxever.com/">Boxever</a>.
   </p>
   <p>
     Special thanks to <a href="https://digitalocean.com/">DigitalOcean</a> for providing hosting resources.
   </p>
   <p>
     The Prometheus logo was contributed by <a href="http://www.makingstuffmove.com/">Robin Greenwood</a>.
   </p>
   </div>
+</div>
-/* Move down content because we have a fixed navbar that is 50px tall */
 body {
-  font-family: 'Open Sans', serif;
+  font-family: 'Open Sans', 'Helvetica Neue', Helvetica, sans-serif;
 }

 .navbar {
   margin-bottom: 0;
+  min-height: 60px;
 }

 .navbar-brand {
-  font-size: 20px;
+  margin-top: 4px;
+  font-family: Lato, sans-serif;
+  font-size: 26px;
+  font-weight: 300;
   color: #aaa;
 }

 .navbar-brand img {
-  height: 27px;
+  height: 30px;
   display: inline;
-  margin-top: -4px;
+  margin-top: -5px;
   margin-right: 3px;
 }

+.navbar-toggle {
+  margin-top: 22px;
+}
+
+.navbar-jumbotron .navbar {
+  min-height: 83px;
+}
+
+.navbar-jumbotron .navbar-brand {
+  margin-top: 14px;
+}
+
+.navbar-jumbotron .main-nav {
+  margin-top: 17px;
+}
+
+.main-nav {
+  margin-top: 4px;
+  letter-spacing: 1px;
+  font-family: 'Lato', sans-serif;
+  font-size: 16px;
+  text-transform: uppercase;
+}
+
 .jumbotron {
   background-color: #e6522c;
   background-image: url("/assets/jumbotron-background.png");
   text-align: center;
+  font-family: Lato, sans-serif;
+  text-shadow: rgba(0, 0, 0, 0.2) 0px 2px 0px;
+  margin-bottom: 50px;
   padding: 40px 0;
 }

 .jumbotron h1 {
+  margin-top: 30px;
+  font-size: 52px;
+  font-weight: 300;
   color: #fff;
 }

-.jumbotron p {
-  color: rgba(255,255,255,0.7);
+.jumbotron .subtitle {
+  font-weight: 300;
+  font-size: 32px;
+  color: rgba(255,255,255,0.8);
+  margin-bottom: 20px;
 }

 .jumbotron a.btn {
   border: none;
   background-color: rgba(0,0,0,0.15);
   color: #fff;
+  padding: 20px 25px 20px 25px;
+  margin: 15px 10px 0 10px;
+  text-transform: uppercase;
 }

 .jumbotron a.btn:hover {
@@ -45,32 +84,126 @@ body {
   color: #fff;
 }

-.jumbotron img {
-  height: 100px;
-  padding-bottom: 15px;
+.feature-item {
+  font-family: 'Lato', sans-serif;
+  font-weight: 300;
+  cursor: pointer;
 }

-.jumbotron .subtitle {
-  margin-bottom: 20px;
+.feature-item:hover {
+  background-color: #fad9d1;
+  border-radius: 3px;
 }

-.main .fa {
-  color: #e6522c;
+.feature-item a {
+  text-decoration: none;
+  color: none;
 }

-.main h2 {
+.feature-item h2 {
+  color: #333;
+  font-weight: 300;
   font-size: 25px;
   white-space: nowrap;
 }

+.feature-item .fa {
+  margin-right: 5px;
+  color: #e6522c;
+}
+
+.feature-item p {
+  font-size: 16px;
+  line-height: 1.8em;
+  text-rendering: optimizeLegibility;
+  -webkit-font-smoothing: antialiased;
+  color: #111;
+}
+
+.top-hr {
+  margin-top: 30px;
+}
+
+.quote {
+  margin: 25px 0 25px 0;
+  font-family: 'Lato', sans-serif;
+  font-weight: 300;
+  text-align: center;
+}
+
+.quote-text {
+  width: 60%;
+  margin: auto;
+  font-size: 22px;
+  font-style: italic;
+  color: #be3511;
+}
+
+.quote-source {
+  font-size: 16px;
+  margin-top: 15px;
+  margin-left: 50px;
+}
+
+.open-source {
+  margin: 25px 0 20px 0;
+  font-family: 'Lato', sans-serif;
+  font-weight: 300;
+  text-align: center;
+}
+
+.open-source h1 {
+  margin-top: 0;
+  font-weight: 300;
+}
+
+.open-source p {
+  width: 50%;
+  margin: auto;
+  font-size: 20px;
+}
+
+.open-source .github-stars {
+  margin-top: 20px;
+  width: 160px;
+  height: 30px;
+}
+
+.trusted-by {
+  font-family: 'Lato', sans-serif;
+  font-size: 20px;
+  font-weight: 300;
+  padding-bottom: 10px;
+}
+
+.logos {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  height: 64px;
+}
+
 footer {
   font-size: 12px;
   color: #333;
 }

-a.sc-logo img {
-  margin-left: 3px;
-  margin-bottom: 3px;
+/* Downloads related styles. */
+.download h2 {
+  margin-top: 2em;
+}
+
+.download-selection {
+  clear: both;
+}
+
+.download-selection .btn-group {
+  padding-right: 1em;
+}
+
+.downloads .checksum {
+  color: #aaa;
+  font-style: italic;
 }

 /* Docs-related styles. */
@@ -80,10 +213,11 @@ a.sc-logo img {
 .side-nav {
   margin-top: 20px;
-  padding: 5px 20px 20px 20px;
+  padding: 5px 20px 20px 7px;
 }

 .side-nav .fa {
+  width: 20px;
   color: #555;
 }
@@ -96,14 +230,18 @@ a.sc-logo img {
 }

 .side-nav .nav-header {
-  color: #e6522c;
-  text-transform: uppercase;
-  font-size: 16px;
   display: block;
   margin: 20px auto 15px auto;
+  font-size: 16px;
 }

-.side-nav .active {
+.side-nav .nav-header a, .side-nav .nav-header {
+  color: #e6522c;
+  text-transform: uppercase;
+  text-decoration: none;
+}
+
+.side-nav ul.active li.active {
   border-left: 3px solid #e6522c;
   margin-left: -2px;
   font-weight: bold;
@@ -114,19 +252,17 @@ a.sc-logo img {
 }

 .doc-content {
-  margin-top: 25px;
-}
-
-.doc-content {
-  font-size: 18px;
+  font-size: 16px;
 }

 .doc-content p, .doc-content.ul, .doc-content .alert {
-  margin: 25px 0 25px 0;
+  margin: 15px 0 15px 0;
+  line-height: 1.5;
 }

 .doc-content > h1 {
   color: #e6522c;
+  font-size: 30px;
   text-transform: uppercase;
   margin: 40px 0 10px 0;
 }
@@ -141,7 +277,7 @@ a.sc-logo img {
 .doc-content > h2 {
   color: #e6522c;
-  font-size: 25px;
+  font-size: 22px;
 }

 .doc-content > h2 code {
@@ -150,7 +286,8 @@ a.sc-logo img {
 }

 .doc-content > h3 {
-  font-size: 22px;
+  font-size: 20px;
+  font-weight: bold;
 }

 .doc-content > h4 {
@@ -203,7 +340,7 @@ a.sc-logo img {
 .toc {
   padding: 1em;
-  background-color: #eee;
+  background-color: #f5f5f5;
 }

 .toc-right {
@@ -223,33 +360,21 @@ a.sc-logo img {
 }
pre { pre {
border: 1px solid #ddd;
border-left: 4px solid #e6522c;
border-radius: 0;
font-family: "Courier New", Monaco, Menlo, Consolas, monospace; font-family: "Courier New", Monaco, Menlo, Consolas, monospace;
background-color: #f9f2f4; background-color: #f5f5f5;
border: 1px solid #333;
color: #333; color: #333;
padding: 15px;
} }
code { pre code {
color: #333; white-space: pre;
} }
.main p { code {
font-size: 16px; color: #333;
line-height: 1.8em;
text-rendering: optimizeLegibility;
-webkit-font-smoothing: antialiased;
}
.main .desc {
margin-bottom: 20px;
}
.main-nav {
letter-spacing: 1px;
font-family: Avenir, 'Open Sans', serif;
font-size: 12px;
font-weight: 500;
text-transform: uppercase;
} }
aside { aside {
...@@ -261,14 +386,3 @@ aside { ...@@ -261,14 +386,3 @@ aside {
article { article {
margin: 10px 0 60px 0; margin: 10px 0 60px 0;
} }
.read-more > a {
color: #888;
}
.logos {
display: flex;
align-items: center;
justify-content: center;
height: 64px;
}
.routing-table {
font: 12px sans-serif;
}
.node circle {
stroke: #e6522c;
stroke-width: 1.5px;
}
.node text {
font: 10px sans-serif;
}
.link {
fill: none;
stroke: #ccc;
stroke-width: 1.5px;
}
.form-control.label-input {
padding: 2px;
width: 450px;
}
textarea {
border-color: #ddd;
height: 450px;
padding: 2px 0;
width: 100%;
font-family: monospace;
}
.block {
display: block;
}
.inline-block {
display: inline-block;
}
...@@ -6,196 +6,64 @@ nav_icon: sliders ...@@ -6,196 +6,64 @@ nav_icon: sliders
# Alertmanager # Alertmanager
The Alertmanager receives alerts from one or more Prometheus servers. The [Alertmanager](https://github.com/prometheus/alertmanager) handles alerts
It manages those alerts, including silencing, inhibition, aggregation and sent by client applications such as the Prometheus server.
sending out notifications via methods such as email, PagerDuty and HipChat. It takes care of deduplicating, grouping, and routing
them to the correct receiver integration such as email, PagerDuty, or OpsGenie.
**WARNING: The Alertmanager is still considered to be very experimental.** It also takes care of silencing and inhibition of alerts.
## Configuration The following describes the core concepts the Alertmanager implements. Consult
the [configuration documentation](../configuration) to learn how to use them
The Alertmanager is configured via command-line flags and a configuration file. in more detail.
The configuration file is an ASCII protocol buffer. To specify which ## Grouping
configuration file to load, use the `-config.file` flag.
Grouping categorizes alerts of similar nature into a single notification. This
``` is especially useful during larger outages when many systems fail at once and
./alertmanager -config.file alertmanager.conf hundreds to thousands of alerts may be firing simultaneously.
```
**Example:** Dozens or hundreds of instances of a service are running in your
To send all alerts to email, set the `-notification.smtp.smarthost` flag to cluster when a network partition occurs. Half of your service instances
an SMTP smarthost (such as a [Postfix null client](http://www.postfix.org/STANDARD_CONFIGURATION_README.html#null_client)) can no longer reach the database.
and use the following configuration: Alerting rules in Prometheus were configured to send an alert for each service
instance if it cannot communicate with the database. As a result, hundreds of
``` alerts are sent to Alertmanager.
notification_config {
name: "alertmanager_test" As a user one only wants to get a single page while still being able to see
email_config { exactly which service instances were affected. Thus one can configure
email: "test@example.org" Alertmanager to group alerts by their cluster and alertname so it sends a
} single compact notification.
}
Grouping of alerts, timing for the grouped notifications, and the receivers
aggregation_rule { of those notifications are configured by a routing tree in the configuration
notification_config_name: "alertmanager_test" file.
}
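For illustration, a minimal sketch of such a routing tree in the Alertmanager's configuration file, assuming a hypothetical receiver named `team-pager` (the timing values are placeholders; consult the configuration documentation for the full set of options):

```
route:
  # Batch alerts that share these label values into a single notification.
  group_by: ['cluster', 'alertname']
  # How long to wait before sending the first notification for a new group,
  # so that further alerts for the same group can be batched into it.
  group_wait: 30s
  # How long to wait before notifying about new alerts added to a group.
  group_interval: 5m
  receiver: 'team-pager'

receivers:
  - name: 'team-pager'
    email_configs:
      - to: 'team@example.org'   # hypothetical address
```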
``` ## Inhibition
### Filtering Inhibition is a concept of suppressing notifications for certain alerts if
certain other alerts are already firing.
An aggregation rule can be made to apply to only some alerts using a filter.
**Example:** An alert is firing that informs that an entire cluster is not
For example, to apply a rule only to alerts with a `severity` label with the value `page`: reachable. Alertmanager can be configured to mute all other alerts concerning
this cluster if that particular alert is firing.
``` This prevents notifications for hundreds or thousands of firing alerts that
aggregation_rule { are unrelated to the actual issue.
filter {
name_re: "severity" Inhibitions are configured through the Alertmanager's configuration file.
value_re: "page"
} ## Silences
notification_config_name: "alertmanager_test"
} Silences are a straightforward way to mute alerts for a given time.
``` A silence is configured based on matchers, just like the routing tree. Incoming
alerts are checked to see whether they match all the equality or regular expression
Multiple filters can be provided. matchers of an active silence.
If they do, no notifications will be send out for that alert.
### Repeat Rate
By default an aggregation rule will repeat notifications every 2 hours. This can be changed using `repeat_rate_seconds`. Silences are configured in the web interface of the Alertmanager.
```
aggregation_rule { ## Client behavior
repeat_rate_seconds: 3600
notification_config_name: "alertmanager_test" The Alertmanager has [special requirements](../clients) for behavior of its
} client. Those are only relevant for advanced use cases where Prometheus
``` is not used to send alerts.
### Notifications
The Alertmanager has support for a growing number of notification methods.
Multiple notifications methods of one or more types can be used in the same
notification config.
The `send_resolved` field can be used with all notification methods to enable or disable
sending notifications that an alert has stopped firing.
#### Email
The `-notification.smtp.smarthost` flag must be set to an SMTP smarthost.
The `-notification.smtp.sender` flag may be set to change the default From address.
```
notification_config {
name: "alertmanager_email"
email_config {
email: "test@example.org"
}
email_config {
email: "foo@example.org"
}
}
```
Plain and CRAM-MD5 SMTP authentication methods are supported.
The `SMTP_AUTH_USERNAME`, `SMTP_AUTH_SECRET`, `SMTP_AUTH_PASSWORD` and
`SMTP_AUTH_IDENTITY` environment variables are used to configure them.
#### PagerDuty
The Alertmanager integrates as a [Generic API
Service](https://support.pagerduty.com/hc/en-us/articles/202830340-Creating-a-Generic-API-Service)
with PagerDuty.
```
notification_config {
name: "alertmanager_pagerduty"
pagerduty_config {
service_key: "supersecretapikey"
}
}
```
#### Pushover
```
notification_config {
name: "alertmanager_pushover"
pushover_config {
token: "mypushovertoken"
user_key: "mypushoverkey"
}
}
```
#### HipChat
```
notification_config {
name: "alertmanager_hipchat"
hipchat_config {
auth_token: "hipchatauthtoken"
room_id: 123456
}
}
```
#### Slack
```
notification_config {
name: "alertmanager_slack"
slack_config {
webhook_url: "webhookurl"
channel: "channelname"
}
}
```
#### Flowdock
```
notification_config {
name: "alertmanager_flowdock"
flowdock_config {
api_token: "4c7234902348234902384234234cdb59"
from_address: "aliaswithgravatar@example.com"
tag: "monitoring"
}
}
```
#### Generic Webhook
The Alertmanager supports sending notifications as JSON to arbitrary
URLs. This could be used to perform automated actions when an
alert fires or integrate with a system that the Alertmanager does not support.
```
notification_config {
name: "alertmanager_webhook"
webhook_config {
url: "http://example.org/my/hook"
}
}
```
An example of JSON message it sends is below.
```json
{
"version": "1",
"status": "firing",
"alert": [
{
"summary": "summary",
"description": "description",
"labels": {
"alertname": "TestAlert"
},
"payload": {
"activeSince": "2015-06-01T12:55:47.356+01:00",
"alertingRule": "ALERT TestAlert IF absent(metric_name) FOR 0y WITH ",
"generatorURL": "http://localhost:9090/graph#%5B%7B%22expr%22%3A%22absent%28metric_name%29%22%2C%22tab%22%3A0%7D%5D",
"value": "1"
}
}
]
}
```
This format is subject to change.
---
title: Clients
sort_rank: 6
nav_icon: sliders
---
# Sending alerts
__**Disclaimer**: Prometheus automatically takes care of sending alerts
generated by its configured [alerting rules](../rules). It is highly
recommended to configure alerting rules in Prometheus based on time series
data rather than implementing a direct client.__
The Alertmanager listens for alerts on an API endpoint at `/api/v1/alerts`.
Clients are expected to continuously re-send alerts as long as they are still
active (usually on the order of 30 seconds to 3 minutes).
Clients can push a list of alerts to that endpoint via a POST request of
the following format:
```
[
{
"labels": {
"<labelname>": "<labelvalue>",
...
},
"annotations": {
"<labelname>": "<labelvalue>",
},
"startsAt": "<rfc3339>",
"endsAt": "<rfc3339>"
"generatorURL": "<generator_url>"
},
...
]
```
The labels are used to identify identical instances of an alert and to perform
deduplication. The annotations are always set to those received most recently
and do not identify an alert.
Both timestamps are optional. If `startsAt` is omitted, the current time
is assigned by the Alertmanager. `endsAt` is only set if the end time of an
alert is known. Otherwise it will be set to a configurable timeout period
after the alert was last received.
The `generatorURL` field is a unique back-link which identifies the causing
entity of this alert in the client.
Alertmanager also supports a legacy endpoint on `/api/alerts` which is
compatible with Prometheus versions 0.16.2 and lower.
...@@ -7,14 +7,12 @@ nav_icon: sliders ...@@ -7,14 +7,12 @@ nav_icon: sliders
# Alerting Overview # Alerting Overview
Alerting with Prometheus is separated into two parts. Alerting rules in Alerting with Prometheus is separated into two parts. Alerting rules in
Prometheus servers send alerts to an Alertmanager. The Alertmanager then Prometheus servers send alerts to an Alertmanager. The [Alertmanager](../alertmanager)
manages those alerts, including silencing, inhibition, aggregation and sending then manages those alerts, including silencing, inhibition, aggregation and
out notifications via methods such as email, PagerDuty and HipChat. sending out notifications via methods such as email, PagerDuty and HipChat.
**WARNING: The Alertmanager is still considered to be very experimental.**
The main steps to setting up alerting and notifications are: The main steps to setting up alerting and notifications are:
* Setup and configure the Alertmanager * Setup and [configure](../configuration) the Alertmanager
* Configure Prometheus to talk to the Alertmanager with the `-alertmanager.url` flag * Configure Prometheus to talk to the Alertmanager with the `-alertmanager.url` flag
* Create alerting rules in Prometheus * Create [alerting rules](../rules) in Prometheus
--- ---
title: Alerting rules title: Alerting rules
sort_rank: 3 sort_rank: 5
--- ---
# Alerting rules # Alerting rules
...@@ -15,32 +15,60 @@ Alerting rules are configured in Prometheus in the same way as [recording ...@@ -15,32 +15,60 @@ Alerting rules are configured in Prometheus in the same way as [recording
rules](../../querying/rules). rules](../../querying/rules).
### Defining alerting rules ### Defining alerting rules
Alerting rules are defined in the following syntax: Alerting rules are defined in the following syntax:
ALERT <alert name> ALERT <alert name>
IF <expression> IF <expression>
[FOR <duration>] [ FOR <duration> ]
[WITH <label set>] [ LABELS <label set> ]
SUMMARY "<summary template>" [ ANNOTATIONS <label set> ]
DESCRIPTION "<description template>"
The optional `FOR` clause causes Prometheus to wait for a certain duration The optional `FOR` clause causes Prometheus to wait for a certain duration
between first encountering a new expression output vector element (like an between first encountering a new expression output vector element (like an
instance with a high HTTP error rate) and counting an alert as firing for this instance with a high HTTP error rate) and counting an alert as firing for this
element. Elements that are active, but not firing yet, are in pending state. element. Elements that are active, but not firing yet, are in pending state.
The `WITH` clause allows specifying a set of additional labels to be attached The `LABELS` clause allows specifying a set of additional labels to be attached
to the alert. Any existing conflicting labels will be overwritten. to the alert. Any existing conflicting labels will be overwritten. The label
values can be templated.
The `ANNOTATIONS` clause specifies another set of labels that are not
identifying for an alert instance. They are used to store longer additional
information such as alert descriptions or runbook links. The annotation values
can be templated.
#### Prometheus v0.16.2 and earlier
In previous Prometheus versions the rule syntax is as follows:
ALERT <alert name>
IF <expression>
[ FOR <duration> ]
[ WITH <label set> ]
SUMMARY <string>
DESCRIPTION <string>
[ RUNBOOK <string> ]
Annotations are not free-form but fixed to a summary, a description, and a
runbook field. Labels are attached using the `WITH` rather than the `LABELS`
clause.
Label values in the `WITH` clause cannot be templated.
NOTE: **Note:** Old alerting rules can be converted to the new syntax using
[this script](https://gist.github.com/xbglowx/d798da98ff9937e33862b285d0121bde#gistcomment-1752515).
#### Templating
The `SUMMARY` should be a short, human-readable summary of the alert (suitable Label and annotation values can be templated using [console templates](../../visualization/consoles).
for e.g. an email subject line), while the `DESCRIPTION` clause should provide The `$labels` variable holds the label key/value pairs of an alert instance
a longer description. Both string fields allow the inclusion of template and `$value` holds the evaluated value of an alert instance.
variables derived from the firing vector elements of the alert:
# To insert a firing element's label values: # To insert a firing element's label values:
{{$labels.<labelname>}} {{ $labels.<labelname> }}
# To insert the numeric expression value of the firing element: # To insert the numeric expression value of the firing element:
{{$value}} {{ $value }}
Examples: Examples:
...@@ -48,20 +76,23 @@ Examples: ...@@ -48,20 +76,23 @@ Examples:
ALERT InstanceDown ALERT InstanceDown
IF up == 0 IF up == 0
FOR 5m FOR 5m
WITH { LABELS { severity = "page" }
severity="page" ANNOTATIONS {
summary = "Instance {{ $labels.instance }} down",
description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
} }
SUMMARY "Instance {{$labels.instance}} down"
DESCRIPTION "{{$labels.instance}} of job {{$labels.job}} has been down for more than 5 minutes."
# Alert for any instance that has a median request latency >1s. # Alert for any instance that has a median request latency >1s.
ALERT ApiHighRequestLatency ALERT APIHighRequestLatency
IF api_http_request_latencies_ms{quantile="0.5"} > 1000 IF api_http_request_latencies_second{quantile="0.5"} > 1
FOR 1m FOR 1m
SUMMARY "High request latency on {{$labels.instance}}" ANNOTATIONS {
DESCRIPTION "{{$labels.instance}} has a median request latency above 1s (current value: {{$value}})" summary = "High request latency on {{ $labels.instance }}",
description = "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)",
}
### Inspecting alerts during runtime ### Inspecting alerts during runtime
To manually inspect which alerts are active (pending or firing), navigate to To manually inspect which alerts are active (pending or firing), navigate to
the "Alerts" tab of your Prometheus instance. This will show you the exact the "Alerts" tab of your Prometheus instance. This will show you the exact
label sets for which each defined alert is currently active. label sets for which each defined alert is currently active.
...@@ -74,6 +105,7 @@ transitions from active to inactive state. Once inactive, the time series does ...@@ -74,6 +105,7 @@ transitions from active to inactive state. Once inactive, the time series does
not get further updates. not get further updates.
### Sending alert notifications ### Sending alert notifications
Prometheus's alerting rules are good at figuring out what is broken *right now*, Prometheus's alerting rules are good at figuring out what is broken *right now*,
but they are not a fully-fledged notification solution. Another layer is needed but they are not a fully-fledged notification solution. Another layer is needed
to add summarization, notification rate limiting, silencing and alert to add summarization, notification rate limiting, silencing and alert
......
...@@ -27,7 +27,7 @@ scraped time series which serve to identify the scraped target: ...@@ -27,7 +27,7 @@ scraped time series which serve to identify the scraped target:
If either of these labels is already present in the scraped data, the behavior If either of these labels is already present in the scraped data, the behavior
depends on the `honor_labels` configuration option. See the depends on the `honor_labels` configuration option. See the
[scrape configuration documentation](/docs/operating/configuration/#scrape-configurations-scrape_config) [scrape configuration documentation](/docs/operating/configuration/#%3Cscrape_config%3E)
for more information. for more information.
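As a rough sketch (the job name and target are placeholders), a scrape configuration that preserves the scraped labels could look like this:

```
scrape_configs:
  - job_name: 'imported-metrics'   # hypothetical job name
    # Keep `job` and `instance` labels found in the scraped data instead of
    # overwriting them with values from this Prometheus server.
    honor_labels: true
    target_groups:
      - targets: ['localhost:9091']
```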
For each instance scrape, Prometheus stores a sample in the following For each instance scrape, Prometheus stores a sample in the following
......
...@@ -22,8 +22,7 @@ currently running goroutines. Use gauges for this use case. ...@@ -22,8 +22,7 @@ currently running goroutines. Use gauges for this use case.
Client library usage documentation for counters: Client library usage documentation for counters:
* [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Counter) * [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Counter)
* [Java](https://github.com/prometheus/client_java/blob/master/client/src/main/java/io/prometheus/client/metrics/Counter.java) * [Java](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Counter.java)
* [Java (simple client)](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Counter.java)
* [Ruby](https://github.com/prometheus/client_ruby#counter) * [Ruby](https://github.com/prometheus/client_ruby#counter)
* [Python](https://github.com/prometheus/client_python#counter) * [Python](https://github.com/prometheus/client_python#counter)
...@@ -39,8 +38,7 @@ running goroutines. ...@@ -39,8 +38,7 @@ running goroutines.
Client library usage documentation for gauges: Client library usage documentation for gauges:
* [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Gauge) * [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Gauge)
* [Java](https://github.com/prometheus/client_java/blob/master/client/src/main/java/io/prometheus/client/metrics/Gauge.java) * [Java](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Gauge.java)
* [Java (simple client)](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Gauge.java)
* [Ruby](https://github.com/prometheus/client_ruby#gauge) * [Ruby](https://github.com/prometheus/client_ruby#gauge)
* [Python](https://github.com/prometheus/client_python#gauge) * [Python](https://github.com/prometheus/client_python#gauge)
...@@ -68,7 +66,7 @@ and differences to [summaries](#summary). ...@@ -68,7 +66,7 @@ and differences to [summaries](#summary).
Client library usage documentation for histograms: Client library usage documentation for histograms:
* [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Histogram) * [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Histogram)
* [Java](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Histogram.java) (histograms are only supported by the simple client but not by the legacy client) * [Java](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Histogram.java)
* [Python](https://github.com/prometheus/client_python#histogram) * [Python](https://github.com/prometheus/client_python#histogram)
## Summary ## Summary
...@@ -92,7 +90,6 @@ to [histograms](#histogram). ...@@ -92,7 +90,6 @@ to [histograms](#histogram).
Client library usage documentation for summaries: Client library usage documentation for summaries:
* [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Summary) * [Go](http://godoc.org/github.com/prometheus/client_golang/prometheus#Summary)
* [Java](https://github.com/prometheus/client_java/blob/master/client/src/main/java/io/prometheus/client/metrics/Summary.java) * [Java](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Summary.java)
* [Java (simple client)](https://github.com/prometheus/client_java/blob/master/simpleclient/src/main/java/io/prometheus/client/Summary.java)
* [Ruby](https://github.com/prometheus/client_ruby#summary) * [Ruby](https://github.com/prometheus/client_ruby#summary)
* [Python](https://github.com/prometheus/client_python#summary) * [Python](https://github.com/prometheus/client_python#summary)
...@@ -21,9 +21,12 @@ HTTP endpoint on your application’s instance: ...@@ -21,9 +21,12 @@ HTTP endpoint on your application’s instance:
Unofficial third-party client libraries: Unofficial third-party client libraries:
* [Bash](https://github.com/aecolley/client_bash) * [Bash](https://github.com/aecolley/client_bash)
* [Common Lisp](https://github.com/deadtrickster/prometheus.cl)
* [Erlang](https://github.com/deadtrickster/prometheus.erl)
* [Haskell](https://github.com/fimad/prometheus-haskell) * [Haskell](https://github.com/fimad/prometheus-haskell)
* [Node.js](https://github.com/siimon/prom-client) * [Lua](https://github.com/knyar/nginx-lua-prometheus) for Nginx
* [.NET / C#](https://github.com/andrasm/prometheus-net) * [.NET / C#](https://github.com/andrasm/prometheus-net)
* [Node.js](https://github.com/siimon/prom-client)
When Prometheus scrapes your instance's HTTP endpoint, the client library When Prometheus scrapes your instance's HTTP endpoint, the client library
sends the current state of all tracked metrics to the server. sends the current state of all tracked metrics to the server.
...@@ -33,7 +36,7 @@ dependencies, you may also implement one of the supported [exposition ...@@ -33,7 +36,7 @@ dependencies, you may also implement one of the supported [exposition
formats](/docs/instrumenting/exposition_formats/) yourself to expose metrics. formats](/docs/instrumenting/exposition_formats/) yourself to expose metrics.
When implementing a new Prometheus client library, please follow the When implementing a new Prometheus client library, please follow the
[Prometheus Client Library Guidelines](https://docs.google.com/document/d/1zHwWVigeAITbaAp6BR4uCByRJH7rtTv4ve6SsoEXJ_Q/edit?usp=sharing). [guidelines on writing client libraries](/docs/instrumenting/writing_clientlibs).
Note that this document is still a work in progress. Please also consider Note that this document is still a work in progress. Please also consider
consulting the [development mailing list](https://groups.google.com/forum/#!forum/prometheus-developers). consulting the [development mailing list](https://groups.google.com/forum/#!forum/prometheus-developers).
We are happy to give advice on how to make your library as useful and We are happy to give advice on how to make your library as useful and
......
--- ---
title: Exporters and third-party integrations title: Exporters and integrations
sort_rank: 3 sort_rank: 4
--- ---
# Exporters and third-party integrations # Exporters and integrations
There are a number of libraries and servers which help in exporting existing There are a number of libraries and servers which help in exporting existing
metrics from third-party systems as Prometheus metrics. This is useful for metrics from third-party systems as Prometheus metrics. This is useful for
...@@ -22,6 +22,7 @@ These exporters are maintained as part of the official ...@@ -22,6 +22,7 @@ These exporters are maintained as part of the official
* [Consul exporter](https://github.com/prometheus/consul_exporter) * [Consul exporter](https://github.com/prometheus/consul_exporter)
* [Graphite exporter](https://github.com/prometheus/graphite_exporter) * [Graphite exporter](https://github.com/prometheus/graphite_exporter)
* [HAProxy exporter](https://github.com/prometheus/haproxy_exporter) * [HAProxy exporter](https://github.com/prometheus/haproxy_exporter)
* [InfluxDB exporter](https://github.com/prometheus/influxdb_exporter)
* [JMX exporter](https://github.com/prometheus/jmx_exporter) * [JMX exporter](https://github.com/prometheus/jmx_exporter)
* [Mesos task exporter](https://github.com/prometheus/mesos_exporter) * [Mesos task exporter](https://github.com/prometheus/mesos_exporter)
* [MySQL server exporter](https://github.com/prometheus/mysqld_exporter) * [MySQL server exporter](https://github.com/prometheus/mysqld_exporter)
...@@ -39,35 +40,48 @@ and maintained. We encourage the creation of more exporters but cannot ...@@ -39,35 +40,48 @@ and maintained. We encourage the creation of more exporters but cannot
vet all of them for best practices. Commonly, those exporters are vet all of them for best practices. Commonly, those exporters are
hosted outside of the Prometheus GitHub organization. hosted outside of the Prometheus GitHub organization.
* [Aerospike exporter](https://github.com/alicebob/asprom)
* [Apache exporter](https://github.com/neezgee/apache_exporter) * [Apache exporter](https://github.com/neezgee/apache_exporter)
* [BIG-IP exporter](https://github.com/ExpressenAB/bigip_exporter)
* [BIND exporter](https://github.com/digitalocean/bind_exporter) * [BIND exporter](https://github.com/digitalocean/bind_exporter)
* [Ceph exporter](https://github.com/digitalocean/ceph_exporter) * [Ceph exporter](https://github.com/digitalocean/ceph_exporter)
* [CouchDB exporter](https://github.com/gesellix/couchdb-exporter) * [CouchDB exporter](https://github.com/gesellix/couchdb-exporter)
* [Django exporter](https://github.com/korfuri/django-prometheus) * [Django exporter](https://github.com/korfuri/django-prometheus)
* [Google's mtail log data extractor](https://github.com/google/mtail) * [Google's mtail log data extractor](https://github.com/google/mtail)
* [Heka exporter](https://github.com/docker-infra/heka_exporter) * [Heka dashboard exporter](https://github.com/docker-infra/heka_exporter)
* [HTTP(s)/TCP/ICMP blackbox prober](https://github.com/discordianfish/blackbox_prober) * [Heka exporter](https://github.com/imgix/heka_exporter)
* [IoT Edison exporter](https://github.com/roman-vynar/edison_exporter)
* [Jenkins exporter](https://github.com/RobustPerception/python_examples/tree/master/jenkins_exporter) * [Jenkins exporter](https://github.com/RobustPerception/python_examples/tree/master/jenkins_exporter)
* [knxd exporter](https://github.com/RichiH/knxd_exporter)
* [Memcached exporter](https://github.com/Snapbug/memcache_exporter) * [Memcached exporter](https://github.com/Snapbug/memcache_exporter)
* [Meteor JS web framework exporter](https://atmospherejs.com/sevki/prometheus-exporter) * [Meteor JS web framework exporter](https://atmospherejs.com/sevki/prometheus-exporter)
* [Minecraft exporter module](https://github.com/Baughn/PrometheusIntegration) * [Minecraft exporter module](https://github.com/Baughn/PrometheusIntegration)
* [Mirth Connect exporter](https://github.com/vynca/mirth_exporter)
* [MongoDB exporter](https://github.com/dcu/mongodb_exporter) * [MongoDB exporter](https://github.com/dcu/mongodb_exporter)
* [Munin exporter](https://github.com/pvdh/munin_exporter) * [Munin exporter](https://github.com/pvdh/munin_exporter)
* [New Relic exporter](https://github.com/jfindley/newrelic_exporter) * [New Relic exporter](https://github.com/jfindley/newrelic_exporter)
* [Nginx metric library](https://github.com/knyar/nginx-lua-prometheus)
* [NSQ exporter](https://github.com/lovoo/nsq_exporter) * [NSQ exporter](https://github.com/lovoo/nsq_exporter)
* [OpenWeatherMap exporter](https://github.com/RichiH/openweathermap_exporter)
* [Passenger exporter](https://github.com/stuartnelson3/passenger_exporter)
* [PgBouncer exporter](http://git.cbaines.net/prometheus-pgbouncer-exporter/about) * [PgBouncer exporter](http://git.cbaines.net/prometheus-pgbouncer-exporter/about)
* [PostgreSQL exporter](https://github.com/wrouesnel/postgres_exporter) * [PostgreSQL exporter](https://github.com/wrouesnel/postgres_exporter)
* [PowerDNS exporter](https://github.com/janeczku/powerdns_exporter) * [PowerDNS exporter](https://github.com/janeczku/powerdns_exporter)
* [RabbitMQ exporter](https://github.com/kbudde/rabbitmq_exporter) * [RabbitMQ exporter](https://github.com/kbudde/rabbitmq_exporter)
* [Rancher exporter](https://github.com/infinityworksltd/prometheus-rancher-exporter)
* [Redis exporter](https://github.com/oliver006/redis_exporter) * [Redis exporter](https://github.com/oliver006/redis_exporter)
* [RethinkDB exporter](https://github.com/oliver006/rethinkdb_exporter) * [RethinkDB exporter](https://github.com/oliver006/rethinkdb_exporter)
* [Rsyslog exporter](https://github.com/digitalocean/rsyslog_exporter) * [Rsyslog exporter](https://github.com/digitalocean/rsyslog_exporter)
* [rTorrent exporter](https://github.com/mdlayher/rtorrent_exporter)
* [scollector exporter](https://github.com/tgulacsi/prometheus_scollector) * [scollector exporter](https://github.com/tgulacsi/prometheus_scollector)
* [SMTP/Maildir MDA blackbox prober](https://github.com/cherti/mailexporter) * [SMTP/Maildir MDA blackbox prober](https://github.com/cherti/mailexporter)
* [Speedtest.net exporter](https://github.com/RichiH/speedtest_exporter)
* [SQL query result set metrics exporter](https://github.com/chop-dbhi/prometheus-sql) * [SQL query result set metrics exporter](https://github.com/chop-dbhi/prometheus-sql)
* [Ubiquiti UniFi exporter](https://github.com/mdlayher/unifi_exporter)
* [Varnish exporter](https://github.com/jonnenauha/prometheus_varnish_exporter)
When implementing a new Prometheus exporter, please follow the When implementing a new Prometheus exporter, please follow the
[Prometheus Exporter Guidelines](https://docs.google.com/document/d/1JapuiRbp-XoyECgl2lPdxITrhm5IyCUq9iA_h6jp3OY/edit). [guidelines on writing exporters](/docs/instrumenting/writing_exporters)
Please also consider consulting the [development mailing Please also consider consulting the [development mailing
list](https://groups.google.com/forum/#!forum/prometheus-developers). We are list](https://groups.google.com/forum/#!forum/prometheus-developers). We are
happy to give advice on how to make your exporter as useful and consistent as happy to give advice on how to make your exporter as useful and consistent as
...@@ -79,11 +93,13 @@ Some third-party software already exposes Prometheus metrics natively, so no ...@@ -79,11 +93,13 @@ Some third-party software already exposes Prometheus metrics natively, so no
separate exporters are needed: separate exporters are needed:
* [cAdvisor](https://github.com/google/cadvisor) * [cAdvisor](https://github.com/google/cadvisor)
* [Doorman](https://github.com/youtube/doorman)
* [Etcd](https://github.com/coreos/etcd) * [Etcd](https://github.com/coreos/etcd)
* [Kubernetes-Mesos](https://github.com/mesosphere/kubernetes-mesos) * [Kubernetes-Mesos](https://github.com/mesosphere/kubernetes-mesos)
* [Kubernetes](https://github.com/GoogleCloudPlatform/kubernetes) * [Kubernetes](https://github.com/GoogleCloudPlatform/kubernetes)
* [RobustIRC](http://robustirc.net/) * [RobustIRC](http://robustirc.net/)
* [SkyDNS](https://github.com/skynetservices/skydns) * [SkyDNS](https://github.com/skynetservices/skydns)
* [Weave Flux](http://weaveworks.github.io/flux/)
## Other third-party utilities ## Other third-party utilities
......
--- ---
title: Exposition formats title: Exposition formats
sort_rank: 4 sort_rank: 6
--- ---
# Exposition formats # Exposition formats
...@@ -40,7 +40,7 @@ Prometheus). ...@@ -40,7 +40,7 @@ Prometheus).
| **Inception** | April 2014 | April 2014 | | **Inception** | April 2014 | April 2014 |
| **Supported in** | Prometheus version `>=0.4.0` | Prometheus version `>=0.4.0` | | **Supported in** | Prometheus version `>=0.4.0` | Prometheus version `>=0.4.0` |
| **Transmission** | HTTP | HTTP | | **Transmission** | HTTP | HTTP |
| **Encoding** | [32-bit varint-encoded record length-delimited](https://developers.google.com/protocol-buffers/docs/reference/java/com/google/protobuf/AbstractMessageLite#writeDelimitedTo(java. io.OutputStream)) Protocol Buffer messages of type [io.prometheus.client.MetricFamily](https://github.com/prometheus/client_model/blob/086fe7ca28bde6cec2acd5223423c1475a362858/metrics.proto#L76-L81) | UTF-8, `\n` line endings | | **Encoding** | [32-bit varint-encoded record length-delimited](https://developers.google.com/protocol-buffers/docs/reference/java/com/google/protobuf/AbstractMessageLite#writeDelimitedTo(java.io.OutputStream)) Protocol Buffer messages of type [io.prometheus.client.MetricFamily](https://github.com/prometheus/client_model/blob/086fe7ca28bde6cec2acd5223423c1475a362858/metrics.proto#L76-L81) | UTF-8, `\n` line endings |
| **HTTP `Content-Type`** | `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited` | `text/plain; version=0.0.4` (A missing `version` value will lead to a fall-back to the most recent text format version.) | | **HTTP `Content-Type`** | `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited` | `text/plain; version=0.0.4` (A missing `version` value will lead to a fall-back to the most recent text format version.) |
| **Optional HTTP `Content-Encoding`** | `gzip` | `gzip` | | **Optional HTTP `Content-Encoding`** | `gzip` | `gzip` |
| **Advantages** | <ul><li>Cross-platform</li><li>Size</li><li>Encoding and decoding costs</li><li>Strict schema</li><li>Supports concatenation and theoretically streaming (only server-side behavior would need to change)</li></ul> | <ul><li>Human-readable</li><li>Easy to assemble, especially for minimalistic cases (no nesting required)</li><li>Readable line by line (with the exception of type hints and docstrings)</li></ul> | | **Advantages** | <ul><li>Cross-platform</li><li>Size</li><li>Encoding and decoding costs</li><li>Strict schema</li><li>Supports concatenation and theoretically streaming (only server-side behavior would need to change)</li></ul> | <ul><li>Human-readable</li><li>Easy to assemble, especially for minimalistic cases (no nesting required)</li><li>Readable line by line (with the exception of type hints and docstrings)</li></ul> |
...@@ -113,13 +113,13 @@ format. The following conventions apply: ...@@ -113,13 +113,13 @@ format. The following conventions apply:
See also the example below. See also the example below.
``` ```
# HELP api_http_request_count The total number of HTTP requests. # HELP http_requests_total The total number of HTTP requests.
# TYPE api_http_request_count counter # TYPE http_requests_total counter
http_request_count{method="post",code="200"} 1027 1395066363000 http_requests_total{method="post",code="200"} 1027 1395066363000
http_request_count{method="post",code="400"} 3 1395066363000 http_requests_total{method="post",code="400"} 3 1395066363000
# Escaping in label values: # Escaping in label values:
msdos_file_access_time_ms{path="C:\\DIR\\FILE.TXT",error="Cannot find file:\n\"FILE.TXT\""} 1.234e3 msdos_file_access_time_seconds{path="C:\\DIR\\FILE.TXT",error="Cannot find file:\n\"FILE.TXT\""} 1.458255915e9
# Minimalistic line: # Minimalistic line:
metric_without_timestamp_and_labels 12.47 metric_without_timestamp_and_labels 12.47
......
--- ---
title: Pushing metrics title: Pushing metrics
sort_rank: 2 sort_rank: 3
--- ---
# Pushing metrics # Pushing metrics
...@@ -17,13 +17,15 @@ makes it easy to instrument even shell scripts without a client library. ...@@ -17,13 +17,15 @@ makes it easy to instrument even shell scripts without a client library.
[README.md](https://github.com/prometheus/pushgateway/blob/master/README.md). [README.md](https://github.com/prometheus/pushgateway/blob/master/README.md).
* For use from Java see the * For use from Java see the
[PushGateway](http://prometheus.github.io/client_java/io/prometheus/client/exporter/PushGateway.html) [PushGateway](https://prometheus.io/client_java/io/prometheus/client/exporter/PushGateway.html)
class. class.
* For use from Go see the [Push](http://godoc.org/github.com/prometheus/client_golang/prometheus#Push) and [PushAdd](http://godoc.org/github.com/prometheus/client_golang/prometheus#PushAdd) functions. * For use from Go see the [Push](http://godoc.org/github.com/prometheus/client_golang/prometheus#Push) and [PushAdd](http://godoc.org/github.com/prometheus/client_golang/prometheus#PushAdd) functions.
* For use from Python see [Exporting to a Pushgateway](https://github.com/prometheus/client_python#exporting-to-a-pushgateway). * For use from Python see [Exporting to a Pushgateway](https://github.com/prometheus/client_python#exporting-to-a-pushgateway).
* For use from Ruby see the [Pushgateway documentation](https://github.com/prometheus/client_ruby#pushgateway).
## Java batch job example ## Java batch job example
This example illustrates how to instrument a batch job and alert on it not having succeeded recently. This example illustrates how to instrument a batch job and alert on it not having succeeded recently.
......
...@@ -65,7 +65,7 @@ also works well for many short-lived, frequently changing sets of time series. ...@@ -65,7 +65,7 @@ also works well for many short-lived, frequently changing sets of time series.
## Prometheus vs. InfluxDB ## Prometheus vs. InfluxDB
[InfluxDB](http://influxdb.com/) is a very promising new open-source time [InfluxDB](https://influxdata.com/) is a very promising new open-source time
series database. It did not exist when Prometheus development began, so we were series database. It did not exist when Prometheus development began, so we were
unable to consider it as an alternative at the time. Still, there are unable to consider it as an alternative at the time. Still, there are
significant differences between Prometheus and InfluxDB, and both systems are significant differences between Prometheus and InfluxDB, and both systems are
......
...@@ -35,7 +35,8 @@ on the Robust Perception blog to get started. ...@@ -35,7 +35,8 @@ on the Robust Perception blog to get started.
### What language is Prometheus written in? ### What language is Prometheus written in?
Most Prometheus components are written in Go. Some are also written in Java and Ruby. Most Prometheus components are written in Go. Some are also written in Java,
Python, and Ruby.
### How stable are Prometheus features, storage formats, and APIs? ### How stable are Prometheus features, storage formats, and APIs?
...@@ -146,8 +147,14 @@ the [exposition formats](/docs/instrumenting/exposition_formats/). ...@@ -146,8 +147,14 @@ the [exposition formats](/docs/instrumenting/exposition_formats/).
### Can I monitor machines? ### Can I monitor machines?
Yes, the [Node Exporter](https://github.com/prometheus/node_exporter) exposes Yes, the [Node Exporter](https://github.com/prometheus/node_exporter) exposes
an extensive set of machine-level metrics on Linux such as CPU usage, memory, an extensive set of machine-level metrics on Linux and other Unix systems such
disk utilization, filesystem fullness and network bandwidth. as CPU usage, memory, disk utilization, filesystem fullness and network
bandwidth.
### Can I monitor network devices?
Yes, the [SNMP Exporter](https://github.com/prometheus/snmp_exporter) allows
monitoring of devices that support SNMP.
### Can I monitor batch jobs? ### Can I monitor batch jobs?
...@@ -159,12 +166,6 @@ jobs. ...@@ -159,12 +166,6 @@ jobs.
See [exporters for third-party systems](/docs/instrumenting/exporters/). See [exporters for third-party systems](/docs/instrumenting/exporters/).
### Which Java client should I use?
New users are advised to use the
[simpleclient](https://github.com/prometheus/client_java/tree/master/simpleclient).
For more information, see the [comparison](https://github.com/prometheus/client_java/wiki).
### Can I monitor JVM applications via JMX? ### Can I monitor JVM applications via JMX?
Yes, for applications that you cannot instrument directly with the Java client Yes, for applications that you cannot instrument directly with the Java client
...@@ -233,9 +234,9 @@ you an idea, here are some results from benchmarks: ...@@ -233,9 +234,9 @@ you an idea, here are some results from benchmarks:
sustained an ingestion rate of 34k samples per second, belonging to sustained an ingestion rate of 34k samples per second, belonging to
170k time series, scraped from 600 targets. 170k time series, scraped from 600 targets.
* On a modern server with 64GiB RAM and SSD, Prometheus sustained an * On a modern server with 64GiB RAM, 32 CPU cores, and SSD, Prometheus
ingestion rate of 340k samples per second, belonging to 2M time sustained an ingestion rate of 525k samples per second, belonging to 1.4M
series, scraped from 1800 targets. time series, scraped from 1650 targets.
In both cases, there were no obvious bottlenecks. Various stages of the In both cases, there were no obvious bottlenecks. Various stages of the
processing pipelines reached their limits more or less at the same processing pipelines reached their limits more or less at the same
...@@ -245,3 +246,21 @@ Running out of inodes is highly unlikely in a usual set-up. There is a ...@@ -245,3 +246,21 @@ Running out of inodes is highly unlikely in a usual set-up. There is a
possible downside: If you want to delete Prometheus's storage possible downside: If you want to delete Prometheus's storage
directory, you will notice that some file systems are very slow when directory, you will notice that some file systems are very slow when
deleting files. deleting files.
### Why don't the Prometheus server components support TLS or authentication? Can I add those?
While TLS and authentication are frequently requested features, we have
intentionally not implemented them in any of Prometheus's server-side
components. There are so many different options and parameters for both (10+
options for TLS alone) that we have decided to focus on building the best
monitoring system possible rather than supporting fully generic TLS and
authentication solutions in every server component.
If you need TLS or authentication, we recommend putting a reverse proxy in
front of Prometheus. See for example [Adding Basic Auth to Prometheus with
Nginx](http://www.robustperception.io/adding-basic-auth-to-prometheus-with-nginx/).
Note that this applies only to inbound connections. Prometheus does support
[scraping TLS- and auth-enabled
targets](/docs/operating/configuration/#%3Cscrape_config%3E), and other
Prometheus components that create outbound connections have similar support.
...@@ -13,16 +13,15 @@ series data. ...@@ -13,16 +13,15 @@ series data.
## Downloading and running Prometheus ## Downloading and running Prometheus
[Download the latest release](https://github.com/prometheus/prometheus/releases) [Download the latest release](/download) of Prometheus for your platform, then
of Prometheus for your platform, then extract and run it: extract and run it:
``` ```
tar xvfz prometheus-*.tar.gz tar xvfz prometheus-*.tar.gz
cd prometheus-* cd prometheus-*
./prometheus
``` ```
It should fail to start, complaining about the absence of a configuration file. Before starting Prometheus, let's configure it.
## Configuring Prometheus to monitor itself ## Configuring Prometheus to monitor itself
...@@ -37,8 +36,6 @@ Prometheus configuration as a file named `prometheus.yml`: ...@@ -37,8 +36,6 @@ Prometheus configuration as a file named `prometheus.yml`:
``` ```
global: global:
scrape_interval: 15s # By default, scrape targets every 15 seconds. scrape_interval: 15s # By default, scrape targets every 15 seconds.
evaluation_interval: 15s # By default, scrape targets every 15 seconds.
# scrape_timeout is set to the global default (10s).
# Attach these labels to any time series or alerts when communicating with # Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager). # external systems (federation, remote storage, Alertmanager).
...@@ -53,7 +50,6 @@ scrape_configs: ...@@ -53,7 +50,6 @@ scrape_configs:
# Override the global default and scrape targets from this job every 5 seconds. # Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s scrape_interval: 5s
scrape_timeout: 10s
target_groups: target_groups:
- targets: ['localhost:9090'] - targets: ['localhost:9090']
...@@ -89,6 +85,10 @@ Blindly setting `GOMAXPROCS` to a high value can be ...@@ -89,6 +85,10 @@ Blindly setting `GOMAXPROCS` to a high value can be
counterproductive. See the relevant [Go counterproductive. See the relevant [Go
FAQs](http://golang.org/doc/faq#Why_no_multi_CPU). FAQs](http://golang.org/doc/faq#Why_no_multi_CPU).
Note that Prometheus by default uses around 3GB of memory. If you have a Note that Prometheus by default uses around 3GB of memory. If you have a
smaller machine, you can tune Prometheus to use less memory. For details,
see the [memory usage documentation](/docs/operating/storage/#memory-usage).
## Using the expression browser ## Using the expression browser
Let us try looking at some data that Prometheus has collected about itself. To Let us try looking at some data that Prometheus has collected about itself. To
...@@ -190,7 +190,6 @@ scrape_configs: ...@@ -190,7 +190,6 @@ scrape_configs:
# Override the global default and scrape targets from this job every 5 seconds. # Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s scrape_interval: 5s
scrape_timeout: 10s
target_groups: target_groups:
- targets: ['localhost:8080', 'localhost:8081'] - targets: ['localhost:8080', 'localhost:8081']
...@@ -238,8 +237,7 @@ look like this: ...@@ -238,8 +237,7 @@ look like this:
``` ```
global: global:
scrape_interval: 15s # By default, scrape targets every 15 seconds. scrape_interval: 15s # By default, scrape targets every 15 seconds.
evaluation_interval: 15s # By default, scrape targets every 15 seconds. evaluation_interval: 15s # Evaluate rules every 15 seconds.
# scrape_timeout is set to the global default (10s).
# Attach these extra labels to all timeseries collected by this Prometheus instance. # Attach these extra labels to all timeseries collected by this Prometheus instance.
external_labels: external_labels:
...@@ -253,7 +251,6 @@ scrape_configs: ...@@ -253,7 +251,6 @@ scrape_configs:
# Override the global default and scrape targets from this job every 5 seconds. # Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s scrape_interval: 5s
scrape_timeout: 10s
target_groups: target_groups:
- targets: ['localhost:9090'] - targets: ['localhost:9090']
...@@ -262,7 +259,6 @@ scrape_configs: ...@@ -262,7 +259,6 @@ scrape_configs:
# Override the global default and scrape targets from this job every 5 seconds. # Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s scrape_interval: 5s
scrape_timeout: 10s
target_groups: target_groups:
- targets: ['localhost:8080', 'localhost:8081'] - targets: ['localhost:8080', 'localhost:8081']
......
...@@ -7,39 +7,32 @@ sort_rank: 2 ...@@ -7,39 +7,32 @@ sort_rank: 2
## Using pre-compiled binaries ## Using pre-compiled binaries
We provide precompiled binaries for released versions for most Prometheus We provide precompiled binaries for most official Prometheus components.
components. These may be found under the "Releases" tab of the respective Check out the [download section](/download) for a list of all available
GitHub repositories. For example, for the main Prometheus server, binary versions.
releases are available at
[https://github.com/prometheus/prometheus/releases](https://github.com/prometheus/prometheus/releases).
Debian and RPM packages are being worked on.
## From source ## From source
For building Prometheus from source, see the relevant [`README.md` For building Prometheus components from source, see the `Makefile` targets in
section](https://github.com/prometheus/prometheus/blob/master/README.md#use-make). the respective repository.
Note that this documentation (as published on NOTE: **Note:** The documentation on this website refers to the latest stable
[prometheus.io](http://prometheus.io)) refers to the latest production release (excluding pre-releases). The branch
release. The head of the [next-release](https://github.com/prometheus/docs/compare/next-release) refers
[prometheus/docs](https://github.com/prometheus/docs) GitHub to unreleased changes that are in master branches of source repos.
repository refers to the (possibly not yet released) head of the
[prometheus/prometheus](https://github.com/prometheus/prometheus) (and
other) repositories.
## Using Docker ## Using Docker
All Prometheus services are available as Docker images under the All Prometheus services are available as Docker images under the
[prom](https://hub.docker.com/u/prom/) organization. [prom](https://hub.docker.com/u/prom/) organization.
Running Prometheus on Docker is as simple as Running Prometheus on Docker is as simple as `docker run -p 9090:9090
`docker run -p 9090:9090 prom/prometheus`. This starts Prometheus with prom/prometheus`. This starts Prometheus with a sample configuration and
a sample configuration and exposes it on port 9090. exposes it on port 9090.
The Prometheus image uses a volume to store the actual metrics. For The Prometheus image uses a volume to store the actual metrics. For
production deployments it is highly recommended to use the production deployments it is highly recommended to use the
[Data Volume Container](https://docs.docker.com/userguide/dockervolumes/#creating-and-mounting-a-data-volume-container) [Data Volume Container](https://docs.docker.com/engine/userguide/containers/dockervolumes/#creating-and-mounting-a-data-volume-container)
pattern to ease managing the data on Prometheus upgrades. pattern to ease managing the data on Prometheus upgrades.
To provide your own configuration, there are several options. Here are To provide your own configuration, there are several options. Here are
......
...@@ -13,13 +13,11 @@ with Prometheus. ...@@ -13,13 +13,11 @@ with Prometheus.
## Blogs ## Blogs
* This site has its own [blog](http://prometheus.io/blog/). * This site has its own [blog](/blog/).
* [SoundCloud's blog post announcing Prometheus](https://developers.soundcloud.com/blog/prometheus-monitoring-at-soundcloud) * [SoundCloud's blog post announcing Prometheus](https://developers.soundcloud.com/blog/prometheus-monitoring-at-soundcloud)
– a more elaborate overview than the one given on this site. – a more elaborate overview than the one given on this site.
* Prometheus-related posts on the * Prometheus-related posts on the
[Robust Perception blog](http://www.robustperception.io/tag/prometheus/). [Robust Perception blog](http://www.robustperception.io/tag/prometheus/).
* The [monitoring series](http://www.boxever.com/tag/monitoring) on Boxever's
tech blog.
## Tutorials ## Tutorials
...@@ -34,10 +32,11 @@ with Prometheus. ...@@ -34,10 +32,11 @@ with Prometheus.
## Recorded talks ## Recorded talks
* [Prometheus: A Next-Generation Monitoring System](https://www.usenix.org/conference/srecon15europe/program/presentation/rabenstein) – Julius Volz and Björn Rabenstein at SREcon15 Europe, Dublin. * [Prometheus: A Next-Generation Monitoring System](https://www.usenix.org/conference/srecon15europe/program/presentation/rabenstein) – Julius Volz and Björn Rabenstein at SREcon15 Europe, Dublin.
* [Prometheus: A Next-Generation Monitoring System](https://www.youtube.com/watch?v=cwRmXqXKGtk) - Brian Brazil at FOSDEM 2016 ([slides](http://www.slideshare.net/brianbrazil/prometheus-a-next-generation-monitoring-system-fosdem-2016)).
* [What is your application doing right now?](http://youtu.be/Z0LlilNpX1U) – Matthias Gruter, Transmode, at DevOps Stockholm Meetup. * [What is your application doing right now?](http://youtu.be/Z0LlilNpX1U) – Matthias Gruter, Transmode, at DevOps Stockholm Meetup.
* [Prometheus workshop](https://vimeo.com/131581353) – Jamie Wilkinson at Monitorama PDX 2015 ([slides](https://docs.google.com/presentation/d/1X1rKozAUuF2MVc1YXElFWq9wkcWv3Axdldl8LOH9Vik/edit)). * [Prometheus workshop](https://vimeo.com/131581353) – Jamie Wilkinson at Monitorama PDX 2015 ([slides](https://docs.google.com/presentation/d/1X1rKozAUuF2MVc1YXElFWq9wkcWv3Axdldl8LOH9Vik/edit)).
* [Monitoring Hadoop with Prometheus](https://www.youtube.com/watch?v=qs2sqOLNGtw) – Brian Brazil at the Hadoop User Group Ireland ([slides](http://www.slideshare.net/brianbrazil/monitoring-hadoop-with-prometheus-hadoop-user-group-ireland-december-2015)). * [Monitoring Hadoop with Prometheus](https://www.youtube.com/watch?v=qs2sqOLNGtw) – Brian Brazil at the Hadoop User Group Ireland ([slides](http://www.slideshare.net/brianbrazil/monitoring-hadoop-with-prometheus-hadoop-user-group-ireland-december-2015)).
* In German: [Monitoring mit Prometheus](https://entropia.de/GPN15:Monitoring_mit_Prometheus) – Michael Stapelberg at Gulaschprogrammiernacht 15. * In German: [Monitoring mit Prometheus](https://media.ccc.de/v/eh16-43-monitoring_mit_prometheus#video&t=2804) – Michael Stapelberg at [Easterhegg 2016](https://eh16.easterhegg.eu/).
## Presentation slides ## Presentation slides
......
...@@ -11,7 +11,7 @@ sort_rank: 1 ...@@ -11,7 +11,7 @@ sort_rank: 1
monitoring and alerting toolkit originally built at monitoring and alerting toolkit originally built at
[SoundCloud](http://soundcloud.com). Since its inception in 2012, many [SoundCloud](http://soundcloud.com). Since its inception in 2012, many
companies and organizations have adopted Prometheus, and the project has a very companies and organizations have adopted Prometheus, and the project has a very
active developer and user community. It is now a standalone open source project active developer and user [community](/community). It is now a standalone open source project
and maintained independently of any company. and maintained independently of any company.
For a more elaborate overview, see the resources linked from the For a more elaborate overview, see the resources linked from the
......
...@@ -54,7 +54,7 @@ global: ...@@ -54,7 +54,7 @@ global:
# How long until a scrape request times out. # How long until a scrape request times out.
[ scrape_timeout: <duration> | default = 10s ] [ scrape_timeout: <duration> | default = 10s ]
# How frequently to evaluate rules by default. # How frequently to evaluate rules.
[ evaluation_interval: <duration> | default = 1m ] [ evaluation_interval: <duration> | default = 1m ]
# The labels to add to any time series or alerts when communicating with # The labels to add to any time series or alerts when communicating with
...@@ -169,6 +169,10 @@ kubernetes_sd_configs: ...@@ -169,6 +169,10 @@ kubernetes_sd_configs:
serverset_sd_configs: serverset_sd_configs:
[ - <serverset_sd_config> ... ] [ - <serverset_sd_config> ... ]
# List of AirBnB's Nerve service discovery configurations.
nerve_sd_configs:
[ - <nerve_sd_config> ... ]
# List of EC2 service discovery configurations. # List of EC2 service discovery configurations.
ec2_sd_configs: ec2_sd_configs:
[ - <ec2_sd_config> ... ] [ - <ec2_sd_config> ... ]
...@@ -237,8 +241,8 @@ A DNS-SD configuration allows specifying a set of DNS record names which ...@@ -237,8 +241,8 @@ A DNS-SD configuration allows specifying a set of DNS record names which
are periodically queried to discover a list of targets (host-port pairs). The are periodically queried to discover a list of targets (host-port pairs). The
DNS servers to be contacted are read from `/etc/resolv.conf`. DNS servers to be contacted are read from `/etc/resolv.conf`.
During the [relabeling phase](#target-relabeling-relabel_config), the meta During the [relabeling phase](#relabel_config), the meta
label `__meta_dns_srv_name` is available on each target and is set to the SRV label `__meta_dns_name` is available on each target and is set to the SRV
record name that produced the discovered target. record name that produced the discovered target.
``` ```
...@@ -295,9 +299,9 @@ services: ...@@ -295,9 +299,9 @@ services:
``` ```
Note that the IP number and port used to scrape the targets is assembled as Note that the IP number and port used to scrape the targets is assembled as
`<__meta_consul_address>:<__meta_consul_service_port`. However, in some `<__meta_consul_address>:<__meta_consul_service_port>`. However, in some
Consul setups, the relevant address is in `__meta_consul_service_address`. Consul setups, the relevant address is in `__meta_consul_service_address`.
In those cases, you can use the [relabel](#target-relabeling-relabel_config) In those cases, you can use the [relabel](#relabel_config)
feature to replace the special `__address__` label. feature to replace the special `__address__` label.
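A hedged sketch of such a rule, assuming the default `;` separator for
concatenated source labels:

```
relabel_configs:
  - source_labels: [__meta_consul_service_address, __meta_consul_service_port]
    regex: (.+);(.+)
    target_label: __address__
    replacement: ${1}:${2}
```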
### `<kubernetes_sd_config>` ### `<kubernetes_sd_config>`
...@@ -372,7 +376,7 @@ tls_config: ...@@ -372,7 +376,7 @@ tls_config:
[ retry_interval: <duration> | default = 1s ] [ retry_interval: <duration> | default = 1s ]
``` ```
### `<marathon_sd_configs>` ### `<marathon_sd_config>`
CAUTION: Marathon SD is in beta: breaking changes to configuration are still CAUTION: Marathon SD is in beta: breaking changes to configuration are still
likely in future releases. likely in future releases.
...@@ -438,6 +442,29 @@ paths: ...@@ -438,6 +442,29 @@ paths:
Serverset data must be in the JSON format, the Thrift format is not currently supported. Serverset data must be in the JSON format, the Thrift format is not currently supported.
### `<nerve_sd_config>`
Nerve SD configurations allow retrieving scrape targets from
[AirBnB's Nerve](https://github.com/airbnb/nerve), which are stored in
[Zookeeper](https://zookeeper.apache.org/).
The following meta labels are available on targets during relabeling:
* `__meta_nerve_path`: the full path to the endpoint node in Zookeeper
* `__meta_nerve_endpoint_host`: the host of the endpoint
* `__meta_nerve_endpoint_port`: the port of the endpoint
* `__meta_nerve_endpoint_name`: the name of the endpoint
```
# The Zookeeper servers.
servers:
- <host>
# Paths can point to a single service, or the root of a tree of services.
paths:
- <string>
[ timeout: <duration> | default = 10s ]
```
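For illustration, a concrete Nerve SD entry might look like the following;
the server and path are invented for the example:

```
nerve_sd_configs:
  - servers:
      - zookeeper.example.org:2181
    paths:
      - /nerve/services/webapp/services
```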
### `<ec2_sd_config>` ### `<ec2_sd_config>`
CAUTION: EC2 SD is in beta: breaking changes to configuration are still CAUTION: EC2 SD is in beta: breaking changes to configuration are still
...@@ -449,10 +476,16 @@ the public IP address with relabeling. ...@@ -449,10 +476,16 @@ the public IP address with relabeling.
The following meta labels are available on targets during relabeling: The following meta labels are available on targets during relabeling:
* `__meta_ec2_availability_zone`: the availability zone in which the instance is running
* `__meta_ec2_instance_id`: the EC2 instance ID * `__meta_ec2_instance_id`: the EC2 instance ID
* `__meta_ec2_public_ip`: the public IP address of the instance
* `__meta_ec2_private_ip`: the private IP address of the instance, if present * `__meta_ec2_private_ip`: the private IP address of the instance, if present
* `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available
* `__meta_ec2_public_ip`: the public IP address of the instance, if available
* `__meta_ec2_subnet_id`: comma-separated list of subnet IDs in which the instance is running, if available
* `__meta_ec2_tag_<tagkey>`: each tag value of the instance * `__meta_ec2_tag_<tagkey>`: each tag value of the instance
* `__meta_ec2_vpc_id`: the ID of the VPC in which the instance is running, if available
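For example, a sketch of a relabeling rule that keeps only instances carrying
a particular tag (the tag key `Environment` is hypothetical):

```
relabel_configs:
  - source_labels: [__meta_ec2_tag_Environment]
    regex: production
    action: keep
```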
See below for the configuration options for EC2 discovery: See below for the configuration options for EC2 discovery:
...@@ -503,7 +536,7 @@ As a fallback, the file contents are also re-read periodically at the specified ...@@ -503,7 +536,7 @@ As a fallback, the file contents are also re-read periodically at the specified
refresh interval. refresh interval.
Each target has a meta label `__meta_filepath` during the Each target has a meta label `__meta_filepath` during the
[relabeling phase](#target-relabeling-relabel_config). Its value is set to the [relabeling phase](#relabel_config). Its value is set to the
filepath from which the target was extracted. filepath from which the target was extracted.
``` ```
...@@ -579,7 +612,8 @@ the `replace`, `keep`, `drop` and `labelmap` actions. The regex is fully anchore ...@@ -579,7 +612,8 @@ the `replace`, `keep`, `drop` and `labelmap` actions. The regex is fully anchore
* `replace`: Match `regex` against the concatenated `source_labels`. Then, set * `replace`: Match `regex` against the concatenated `source_labels`. Then, set
`target_label` to `replacement`, with match group references `target_label` to `replacement`, with match group references
(`${1}`, `${2}`, ...) in `replacement` substituted by their value. (`${1}`, `${2}`, ...) in `replacement` substituted by their value. If `regex`
does not match, no replacement takes place.
* `keep`: Drop targets for which `regex` does not match the concatenated `source_labels`. * `keep`: Drop targets for which `regex` does not match the concatenated `source_labels`.
* `drop`: Drop targets for which `regex` matches the concatenated `source_labels`. * `drop`: Drop targets for which `regex` matches the concatenated `source_labels`.
* `hashmod`: Set `target_label` to the `modulus` of a hash of the concatenated `source_labels`. * `hashmod`: Set `target_label` to the `modulus` of a hash of the concatenated `source_labels`.
...@@ -587,7 +621,7 @@ the `replace`, `keep`, `drop` and `labelmap` actions. The regex is fully anchore ...@@ -587,7 +621,7 @@ the `replace`, `keep`, `drop` and `labelmap` actions. The regex is fully anchore
to label names given by `replacement` with match group references to label names given by `replacement` with match group references
(`${1}`, `${2}`, ...) in `replacement` substituted by their value. (`${1}`, `${2}`, ...) in `replacement` substituted by their value.
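As a sketch of the `labelmap` action (the EC2 meta label prefix is borrowed
from the example above, and the default `$1` replacement is assumed), the
following copies every EC2 instance tag onto the target as a regular label:

```
relabel_configs:
  - action: labelmap
    regex: __meta_ec2_tag_(.+)
```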
### `<metric_relabel_configs>` ### `<metric_relabel_config>`
Metric relabeling is applied to samples as the last step before ingestion. It Metric relabeling is applied to samples as the last step before ingestion. It
has the same configuration format and actions as target relabeling. Metric has the same configuration format and actions as target relabeling. Metric
......
...@@ -175,7 +175,7 @@ the number of chunks waiting for persistence in relation to the ...@@ -175,7 +175,7 @@ the number of chunks waiting for persistence in relation to the
chunks in memory exceeds the `storage.local.memory-chunks` value (if at all, chunks in memory exceeds the `storage.local.memory-chunks` value (if at all,
and only if there is a minimum number of chunks waiting for persistence so that and only if there is a minimum number of chunks waiting for persistence so that
faster persisting of chunks can help at all). The score is between 0 and 1, faster persisting of chunks can help at all). The score is between 0 and 1,
where 1 corresponds to the highest unrgency. Depending on the score, Prometheus where 1 corresponds to the highest urgency. Depending on the score, Prometheus
will write to disk more frequently. Should the score ever pass the threshold will write to disk more frequently. Should the score ever pass the threshold
of 0.8, Prometheus enters “rushed mode” (which you can see in the logs). In of 0.8, Prometheus enters “rushed mode” (which you can see in the logs). In
rushed mode, the following strategies are applied to speed up persisting chunks: rushed mode, the following strategies are applied to speed up persisting chunks:
......
...@@ -221,10 +221,9 @@ Two rules of thumb: ...@@ -221,10 +221,9 @@ Two rules of thumb:
## What can I do if my client library does not support the metric type I need? ## What can I do if my client library does not support the metric type I need?
Implement it! [Code contributions are welcome](/community/). In general, we
expect histograms to be more urgently needed than summaries. Histograms are
also easier to implement in a client library, so we recommend implementing
histograms first, if in doubt. The reason why some libraries offer summaries
but not histograms (such as the Ruby client) is that histograms are a more
recent feature of Prometheus.
...@@ -202,7 +202,7 @@ the value can go down, it is a gauge. ...@@ -202,7 +202,7 @@ the value can go down, it is a gauge.
Counters can only go up (and reset, such as when a process restarts). They are Counters can only go up (and reset, such as when a process restarts). They are
useful for accumulating the number of events, or the amount of something at useful for accumulating the number of events, or the amount of something at
each event. For example, the total number of HTTP requests, or the total number of each event. For example, the total number of HTTP requests, or the total number of
of bytes sent in HTTP requests. Raw counters are rarely useful. Use the bytes sent in HTTP requests. Raw counters are rarely useful. Use the
`rate()` function to get the per-second rate at which they are increasing. `rate()` function to get the per-second rate at which they are increasing.
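For example, to get the per-second HTTP request rate as measured over the
last five minutes (the metric name is illustrative):

```
rate(api_http_requests_total[5m])
```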
Gauges can be set, go up, and go down. They are useful for snapshots of state, Gauges can be set, go up, and go down. They are useful for snapshots of state,
...@@ -230,7 +230,7 @@ For code which is performance-critical or called more than 100k times a second ...@@ -230,7 +230,7 @@ For code which is performance-critical or called more than 100k times a second
inside a given process, you may wish to take some care as to how many metrics inside a given process, you may wish to take some care as to how many metrics
you update. you update.
A Java Simpleclient counter takes A Java counter takes
[12-17ns](https://github.com/prometheus/client_java/blob/master/benchmark/README.md) [12-17ns](https://github.com/prometheus/client_java/blob/master/benchmark/README.md)
to increment depending on contention. Other languages will have similar to increment depending on contention. Other languages will have similar
performance. If that amount of time is significant for your inner loop, limit performance. If that amount of time is significant for your inner loop, limit
...@@ -250,5 +250,5 @@ to correctly handle them. To avoid this, export `0` (or `NaN`, if `0` ...@@ -250,5 +250,5 @@ to correctly handle them. To avoid this, export `0` (or `NaN`, if `0`
would be misleading) for any time series you know may exist in would be misleading) for any time series you know may exist in
advance. advance.
Most Prometheus client libraries (including Go and Java Simpleclient) will Most Prometheus client libraries (including Go, Java, and Python) will
automatically export a `0` for you for metrics with no labels. automatically export a `0` for you for metrics with no labels.
---
title: When to use the Pushgateway
sort_rank: 7
---
# When to use the Pushgateway
The Pushgateway is an intermediary service which allows you to push metrics
from jobs which cannot be scraped. For details, see [Pushing metrics](/docs/instrumenting/pushing/).
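For example, a single sample can be pushed with `curl` (host and job name are
illustrative):

```
echo "some_metric 3.14" | curl --data-binary @- http://pushgateway.example.org:9091/metrics/job/some_job
```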
## Should I be using the Pushgateway?
**We only recommend using the Pushgateway in certain limited cases.** There are
several pitfalls when blindly using the Pushgateway instead of Prometheus's
usual pull model for general metrics collection:
* When monitoring multiple instances through a single Pushgateway, the
Pushgateway becomes both a single point of failure and a potential
bottleneck.
* You lose Prometheus's automatic instance health monitoring via the `up`
metric (generated on every scrape).
* The Pushgateway never forgets series pushed to it and will expose them to
Prometheus forever unless those series are manually deleted via the
Pushgateway's API.
The latter point is especially relevant when multiple instances of a job
differentiate their metrics in the Pushgateway via an `instance` label or
similar. Metrics for an instance will then remain in the Pushgateway even if
the originating instance is renamed or removed. This is because the lifecycle
of the Pushgateway as a metrics cache is fundamentally separate from the
lifecycle of the processes that push metrics to it. Contrast this to
Prometheus's usual pull-style monitoring: when an instance disappears
(intentionally or not), its metrics will automatically disappear along with it.
When using the Pushgateway, this is not the case, and you would now have to
delete any stale metrics manually or automate this lifecycle synchronization
yourself.
**Usually, the only valid use case for the Pushgateway is for capturing the
outcome of a service-level batch job**. A "service-level" batch job is one
which is not semantically related to a specific machine or job instance (for
example, a batch job that deletes a number of users for an entire service).
Such a job's metrics should not include a machine or instance label to decouple
the lifecycle of specific machines or instances from the pushed metrics. This
decreases the burden of managing stale metrics in the Pushgateway. See also
the [best practices for monitoring batch jobs](https://prometheus.io/docs/practices/instrumentation/#batch-jobs).
## Alternative strategies
If an inbound firewall or NAT is preventing you from pulling metrics from
targets, consider moving the Prometheus server behind the network barrier as
well. We generally recommend running Prometheus servers on the same network as
the monitored instances.
For batch jobs that are related to a machine (such as automatic
security update cronjobs or configuration management client runs), expose the
resulting metrics using the [Node Exporter's](https://github.com/prometheus/node_exporter)
textfile module instead of the Pushgateway.
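A sketch of such a cron job, writing the file atomically via rename (the
textfile collector directory depends on how the Node Exporter is started,
and the metric name is invented):

```
echo 'apt_upgrades_pending 7' > /path/to/textfile/apt.prom.$$
mv /path/to/textfile/apt.prom.$$ /path/to/textfile/apt.prom
```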
...@@ -158,6 +158,16 @@ for quantiles located in the lowest bucket. ...@@ -158,6 +158,16 @@ for quantiles located in the lowest bucket.
If `b` contains fewer than two buckets, `NaN` is returned. For φ < 0, `-Inf` is If `b` contains fewer than two buckets, `NaN` is returned. For φ < 0, `-Inf` is
returned. For φ > 1, `+Inf` is returned. returned. For φ > 1, `+Inf` is returned.
## `holt_winters()`
`holt_winters(v range-vector, sf scalar, tf scalar)` produces a smoothed value
for time series based on the range in `v`. The lower the smoothing factor `sf`,
the more importance is given to old data. The higher the trend factor `tf`, the
more trends in the data are considered. Both `sf` and `tf` must be between 0
and 1.
`holt_winters` should only be used with gauges.
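For example, smoothing a gauge over the last ten minutes with equal smoothing
and trend factors:

```
holt_winters(go_goroutines[10m], 0.5, 0.5)
```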
## `increase()` ## `increase()`
`increase(v range-vector)` calculates the increase in the `increase(v range-vector)` calculates the increase in the
......
...@@ -102,17 +102,20 @@ matching behavior: ...@@ -102,17 +102,20 @@ matching behavior:
**One-to-one** finds a unique pair of entries from each side of the operation. **One-to-one** finds a unique pair of entries from each side of the operation.
In the default case, that is an operation following the format `vector1 <operator> vector2`. In the default case, that is an operation following the format `vector1 <operator> vector2`.
Two entries match if they have the exact same set of labels and corresponding values. Two entries match if they have the exact same set of labels and corresponding values.
The `on` keyword allows reducing the set of considered labels to a provided list: The `ignoring` keyword allows ignoring certain labels when matching, while the
`on` keyword allows reducing the set of considered labels to a provided list:
<vector expr> <bin-op> ignoring(<label list>) <vector expr>
<vector expr> <bin-op> on(<label list>) <vector expr> <vector expr> <bin-op> on(<label list>) <vector expr>
Example input: Example input:
method:http_errors:rate5m{source="internal", method="get", code="500"} 24 method_code:http_errors:rate5m{method="get", code="500"} 24
method:http_errors:rate5m{source="external", method="get", code="404"} 30 method_code:http_errors:rate5m{method="get", code="404"} 30
method:http_errors:rate5m{source="internal", method="put", code="501"} 3 method_code:http_errors:rate5m{method="put", code="501"} 3
method:http_errors:rate5m{source="internal", method="post", code="500"} 6 method_code:http_errors:rate5m{method="post", code="500"} 6
method:http_errors:rate5m{source="external", method="post", code="404"} 21 method_code:http_errors:rate5m{method="post", code="404"} 21
method:http_requests:rate5m{method="get"} 600 method:http_requests:rate5m{method="get"} 600
method:http_requests:rate5m{method="del"} 34 method:http_requests:rate5m{method="del"} 34
...@@ -120,35 +123,41 @@ Example input: ...@@ -120,35 +123,41 @@ Example input:
Example query: Example query:
method:http_errors:rate5m{code="500"} / on(method) method:http_requests:rate5m method_code:http_errors:rate5m{code="500"} / ignoring(code) method:http_requests:rate5m
This returns a result vector containing the fraction of HTTP requests with status code This returns a result vector containing the fraction of HTTP requests with status code
of 500 for each method, as measured over the last 5 minutes. Without `on(method)` there of 500 for each method, as measured over the last 5 minutes. Without `ignoring(code)` there
would have been no match as the metrics do not share the same set of labels. would have been no match as the metrics do not share the same set of labels.
The entries with methods `put` and `del` have no match and will not show up in the result: The entries with methods `put` and `del` have no match and will not show up in the result:
{method="get"} 0.04 // 24 / 600 {method="get"} 0.04 // 24 / 600
{method="post"} 0.1 // 12 / 120 {method="post"} 0.1 // 12 / 120
**Many-to-one** and **one-to-many** matchings refer to the case where each vector element on **Many-to-one** and **one-to-many** matchings refer to the case where each vector element on
the "one"-side can match with multiple elements on the "many"-side. This has to the "one"-side can match with multiple elements on the "many"-side. This has to
be explicitly requested using the `group_left` or `group_right` modifier, where be explicitly requested using the `group_left` or `group_right` modifier, where
left/right determines which vector has the higher cardinality. left/right determines which vector has the higher cardinality.
<vector expr> <bin-op> ignoring(<label list>) group_left(<label list>) <vector expr>
<vector expr> <bin-op> ignoring(<label list>) group_right(<label list>) <vector expr>
<vector expr> <bin-op> on(<label list>) group_left(<label list>) <vector expr> <vector expr> <bin-op> on(<label list>) group_left(<label list>) <vector expr>
<vector expr> <bin-op> on(<label list>) group_right(<label list>) <vector expr> <vector expr> <bin-op> on(<label list>) group_right(<label list>) <vector expr>
The label list provided with the group modifier contains additional labels from
the "one"-side to be included in the result metrics. For `on` a label can only
appear in one of the lists. Every time series of the result vector must be
uniquely identifiable.

_Grouping modifiers can only be used for
[comparison](#comparison-binary-operators) and
[arithmetic](#arithmetic-binary-operators) operations, as `and`, `unless` and
`or` operations match with all possible entries in the right vector by
default._
Example query: Example query:
method:http_errors:rate5m / on(method) group_left(code,source) method:http_requests:rate5m method_code:http_errors:rate5m / ignoring(code) group_left method:http_requests:rate5m
In this case the left vector contains more than one entry per `method` label value. Thus, In this case the left vector contains more than one entry per `method` label value. Thus,
we indicate this using `group_left`. To ensure that the result vector entries are unique, additional we indicate this using `group_left`. To ensure that the result vector entries are unique, additional
...@@ -156,14 +165,13 @@ labels have to be provided. Either `code` or `source` satisfy this requirement, ...@@ -156,14 +165,13 @@ labels have to be provided. Either `code` or `source` satisfy this requirement,
can be added for a more detailed result. The elements from the right side can be added for a more detailed result. The elements from the right side
are now matched with multiple elements with the same `method` label on the left: are now matched with multiple elements with the same `method` label on the left:
{source="internal", method="get", code="500"} 0.04 // 24 / 600 {method="get", code="500"} 0.04 // 24 / 600
{source="external", method="get", code="404"} 0.05 // 30 / 600 {method="get", code="404"} 0.05 // 30 / 600
{source="internal", method="post", code="500"} 0.1 // 12 / 120 {method="post", code="500"} 0.1 // 12 / 120
{source="external", method="post", code="404"} 0.175 // 21 / 120 {method="post", code="404"} 0.175 // 21 / 120
_Many-to-one and one-to-many matching are advanced use cases that should be carefully considered. _Many-to-one and one-to-many matching are advanced use cases that should be carefully considered.
Often a proper use of `on(<labels>)` provides the desired outcome._ Often a proper use of `ignoring(<labels>)` provides the desired outcome._
## Aggregation operators ## Aggregation operators
...@@ -187,7 +195,7 @@ or preserve distinct dimensions by including a `without` or `by` clause. ...@@ -187,7 +195,7 @@ or preserve distinct dimensions by including a `without` or `by` clause.
`without` removes the listed labels from the result vector, while all other
labels are preserved in the output. `by` does the opposite and drops labels that
are not listed in the `by` clause, even if their label values are identical
between all elements of the vector. The `keep_common` clause allows keeping
those extra labels (labels that are identical between elements, but not in the
`by` clause).
......
--- ---
title: Grafana title: Grafana
sort_rank: 7 sort_rank: 2
--- ---
# Grafana support for Prometheus # Grafana support for Prometheus
......
--- ---
title: PromDash title: PromDash
sort_rank: 2 sort_rank: 7
toc: full-width
--- ---
# PromDash # PromDash
CAUTION: <b>NOTE:</b> We recommend [Grafana](/docs/visualization/grafana) for
visualization of Prometheus metrics nowadays, as it has native Prometheus
support and is widely adopted and powerful. There will be less focus on
PromDash development in the future.
## Overview ## Overview
PromDash is a browser-based dashboard builder for Prometheus. It is a PromDash is a browser-based dashboard builder for Prometheus. It is a
......
...@@ -39,7 +39,7 @@ If functions are used in a pipeline, the pipeline value is passed as the last ar ...@@ -39,7 +39,7 @@ If functions are used in a pipeline, the pipeline value is passed as the last ar
| Name | Arguments | Returns | Notes | | Name | Arguments | Returns | Notes |
| ------------- | ------------- | -------- | -------- | | ------------- | ------------- | -------- | -------- |
| query | query string | []sample | Queries the databases, does not support returning range vectors. | | query | query string | []sample | Queries the database, does not support returning range vectors. |
| first | []sample | sample | Equivalent to `index a 0` | | first | []sample | sample | Equivalent to `index a 0` |
| label | label, sample | string | Equivalent to `index sample.Labels label` | | label | label, sample | string | Equivalent to `index sample.Labels label` |
| value | sample | float64 | Equivalent to `sample.Value` | | value | sample | float64 | Equivalent to `sample.Value` |
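For example, these functions can be chained in a pipeline (the metric and
instance are illustrative):

```
{{ query "some_metric{instance='someinstance'}" | first | value }}
```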
......
---
title: Download
---
<div class="row">
<div class="col-md-12 doc-content download">
<h1>Download</h1>
<div class="toc toc-right">
<ul>
<% Downloads.repositories.each do |repo| %>
<li><a href="#<%= repo.name %>"><%= repo.name %></a></li>
<% end %>
</ul>
</div>
<p>
We provide precompiled binaries and <a href="https://hub.docker.com/r/prom/">Docker images</a>
for most officially maintained Prometheus components. If a component is
not listed here, check the respective repository on Github for further
instructions.
</p>
<p>
There is also a constantly growing number of independently maintained
exporters listed at <a href="/docs/instrumenting/exporters/">Exporters
and integrations</a>.
</p>
<p>
After downloading a binary release suitable for your system, please follow
the <a href="/docs/introduction/getting_started/">installation instructions</a>.
</p>
<div class="alert alert-info" role="alert">
<strong>Work in progress!</strong>
We will provide more precompiled binary versions as well as checksums soon.
</div>
<div class="panel panel-default download-selection">
<div class="panel-body">
Operating system <%= dropdown(:os, Downloads.operating_systems, :popular, popular: %w(darwin linux windows)) %>
Architecture <%= dropdown(:arch, Downloads.architectures, :amd64) %>
</div>
</div>
<% Downloads.repositories.each do |repository| %>
<h2 id="<%= repository.name %>"><%= repository.name %></h2>
<p><%= repository.description %> <a href="<%= repository.url %>"><i class="fa fa-github"></i> <%= repository.full_name %></a></p>
<table class="table table-bordered downloads">
<% repository.releases.each do |release| %>
<thead>
<tr>
<td colspan="5">
<strong><%= release.name %></strong>
<%= %(<span class="label label-primary">Pre-release</span>) if release.prerelease %>
<small><a href="<%= release.url %>">Release notes</a></small>
</td>
</tr>
<tr class="first">
<th>File name</th>
<th>OS</th>
<th>Arch</th>
<th>Size</th>
<th>SHA256 Checksum</th>
</tr>
</thead>
<tbody>
<% release.assets.each do |asset| %>
<tr data-os="<%= asset.os %>" data-arch="<%= asset.arch %>">
<td class="filename"><a class="download" href="<%= asset.url %>"><%= asset.name %></a></td>
<td><%= asset.os %></td>
<td><%= asset.arch %></td>
<td><%= format_bytes asset.size %></td>
<td class="checksum">not available yet</td>
</tr>
<% end %>
</tbody>
<% end %>
</table>
<% end %>
</div>
</div>
---
title: Routing tree editor
---
<h1 id="routing-tree-editor" class="page-header">
Routing tree editor
<a class="header-anchor" href="#routing-tree-editor" name="routing-tree-editor"></a>
</h1>
<div class="form-group">
<p class="block">Copy and paste your Alertmanager config.yml:</p>
<div class="form-group">
<textarea class="js-config-yml form-control" cols="50" rows="10"></textarea>
</div>
<button type="button" class="js-parse-and-draw btn btn-default">Draw Routing Tree</button>
</div>
<div class="form-inline">
<div class="form-group">
<div class="form-group">
<input class="js-label-set-input label-input form-control" type="text" placeholder='{service="foo-service", severity="critical"}' \>
<button type="button" class="js-find-match btn btn-default">Match Label Set</button>
</div>
</div>
</div>
<script src="/assets/d3.v3.min.js"></script>
<script src="/assets/js-yaml.min.js"></script>
<script src="/assets/routing-tree.js"></script>
<% render 'default' do %> <% render 'default' do %>
<div class="row">
<div class="col-md-9 blog doc-content"> <div class="col-md-9 blog doc-content">
<h1><%= item[:title] %></h1> <h1><%= item[:title] %></h1>
<aside>Posted at: <%= get_pretty_date(item) %> by <%= item[:author_name]%></aside> <aside>Posted at: <%= get_pretty_date(item) %> by <%= item[:author_name]%></aside>
...@@ -22,4 +23,5 @@ ...@@ -22,4 +23,5 @@
</div> </div>
<%= render 'blog_sidebar' %> <%= render 'blog_sidebar' %>
</div>
<% end %> <% end %>
...@@ -2,10 +2,11 @@ ...@@ -2,10 +2,11 @@
<ul class="nav navbar-nav side-nav"> <ul class="nav navbar-nav side-nav">
<li> <li>
<span class="nav-header">Blog posts</span> <span class="nav-header">Blog posts</span>
<ul class="nav"> <ul class="nav active">
<% sorted_articles.each do |post| %> <% sorted_articles.each do |post| %>
<li><%= link_to post[:title], post.path %></li> <li><%= link_to post[:title], post.path %></li>
<% end %> <% end %>
</ul> </ul>
</li>
</ul> </ul>
</div> </div>
...@@ -2,6 +2,6 @@ ...@@ -2,6 +2,6 @@
<footer> <footer>
<p class="pull-left"> <p class="pull-left">
&copy; Prometheus Authors 2015 &copy; Prometheus Authors 2016
</p> </p>
</footer> </footer>
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
<div class="container"> <div class="container">
<%= yield %> <%= yield %>
<%= render 'container_footer' %>
</div> </div>
<%= render 'footer' %> <%= render 'footer' %>
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<div class="col-md-3 side-nav-col"> <div class="col-md-3 side-nav-col">
<ul class="nav navbar-nav side-nav"> <ul class="nav navbar-nav side-nav">
<% @items['/docs/'].children.sort_by { |i| i[:sort_rank] || 0 }.each do |i| %> <% @items['/docs/'].children.sort_by { |i| i[:sort_rank] || 0 }.each do |i| %>
<%= nav(i, @item) %> <%= nav(i) %>
<% end %> <% end %>
</ul> </ul>
</div> </div>
......
<!-- Bootstrap core JavaScript <!-- Bootstrap core JavaScript
================================================== --> ================================================== -->
<!-- Placed at the end of the document so the pages load faster --> <!-- Placed at the end of the document so the pages load faster -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script> <script src="https://code.jquery.com/jquery-2.2.2.min.js" integrity="sha256-36cp2Co+/62rEAAYHLmRCPIych47CvdM+uTBJwSzWjI=" crossorigin="anonymous"></script>
<script src="/assets/bootstrap-3.3.1/js/bootstrap.min.js"></script> <script src="/assets/bootstrap-3.3.1/js/bootstrap.min.js"></script>
<script src="/assets/docs.js"></script>
<!-- IE10 viewport hack for Surface/desktop Windows 8 bug --> <!-- IE10 viewport hack for Surface/desktop Windows 8 bug -->
<script src="/assets/ie10-viewport-bug-workaround.js"></script> <script src="/assets/ie10-viewport-bug-workaround.js"></script>
<!-- Google Analytics --> <!-- Google Analytics -->
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
<meta charset="utf-8"> <meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="Prometheus monitoring system and time series database"> <meta name="description" content="An open-source monitoring system with a dimensional data model, flexible query language, efficient time series database and modern alerting approach.">
<meta name="keywords" content="prometheus, monitoring, monitoring system, time series, time series database, alerting, metrics, telemetry"> <meta name="keywords" content="prometheus, monitoring, monitoring system, time series, time series database, alerting, metrics, telemetry">
<meta name="author" content="Prometheus"> <meta name="author" content="Prometheus">
...@@ -32,28 +32,26 @@ ...@@ -32,28 +32,26 @@
<% if @item[:title] %> <% if @item[:title] %>
<title><%= @item[:title] %> | Prometheus</title> <title><%= @item[:title] %> | Prometheus</title>
<% else %> <% else %>
<title>Prometheus</title> <title>Prometheus - Monitoring system &amp; time series database</title>
<% end %> <% end %>
<!-- Bootstrap core CSS --> <!-- Bootstrap core CSS -->
<link href="/assets/bootstrap-3.3.1/css/bootstrap.min.css" rel="stylesheet"> <link href="/assets/bootstrap-3.3.1/css/bootstrap.min.css" rel="stylesheet">
<!-- Custom styles for this template --> <!-- Custom styles for this template -->
<link href="/assets/docs.css" rel="stylesheet"> <link href="/css/docs.css" rel="stylesheet">
<link href="/css/routing-tree-editor.css" rel="stylesheet">
<!-- Custom Fonts --> <!-- Custom Fonts -->
<link href="/assets/font-awesome-4.2.0/css/font-awesome.min.css" rel="stylesheet" type="text/css"> <link href="/assets/font-awesome-4.2.0/css/font-awesome.min.css" rel="stylesheet" type="text/css">
<link rel="stylesheet" type="text/css" href="//fonts.googleapis.com/css?family=Open+Sans"> <link href='https://fonts.googleapis.com/css?family=Open+Sans' rel='stylesheet' type='text/css'>
<link href='https://fonts.googleapis.com/css?family=Lato:300,300italic,400' rel='stylesheet' type='text/css'>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head> </head>
<body> <body>
<div class="<%= @item[:layout] == 'jumbotron' ? 'navbar-jumbotron' : '' %>">
<nav class="navbar navbar-inverse navbar-static-top" role="navigation"> <nav class="navbar navbar-inverse navbar-static-top" role="navigation">
<div class="container"> <div class="container">
<div class="navbar-header"> <div class="navbar-header">
...@@ -67,12 +65,14 @@ ...@@ -67,12 +65,14 @@
</div> </div>
<div class="collapse navbar-collapse" id="navbar"> <div class="collapse navbar-collapse" id="navbar">
<ul class="nav navbar-nav navbar-right main-nav"> <ul class="nav navbar-nav navbar-right main-nav">
<li><a href="/">Overview</a></li> <li><a href="/docs/introduction/overview/">Docs</a></li>
<li><a href="/docs/introduction/overview/">Documentation</a></li> <li><a href="/download/">Download</a></li>
<li><a href="/community/">Community</a></li> <li><a href="/community/">Community</a></li>
<li><a href="/blog/">Blog</a></li> <li><a href="/blog/">Blog</a></li>
<li><a href="https://github.com/prometheus"><i class="fa fa-github"></i> Github</a></li> <li><a href="https://github.com/prometheus"><i class="fa fa-github"></i></a></li>
<li><a href="https://twitter.com/PrometheusIO"><i class="fa fa-twitter"></i></a></li>
</ul> </ul>
</div> </div>
</div> </div>
</nav> </nav>
</div>
<%= render 'header' %> <%= render 'header' %>
<%= yield %>
<div class="jumbotron">
<div class="container">
<h1><img src="/assets/prometheus_logo.svg" alt="Prometheus logo"> Prometheus</h1>
<p class="subtitle">An open-source service monitoring system and time series database.</p>
<p><a class="btn btn-default btn-lg" href="/docs/introduction/getting_started/" role="button">Get Started</a></p>
</div>
</div>
<div class="container">
<%= yield %>
<%= render 'container_footer' %>
</div>
<%= render 'footer' %> <%= render 'footer' %>
# All files in the 'lib' directory will be loaded # All files in the 'lib' directory will be loaded
# before nanoc starts compiling. # before nanoc starts compiling.
require 'nanoc/cachebuster'
include Nanoc::Helpers::LinkTo include Nanoc::Helpers::LinkTo
include Nanoc::Helpers::Rendering include Nanoc::Helpers::Rendering
include Nanoc3::Helpers::Blogging include Nanoc3::Helpers::Blogging
include Nanoc3::Helpers::Tagging include Nanoc3::Helpers::Tagging
include Nanoc::Helpers::CacheBusting
module BlogHelper module BlogHelper
def get_pretty_date(post) def get_pretty_date(post)
...@@ -15,10 +18,9 @@ module BlogHelper ...@@ -15,10 +18,9 @@ module BlogHelper
content = post.compiled_content content = post.compiled_content
if content =~ /\s<!-- more -->\s/ if content =~ /\s<!-- more -->\s/
content = content.partition('<!-- more -->').first + content = content.partition('<!-- more -->').first +
"<div class='read-more'><a href='#{post.path}'>Continue reading &rsaquo;</a></div>" "<div class='read-more'><a class='btn btn-primary' href='#{post.path}'>Continue reading &raquo;</a></div>"
end end
return content return content
end end
end end
include BlogHelper include BlogHelper
require 'json'
module Downloads
# repositories returns a list of all repositories with releases.
def self.repositories
@repositories ||= begin
repos = Dir.glob('downloads/*').map { |dir| Repository.new(dir) }
repos.sort_by { |r| r.name == 'prometheus' ? '0' : r.name }
end
end
# operating_systems returns a list of all operating systems downloads can be
# provided for.
def self.operating_systems
repositories.inject([]) do |list, repo|
list += repo.releases.map { |r| r.assets.map(&:os) }.flatten
end.uniq.sort
end
# architectures returns a list of all architectures downloads can be
# provided for.
def self.architectures
repositories.inject([]) do |list, repo|
list += repo.releases.map { |r| r.assets.map(&:arch) }.flatten
end.uniq.sort
end
class Repository
def initialize(dir)
@repo = JSON.parse(File.read(File.join(dir, 'repo.json')))
@releases = JSON.parse(File.read(File.join(dir, 'releases.json')))
end
def name
@repo['name']
end
def full_name
@repo['full_name']
end
def description
@repo['description']
end
def url
@repo['html_url']
end
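# releases returns the most recent stable release, preceded by the latest
# pre-release if that pre-release is newer (assumes the GitHub API lists
# releases newest-first).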
def releases
releases = []
@releases.each do |r|
if r['prerelease']
releases << r if releases.empty?
else
releases << r
break
end
end
releases.map { |r| Release.new(r) }
end
end
class Release
def initialize(data)
@data = data
end
def name
@data['name']
end
def url
@data['html_url']
end
def prerelease
@data['prerelease']
end
def assets
@data['assets'].map { |d| Asset.new(d) }
end
end
class Asset
def initialize(data)
@data = data
end
def name
@data['name']
end
def url
@data['browser_download_url']
end
def kind
'Binary'
end
# TODO(ts): validate
def os
name.split('.')[3].split('-').first
end
# TODO(ts): validate
def arch
name.split('.')[3].split('-').last
end
def size
@data['size']
end
end
module Helper
def format_bytes(bytes)
'%.2f MiB' % (bytes.to_f / 1024 / 1024)
end
def dropdown(name, items, default, groups = {})
additional = groups.map do |group_name, group_items|
%(<li data-group="#{group_items.join(' ')}"><a href="#">#{group_name}</a></li>)
end.join('')
caption = %(<span class="caption">#{default}</span> <span class="caret"></span>)
button = %(<button type="button" class="btn btn-default dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">#{caption}</button>)
header = %(<li><a href="#">all</a></li>#{additional}<li role="separator" class="divider"></li>)
list = %(<ul class="dropdown-menu">#{header} #{items.map { |i| %(<li><a href="#">#{i}</a></li>) }.join('') }</ul>)
%(<div class="btn-group #{name}">#{button} #{list}</div>)
end
end
end
include Downloads::Helper
def nav(root_item, buffer='', layer=0)
  # Skip non-written or hidden items.
  return buffer if root_item.nil? || root_item.path.nil? || root_item[:is_hidden]

  children = nav_children(root_item)

  if nav_active?(root_item)
    buffer << "<li class=\"active\">"
  else
    buffer << "<li>"
  end

  title = nav_title_of(root_item)

  if layer == 0
    # Section headers get an icon; deeper entries get a plain link.
    buffer << "<span class=\"nav-header\"><i class=\"fa fa-#{root_item[:nav_icon]}\"></i> <span>#{title}</span></span>"
  else
    buffer << link_to(title, root_item.path)
  end

  if children.any?
    buffer << %(<ul class="nav #{nav_active?(root_item) ? 'active' : ''}">)
    children.each do |child|
      nav(child, buffer, layer + 1)
    end
    buffer << '</ul>'
  end

  buffer << '</li>'
  buffer
end

def nav_active?(item)
  active = @item_rep.respond_to?(:path) && @item_rep.path == item.path
  active || nav_children(item).any? { |child| nav_active?(child) }
end

def nav_title_of(i)
  i[:nav_title] || i[:title] || ''
end

def nav_children(item)
  item.children
    .select { |child| !child[:is_hidden] && child.path }
    .sort_by { |child| child[:sort_rank] || 0 }
end
...@@ -77,4 +77,4 @@ checks: ...@@ -77,4 +77,4 @@ checks:
exclude: [] exclude: []
# The base url required by atom_feed # The base url required by atom_feed
base_url: "http://prometheus.io" base_url: "https://prometheus.io"
// Use CSS to hide elements without a delay during page load.
$('head').append('<style type="text/css"> \
.side-nav ul { display: none; } \
.side-nav ul.active { display: block; } \
</style>');
$(document).ready(function() {
var navToggle = function(event) {
event.preventDefault();
var visible = $(this).closest('li').children('ul.nav').is(':visible');
$(this).closest('ul').find('ul.nav').slideUp(200);
if (!visible) {
$(this).closest('li').children('ul.nav').slideDown(200);
}
};
$('.nav-header span').each(function() {
var link = $('<a href="#">').text($(this).text()).click(navToggle);
$(this).replaceWith(link);
});
var selected = function(value, want, group) {
switch(want) {
case 'all':
return true;
default:
if (group.length > 0) {
return group.indexOf(value) > -1;
}
return value === want;
}
}
var selectDownloads = function() {
var os = $('.download-selection .os .caption').text();
var osGroup = $('.download-selection .os li:contains("'+os+'")').data("group");
var arch = $('.download-selection .arch .caption').text();
$('.downloads tbody tr').each(function() {
if (selected($(this).data('os').toString(), os, osGroup !== undefined ? osGroup.split(' ') : [])
&& selected($(this).data('arch').toString(), arch, [])) {
$(this).show();
} else {
$(this).hide();
}
});
};
selectDownloads();
$('.download-selection a').on('click', function(event) {
event.preventDefault();
$(this).parents('.btn-group').find('button .caption').text($(this).text());
selectDownloads();
});
});
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 16.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
width="115.333px" height="114px" viewBox="0 0 115.333 114" enable-background="new 0 0 115.333 114" xml:space="preserve">
<g id="Layer_2">
</g>
<g>
<path fill="#FFFFFF" d="M56.667,0.667C25.372,0.667,0,26.036,0,57.332c0,31.295,25.372,56.666,56.667,56.666
s56.666-25.371,56.666-56.666C113.333,26.036,87.961,0.667,56.667,0.667z M56.667,106.722c-8.904,0-16.123-5.948-16.123-13.283
H72.79C72.79,100.773,65.571,106.722,56.667,106.722z M83.297,89.04H30.034v-9.658h53.264V89.04z M83.106,74.411h-52.92
c-0.176-0.203-0.356-0.403-0.526-0.609c-5.452-6.62-6.736-10.076-7.983-13.598c-0.021-0.116,6.611,1.355,11.314,2.413
c0,0,2.42,0.56,5.958,1.205c-3.397-3.982-5.414-9.044-5.414-14.218c0-11.359,8.712-21.285,5.569-29.308
c3.059,0.249,6.331,6.456,6.552,16.161c3.252-4.494,4.613-12.701,4.613-17.733c0-5.21,3.433-11.262,6.867-11.469
c-3.061,5.045,0.793,9.37,4.219,20.099c1.285,4.03,1.121,10.812,2.113,15.113C63.797,33.534,65.333,20.5,71,16
c-2.5,5.667,0.37,12.758,2.333,16.167c3.167,5.5,5.087,9.667,5.087,17.548c0,5.284-1.951,10.259-5.242,14.148
c3.742-0.702,6.326-1.335,6.326-1.335l12.152-2.371C91.657,60.156,89.891,67.418,83.106,74.411z"/>
</g>
</svg>