Skip to content

Установка Prometheus

Создайте пользователя prometheus и группу prometheus, от имени которых вы будете запускать Prometheus

bash
groupadd --system prometheus
useradd --system -g prometheus -s /bin/false prometheus

Скачайте архив prometheus и распакуйте его в папку /tmp

bash
wget https://github.com/prometheus/prometheus/releases/download/v2.31.1/prometheus-2.31.1.linux-amd64.tar.gz -O - | tar -xzv -C /tmp

Создайте директорию для конфигурационного файла

bash
mkdir /etc/prometheus

Создайте директорию для данных

bash
mkdir /var/lib/prometheus

Скопируйте содержимое распакованного архива:

bash
cp /tmp/prometheus-2.31.1.linux-amd64/prometheus /usr/local/bin
cp /tmp/prometheus-2.31.1.linux-amd64/promtool /usr/local/bin
cp -r /tmp/prometheus-2.31.1.linux-amd64/console* /etc/prometheus

Удалите содержимое распакованного архива из папки /tmp

bash
# Remove the directory unpacked from the v2.31.1 archive downloaded above.
rm -rf /tmp/prometheus-2.31.1.linux-amd64/

Создайте конфигурационный файл /etc/prometheus/prometheus.yml со следующим содержимым

yaml
global:
  scrape_interval: 30s
  evaluation_interval: 30s

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

Измените владельца созданных файлов

bash
chown -R prometheus:prometheus /var/lib/prometheus /etc/prometheus
chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool

Создайте сценарий запуска systemd сервиса Prometheus. Для этого создайте файл /etc/systemd/system/prometheus.service со следующим содержимым

ini
# systemd unit that runs the Prometheus server as the unprivileged
# "prometheus" user and group.
[Unit]
Description=Prometheus
# Order the service after network-online.target and pull that target in.
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
# The flags below point at the config file, the TSDB data directory and the
# console templates/libraries under /etc/prometheus and /var/lib/prometheus.
ExecStart=/usr/local/bin/prometheus \
    --config.file /etc/prometheus/prometheus.yml \
    --storage.tsdb.path /var/lib/prometheus \
    --web.console.templates=/etc/prometheus/consoles \
    --web.console.libraries=/etc/prometheus/console_libraries
# "systemctl reload prometheus" sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
[Install]
WantedBy=default.target

Изменить порт, добавить в настройки службы /etc/systemd/system/prometheus.service

ini
--web.listen-address=:9901

Запустите Prometheus

bash
systemctl daemon-reload
systemctl start prometheus.service
systemctl enable prometheus.service
systemctl status prometheus.service

Защита Prometheus /root/gen-pass.py

Установка ПО

bash
apt-get install -y python3 python3-bcrypt

Создание скрипта

python
"""Prompt for a password on the terminal and print its bcrypt hash."""
import getpass
import bcrypt


def main():
    """Read a password without echoing it and print the salted bcrypt hash."""
    plaintext = getpass.getpass("password: ")
    salt = bcrypt.gensalt()
    digest = bcrypt.hashpw(plaintext.encode("utf-8"), salt)
    print(digest.decode())


if __name__ == "__main__":
    main()

Генерация пароля

bash
python3 /root/gen-pass.py

Создание конфига /etc/prometheus/web.yml

yaml
basic_auth_users:
    admin: $2b$12$hNf2lSsxfm0.i4a.1kVpSOVyBCfIB51VRjgBUyv6kdnyTlgWj81Ay

Проверка конфига

bash
promtool check config /etc/prometheus/prometheus.yml

Проверка web конфига

bash
promtool check web-config /etc/prometheus/web.yml

Проверка правил

bash
promtool check rules /etc/prometheus/alert.rules.yml

Измените владельца созданных файлов

bash
chown prometheus:prometheus /etc/prometheus/web.yml

Добавить в настройки службы конфиг /etc/systemd/system/prometheus.service

ini
--web.config.file=/etc/prometheus/web.yml

Добавить в конфиг /etc/prometheus/prometheus.yml

yaml
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
    basic_auth:
      username: admin 
      password: admin

Nginx config for Prometheus

nginx
server {
  listen 8091;
  listen [::]:8091;
  server_name 95.213.159.131;

  access_log  /var/log/nginx/prometheus_access.log;
  error_log   /var/log/nginx/prometheus_error.log;

  location / {
    gzip off;

    proxy_read_timeout      300;
    proxy_connect_timeout   300;
    proxy_redirect          off;

    proxy_set_header    Host                $http_host;
    proxy_set_header    X-Real-IP           $remote_addr;
    proxy_set_header    X-Forwarded-For     $proxy_add_x_forwarded_for;
    proxy_set_header    X-Forwarded-Proto   $scheme;

    proxy_pass http://127.0.0.1:9901;
  }
}

Установка Node_exporter

Скачайте архив node_exporter и распакуйте его в папку /tmp

bash
wget https://github.com/prometheus/node_exporter/releases/download/v1.2.2/node_exporter-1.2.2.linux-amd64.tar.gz -O - | tar -xzv -C /tmp

Скопируйте содержимое распакованного архива в папку /usr/local/bin

bash
cp /tmp/node_exporter-1.2.2.linux-amd64/node_exporter /usr/local/bin

Измените владельца созданных файлов

bash
chown -R prometheus:prometheus /usr/local/bin/node_exporter

Создайте сценарий запуска systemd сервиса node_exporter. Для этого создайте файл /etc/systemd/system/node_exporter.service со следующим содержимым

ini
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/node_exporter \
    --collector.systemd

[Install]
WantedBy=multi-user.target

Изменить порт

ini
--web.listen-address=:9902

Запустите node_exporter

bash
systemctl daemon-reload
systemctl start node_exporter.service
systemctl enable node_exporter.service
systemctl status node_exporter.service

В файле /etc/prometheus/prometheus.yml в секцию scrape_configs добавьте секцию работы с node_exporter

yaml
  - job_name: "node_exporter"
    scrape_interval: 30s
    static_configs:
      - targets: ["localhost:9100"]
        labels:
          alias: localhost
yaml
 - job_name: "node_exporter_clients"
   static_configs:
    - targets: ['192.168.1.117:9100']
      labels:
        instance: 'linux-ina'
    - targets: ['192.168.1.138:9100']
      labels:
        instance: 'linux-inb'

Защита node_exporter

Создать конфиг /etc/prometheus/node_exporter/web.yml

yaml
basic_auth_users:
    admin: $2b$12$dqdAESkuJGrwXm0OhiJ/Z.bwAE9xntCsA8.EpihHNHXZCCHiKu43a

Измените владельца созданных файлов

bash
chown -R prometheus:prometheus /etc/prometheus/node_exporter/

Добавить в настройки службы конфиг /etc/systemd/system/node_exporter.service

ini
--web.config=/etc/prometheus/node_exporter/web.yml

Добавить в конфиг /etc/prometheus/prometheus.yml

yaml
- job_name: "node_exporter"
  static_configs:
    - targets: ["localhost:9100"]
  basic_auth:
    username: admin 
    password: admin

Bash скрипт

bash
#!/usr/bin/env bash
# Installs node_exporter as a systemd service that runs as the "prometheus"
# system user, is protected by HTTP basic auth and listens on port 9902.
# Must be run as root. Written so that it can be re-run on the same host.
set -euo pipefail

readonly NODE_EXPORTER_VERSION="1.2.2"
readonly RELEASE="node_exporter-${NODE_EXPORTER_VERSION}.linux-amd64"
readonly DOWNLOAD_URL="https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${RELEASE}.tar.gz"

# Create the service account only when it is missing; plain groupadd/useradd
# exit non-zero if the group or user already exists.
getent group prometheus >/dev/null || groupadd --system prometheus
# Output of the existence probe is intentionally discarded.
id -u prometheus >/dev/null 2>&1 \
  || useradd --system -g prometheus -s /bin/false prometheus

# pipefail makes a failed download abort the script instead of being hidden
# behind tar's exit status.
wget "${DOWNLOAD_URL}" -O - \
  | tar -xzv -C /tmp
cp "/tmp/${RELEASE}/node_exporter" /usr/local/bin
rm -rf -- "/tmp/${RELEASE:?}/"

mkdir -p /etc/prometheus

# Overwrite rather than append: appending on a re-run would duplicate the
# basic_auth_users key. The quoted delimiter keeps the "$" characters of the
# bcrypt hash literal.
cat <<'EOF' > /etc/prometheus/web.yml
basic_auth_users:
  admin: $2b$12$A845HEbsu7MNJirEYlNcCehIJAE3OjW1RnNe8XaUauxvRz3.oqdy.
EOF

chown -R prometheus:prometheus /usr/local/bin/node_exporter
chown -R prometheus:prometheus /etc/prometheus

cat <<'EOF' > /etc/systemd/system/node_exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/node_exporter \
    --web.config=/etc/prometheus/web.yml \
    --web.listen-address=:9902 \
    --collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/) \
    --collector.systemd

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl start node_exporter.service
systemctl enable node_exporter.service
# --no-pager keeps the status call from blocking on a pager inside a script.
systemctl status --no-pager node_exporter.service

Установка blackbox_exporter

Скачайте архив blackbox_exporter и распакуйте его в папку /tmp

bash
wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.19.0/blackbox_exporter-0.19.0.linux-amd64.tar.gz -O - | tar -xzv -C /tmp

Скопируйте содержимое распакованного архива в папку /usr/local/bin

bash
cp /tmp/blackbox_exporter-0.19.0.linux-amd64/blackbox_exporter /usr/local/bin
mkdir /etc/prometheus/blackbox_exporter/
cp /tmp/blackbox_exporter-0.19.0.linux-amd64/blackbox.yml /etc/prometheus/blackbox_exporter/

Измените владельца созданных файлов

bash
chown -R prometheus:prometheus /usr/local/bin/blackbox_exporter
chown -R prometheus:prometheus /etc/prometheus/blackbox_exporter/

Создайте сценарий запуска systemd сервиса blackbox_exporter. Для этого создайте файл /etc/systemd/system/blackbox_exporter.service со следующим содержимым

ini
[Unit]
Description=Prometheus Blackbox Exporter
After=network.target

[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/blackbox_exporter \
    --config.file /etc/prometheus/blackbox_exporter/blackbox.yml

[Install]
WantedBy=multi-user.target

Изменить порт

ini
--web.listen-address=:9903

Изменить конфиг /etc/prometheus/blackbox_exporter/blackbox.yml модуля http_2xx

yaml
  http_2xx:
    prober: http
    timeout: 5s
    http:
      preferred_ip_protocol: ip4

Запустите blackbox_exporter

bash
systemctl daemon-reload
systemctl start blackbox_exporter.service
systemctl enable blackbox_exporter.service
systemctl status blackbox_exporter.service

Добавить в конфиг /etc/prometheus/prometheus.yml

yaml
  - job_name: blackbox
    metrics_path: /metrics
    static_configs:
      - targets:
        - localhost:9115
  - job_name: 'blackbox_exporter'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
        - https://prometheus.io
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115

Закрыть порт 9115

bash
# Drop external connections to 9115 but keep loopback reachable: the scrape
# job above targets localhost:9115, and INPUT rules also apply to loopback.
iptables -A INPUT -p tcp ! -s 127.0.0.1 --dport 9115 -j DROP

Установка AlertManager

Скачайте архив alertmanager и распакуйте его в папку /tmp

bash
wget https://github.com/prometheus/alertmanager/releases/download/v0.23.0/alertmanager-0.23.0.linux-amd64.tar.gz -O - | tar -xzv -C /tmp

Создайте папку

bash
mkdir /etc/prometheus/alertmanager /var/lib/prometheus/alertmanager

Скопируйте содержимое распакованного архива в папку /usr/local/bin

bash
cp /tmp/alertmanager-0.23.0.linux-amd64/alertmanager /usr/local/bin
cp /tmp/alertmanager-0.23.0.linux-amd64/amtool /usr/local/bin
cp /tmp/alertmanager-0.23.0.linux-amd64/alertmanager.yml /etc/prometheus/alertmanager

Измените владельца созданных файлов

bash
chown -R prometheus:prometheus /etc/prometheus/alertmanager /var/lib/prometheus/alertmanager /usr/local/bin/alertmanager /usr/local/bin/amtool

Создайте сценарий запуска systemd сервиса alertmanager. Для этого создайте файл /etc/systemd/system/alertmanager.service со следующим содержимым

ini
[Unit]
Description=Prometheus Alertmanager
After=network.target

[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/alertmanager \
         --config.file=/etc/prometheus/alertmanager/alertmanager.yml \
         --storage.path=/var/lib/prometheus/alertmanager \
         --cluster.advertise-address=127.0.0.1:9093
ExecReload=/bin/kill -HUP $MAINPID

[Install]
WantedBy=multi-user.target

Изменить порт

ini
[Unit]
Description=Prometheus Alertmanager
After=network.target

[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/alertmanager \
         --config.file=/etc/prometheus/alertmanager/alertmanager.yml \
         --storage.path=/var/lib/prometheus/alertmanager \
         --cluster.listen-address=:9904 \
         --web.listen-address=:9905
ExecReload=/bin/kill -HUP $MAINPID

[Install]
WantedBy=multi-user.target

Запустите alertmanager

bash
systemctl daemon-reload
systemctl start alertmanager.service
systemctl enable alertmanager.service
systemctl status alertmanager.service

Создайте правило для мониторинга, при котором система будет считать, что виртуальный сервер работает неправильно /etc/prometheus/alert.rules.yml

yaml
groups:
- name: alert.rules
  rules:
  - alert: InstanceDown
    expr: up == 0
    for: 30s
    labels:
      severity: critical
    annotations:
      description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds. '
      summary: Instance {{ $labels.instance }} down

Поместите правило в config /etc/prometheus/prometheus.yml Prometheus и Alertmanager

yaml
rule_files:
  - 'alert.rules.yml'

alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 'localhost:9093'

Настройка уведомления на почту /etc/prometheus/alertmanager/alertmanager.yml

yaml
global:
  resolve_timeout: 5m
  smtp_from: bajdakov@runway-agency.ru

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
  routes:
    - receiver: 'web.hook'
      continue: true
    - receiver: 'email'
      continue: true
receivers:
- name: 'web.hook'
  webhook_configs:
  - url: 'http://127.0.0.1:5001/'
# Receiver that sends alert notifications by e-mail through smtp.yandex.ru:587
# with TLS required.
- name: 'email'
  email_configs:
  - to: 'bajdakov@runway-agency.ru'
    from: 'bajdakov@runway-agency.ru'
    smarthost: 'smtp.yandex.ru:587'
    require_tls: true
    auth_username: "bajdakov@runway-agency.ru"
    auth_identity: "bajdakov@runway-agency.ru"
    # NOTE(review): the SMTP password is stored here in plaintext; substitute
    # your own credential and keep this file readable only by the service user.
    auth_password: "user877"
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

Проверка конфига

bash
amtool check-config /etc/prometheus/alertmanager/alertmanager.yml

Закрыть порт 9093

bash
# Drop external connections to 9093 but keep loopback reachable: Prometheus
# is configured to reach Alertmanager at localhost:9093.
iptables -A INPUT -p tcp ! -s 127.0.0.1 --dport 9093 -j DROP

Закрыть порт кроме localhost

bash
iptables -A INPUT -p tcp ! -s 127.0.0.1 --dport 9905 -j DROP

Установка Grafana

Установите необходимое дополнительное ПО

bash
apt-get install -y software-properties-common wget apt-transport-https

Добавьте ключ репозитория Grafana

bash
wget -q -O - https://packages.grafana.com/gpg.key | apt-key add -

Добавьте репозиторий Grafana

bash
add-apt-repository "deb https://packages.grafana.com/oss/deb stable main"

Обновите репозитории и установите Grafana:

bash
apt-get update && apt-get -y install grafana

Смена порта /etc/grafana/grafana.ini

ini
[server]
http_port=9906

Запустите Grafana

bash
systemctl start grafana-server.service
systemctl enable grafana-server.service
systemctl status grafana-server.service

Dashboard Node Exporter Full

1860

Nginx config Grafana

nginx
# Maps the client's Upgrade header to the Connection header value sent
# upstream: "upgrade" for WebSocket handshakes, "close" otherwise.
map $http_upgrade $connection_upgrade {
  default upgrade;
  '' close;
}

# Reverse proxy exposing Grafana (listening on localhost:9906) on port 8092.
server {
  listen 8092;
  listen [::]:8092;
  server_name 95.213.159.131;
  
  root /usr/share/nginx/html;
  index index.html index.htm;

  access_log  /var/log/nginx/grafana_access.log;
  error_log   /var/log/nginx/grafana_error.log;

  location / {
    # Forward the original Host header (including the port) to Grafana.
    proxy_set_header Host $http_host;
    proxy_pass http://localhost:9906/;
  }

  # Proxy Grafana Live WebSocket connections.
  location /api/live/ {
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    # Use the mapped value so non-upgrade requests get "close".
    proxy_set_header Connection $connection_upgrade;
    proxy_set_header Host $http_host;
    # No URI part here: with a trailing "/" nginx would replace the matched
    # /api/live/ prefix and Grafana would receive the wrong path.
    proxy_pass http://localhost:9906;
  }
}

Проверка портов

bash
sudo lsof -i -P -n

Чтобы удалить развернутые инструменты:

Удалите Grafana:

bash
systemctl stop grafana-server.service
systemctl disable grafana-server.service
apt -y remove grafana

Удалите Node_exporter:

bash
systemctl stop node_exporter.service
systemctl disable node_exporter.service
rm /etc/systemd/system/node_exporter.service
# Make systemd forget the removed unit file.
systemctl daemon-reload
# This guide installs the binary to /usr/local/bin and keeps the web config
# under /etc/prometheus/node_exporter (nothing is placed in /opt).
rm -f /usr/local/bin/node_exporter
rm -rf /etc/prometheus/node_exporter

Удалите AlertManager:

bash
systemctl stop alertmanager.service
systemctl disable alertmanager.service
rm /etc/systemd/system/alertmanager.service
# Make systemd forget the removed unit file.
systemctl daemon-reload
# This guide installs the binaries to /usr/local/bin and keeps config and data
# under /etc/prometheus and /var/lib/prometheus (nothing is placed in /opt).
rm -f /usr/local/bin/alertmanager /usr/local/bin/amtool
rm -rf /etc/prometheus/alertmanager /var/lib/prometheus/alertmanager

Удалите Prometheus:

bash
systemctl stop prometheus.service
systemctl disable prometheus.service
rm /etc/systemd/system/prometheus.service
# Make systemd forget the removed unit file.
systemctl daemon-reload
# This guide installs the binaries to /usr/local/bin and keeps config and data
# in /etc/prometheus and /var/lib/prometheus (nothing is placed in /opt).
rm -f /usr/local/bin/prometheus /usr/local/bin/promtool
rm -rf /etc/prometheus /var/lib/prometheus

Удалите пользователя и группу:

bash
userdel prometheus
groupdel prometheus

Примеры правил

yaml
groups:
- name: alert.rules
  rules:
  - alert: InstanceDown
    expr: up == 0
    for: 30s
    labels:
      severity: critical
    annotations:
      description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds.'
      summary: Instance {{ $labels.instance }} down

  - alert: HostHighCpuLoad
    expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Host high CPU load (instance {{ $labels.instance }})
      description: "CPU load is > 80%\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: HostOutOfMemory
    expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host out of memory (instance {{ $labels.instance }})
      description: "Node memory is filling up (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: HostUnusualDiskReadRate
    expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50
    for: 20m
    labels:
      severity: warning
    annotations:
      summary: Host unusual disk read rate (instance {{ $labels.instance }})
      description: "Disk is probably reading too much data (> 50 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: HostUnusualDiskWriteRate
    expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host unusual disk write rate (instance {{ $labels.instance }})
      description: "Disk is probably writing too much data (> 50 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: HostOutOfDiskSpace
    expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host out of disk space (instance {{ $labels.instance }})
      description: "Disk is almost full (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: HostSystemdServiceCrashed
    expr: node_systemd_unit_state{state="failed"} == 1
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Host systemd service crashed (instance {{ $labels.instance }})
      description: "systemd service crashed\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  # Fires as soon as one of the listed web-stack units reports state "inactive".
  - alert: HostSystemdServiceSiteInactive
    expr: node_systemd_unit_state{name=~"nginx.service|php7.4-fpm.service|php7.2-fpm.service|php5.6-fpm.service|mysql.service", state="inactive"} == 1
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Host systemd service inactive (instance {{ $labels.instance }})
      description: "systemd service is inactive\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: BlackboxProbeFailed
    expr: probe_success{job="blackbox_exporter"} == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox probe failed (instance {{ $labels.instance }})
      description: "Probe failed\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  # Fires when the 1-minute average probe duration has been above 3 seconds
  # for 1 minute.
  - alert: BlackboxSlowProbe
    expr: avg_over_time(probe_duration_seconds{job="blackbox_exporter"}[1m]) > 3
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox slow probe (instance {{ $labels.instance }})
      description: "Blackbox probe took more than 3s to complete\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      
  - alert: BlackboxSslCertificateWillExpireSoon
    expr: probe_ssl_earliest_cert_expiry{job="blackbox_exporter"} - time() < 86400 * 7
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
      description: "SSL certificate expires in 7 days\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"