Appearance
Установка Prometheus
Создайте пользователя prometheus и группу prometheus, от имени которых вы будете запускать Prometheus
bash
groupadd --system prometheus
useradd --system -g prometheus -s /bin/false prometheus
Скачайте архив prometheus и распакуйте его в папку /tmp
bash
wget https://github.com/prometheus/prometheus/releases/download/v2.31.1/prometheus-2.31.1.linux-amd64.tar.gz -O - | tar -xzv -C /tmp
Создайте директорию для конфигурационного файла
bash
mkdir /etc/prometheus
Создайте директорию для данных
bash
mkdir /var/lib/prometheus
Скопируйте содержимое распакованного архива:
bash
cp /tmp/prometheus-2.31.1.linux-amd64/prometheus /usr/local/bin
cp /tmp/prometheus-2.31.1.linux-amd64/promtool /usr/local/bin
cp -r /tmp/prometheus-2.31.1.linux-amd64/console* /etc/prometheus
Удалите содержимое распакованного архива из папки /tmp
bash
rm -rf /tmp/prometheus-2.31.1.linux-amd64/
Создайте конфигурационный файл /etc/prometheus/prometheus.yml со следующим содержимым
yaml
global:
  scrape_interval: 30s
  evaluation_interval: 30s
scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
Измените владельца созданных файлов
bash
chown -R prometheus:prometheus /var/lib/prometheus /etc/prometheus
chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool
Создайте сценарий запуска systemd сервиса Prometheus. Для этого создайте файл /etc/systemd/system/prometheus.service со следующим содержимым
ini
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target
[Service]
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/prometheus \
--config.file /etc/prometheus/prometheus.yml \
--storage.tsdb.path /var/lib/prometheus \
--web.console.templates=/etc/prometheus/consoles \
--web.console.libraries=/etc/prometheus/console_libraries
ExecReload=/bin/kill -HUP $MAINPID
[Install]
WantedBy=default.target
Изменить порт, добавить в настройки службы /etc/systemd/system/prometheus.service
ini
--web.listen-address=:9901
Запустите Prometheus
bash
systemctl daemon-reload
systemctl start prometheus.service
systemctl enable prometheus.service
systemctl status prometheus.service
Защита Prometheus /root/gen-pass.py
Установка ПО
bash
apt-get install -y python3 python3-bcrypt
Создание скрипта
python
import getpass
import bcrypt
password = getpass.getpass("password: ")
hashed_password = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt())
print(hashed_password.decode())
Генерация пароля
bash
python3 /root/gen-pass.py
Создание конфига /etc/prometheus/web.yml
yaml
basic_auth_users:
  admin: $2b$12$hNf2lSsxfm0.i4a.1kVpSOVyBCfIB51VRjgBUyv6kdnyTlgWj81Ay
Проверка конфига
bash
promtool check config /etc/prometheus/prometheus.yml
Проверка web конфига
bash
promtool check web-config /etc/prometheus/web.yml
Проверка правил
bash
promtool check rules /etc/prometheus/alert.rules.yml
Измените владельца созданных файлов
bash
chown prometheus:prometheus /etc/prometheus/web.yml
Добавить в настройки службы конфиг /etc/systemd/system/prometheus.service
ini
--web.config.file=/etc/prometheus/web.yml
Добавить в конфиг /etc/prometheus/prometheus.yml
yaml
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
    basic_auth:
      username: admin
      password: admin
Nginx config for Prometheus
nginx
server {
listen 8091;
listen [::]:8091;
server_name 95.213.159.131;
access_log /var/log/nginx/prometheus_access.log;
error_log /var/log/nginx/prometheus_error.log;
location / {
gzip off;
proxy_read_timeout 300;
proxy_connect_timeout 300;
proxy_redirect off;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://127.0.0.1:9901;
}
}
Установка Node_exporter
Скачайте архив node_exporter и распакуйте его в папку /tmp
bash
wget https://github.com/prometheus/node_exporter/releases/download/v1.2.2/node_exporter-1.2.2.linux-amd64.tar.gz -O - | tar -xzv -C /tmp
Скопируйте содержимое распакованного архива в папку /usr/local/bin
bash
cp /tmp/node_exporter-1.2.2.linux-amd64/node_exporter /usr/local/bin
Измените владельца созданных файлов
bash
chown -R prometheus:prometheus /usr/local/bin/node_exporter
Создайте сценарий запуска systemd сервиса node_exporter. Для этого создайте файл /etc/systemd/system/node_exporter.service со следующим содержимым
ini
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/node_exporter \
--collector.systemd
[Install]
WantedBy=multi-user.target
Изменить порт
ini
--web.listen-address=:9902
Запустите node_exporter
bash
systemctl daemon-reload
systemctl start node_exporter.service
systemctl enable node_exporter.service
systemctl status node_exporter.service
В файле /etc/prometheus/prometheus.yml в секцию scrape_configs добавьте секцию работы с node_exporter
yaml
  - job_name: "node_exporter"
    scrape_interval: 30s
    static_configs:
      - targets: ["localhost:9100"]
        labels:
          alias: localhost
yaml
  - job_name: "node_exporter_clients"
    static_configs:
      - targets: ['192.168.1.117:9100']
        labels:
          instance: 'linux-ina'
      - targets: ['192.168.1.138:9100']
        labels:
          instance: 'linux-inb'
Защита node_exporter
Создать конфиг /etc/prometheus/node_exporter/web.yml
yaml
basic_auth_users:
  admin: $2b$12$dqdAESkuJGrwXm0OhiJ/Z.bwAE9xntCsA8.EpihHNHXZCCHiKu43a
Измените владельца созданных файлов
bash
chown -R prometheus:prometheus /etc/prometheus/node_exporter/
Добавить в настройки службы конфиг /etc/systemd/system/node_exporter.service
ini
--web.config=/etc/prometheus/node_exporter/web.yml
Добавить в конфиг /etc/prometheus/prometheus.yml
yaml
  - job_name: "node_exporter"
    static_configs:
      - targets: ["localhost:9100"]
    basic_auth:
      username: admin
      password: admin
Bash скрипт
bash
groupadd --system prometheus
useradd --system -g prometheus -s /bin/false prometheus
wget https://github.com/prometheus/node_exporter/releases/download/v1.2.2/node_exporter-1.2.2.linux-amd64.tar.gz -O - | tar -xzv -C /tmp
cp /tmp/node_exporter-1.2.2.linux-amd64/node_exporter /usr/local/bin
rm -rf /tmp/node_exporter-1.2.2.linux-amd64/
mkdir /etc/prometheus
echo -e 'basic_auth_users:\n admin: $2b$12$A845HEbsu7MNJirEYlNcCehIJAE3OjW1RnNe8XaUauxvRz3.oqdy.' >> /etc/prometheus/web.yml
chown -R prometheus:prometheus /usr/local/bin/node_exporter
chown -R prometheus:prometheus /etc/prometheus
cat <<'EOF' > /etc/systemd/system/node_exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/node_exporter \
--web.config=/etc/prometheus/web.yml \
--web.listen-address=:9902 \
--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/) \
--collector.systemd
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
systemctl start node_exporter.service
systemctl enable node_exporter.service
systemctl status node_exporter.service
Установка blackbox_exporter
Скачайте архив blackbox_exporter и распакуйте его в папку /tmp
bash
wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.19.0/blackbox_exporter-0.19.0.linux-amd64.tar.gz -O - | tar -xzv -C /tmp
Скопируйте содержимое распакованного архива в папку /usr/local/bin
bash
cp /tmp/blackbox_exporter-0.19.0.linux-amd64/blackbox_exporter /usr/local/bin
mkdir /etc/prometheus/blackbox_exporter/
cp /tmp/blackbox_exporter-0.19.0.linux-amd64/blackbox.yml /etc/prometheus/blackbox_exporter/
Измените владельца созданных файлов
bash
chown -R prometheus:prometheus /usr/local/bin/blackbox_exporter
chown -R prometheus:prometheus /etc/prometheus/blackbox_exporter/
Создайте сценарий запуска systemd сервиса blackbox_exporter. Для этого создайте файл /etc/systemd/system/blackbox_exporter.service со следующим содержимым
ini
[Unit]
Description=Prometheus Blackbox Exporter
After=network.target
[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/blackbox_exporter \
--config.file /etc/prometheus/blackbox_exporter/blackbox.yml
[Install]
WantedBy=multi-user.target
Изменить порт
ini
--web.listen-address=:9903
Изменить конфиг /etc/prometheus/blackbox_exporter/blackbox.yml модуля http_2xx
yaml
  http_2xx:
    prober: http
    timeout: 5s
    http:
      preferred_ip_protocol: ip4
Запустите blackbox_exporter
bash
systemctl daemon-reload
systemctl start blackbox_exporter.service
systemctl enable blackbox_exporter.service
systemctl status blackbox_exporter.service
Добавить в конфиг /etc/prometheus/prometheus.yml
yaml
  - job_name: blackbox
    metrics_path: /metrics
    static_configs:
      - targets:
          - localhost:9115
  - job_name: 'blackbox_exporter'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - https://prometheus.io
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
Закрыть порт 9115
bash
iptables -A INPUT -p tcp --dport 9115 -j DROP
Установка AlertManager
Скачайте архив alertmanager и распакуйте его в папку /tmp
bash
wget https://github.com/prometheus/alertmanager/releases/download/v0.23.0/alertmanager-0.23.0.linux-amd64.tar.gz -O - | tar -xzv -C /tmp
Создайте папку
bash
mkdir /etc/prometheus/alertmanager /var/lib/prometheus/alertmanager
Скопируйте содержимое распакованного архива в папку /usr/local/bin
bash
cp /tmp/alertmanager-0.23.0.linux-amd64/alertmanager /usr/local/bin
cp /tmp/alertmanager-0.23.0.linux-amd64/amtool /usr/local/bin
cp /tmp/alertmanager-0.23.0.linux-amd64/alertmanager.yml /etc/prometheus/alertmanager
Измените владельца созданных файлов
bash
chown -R prometheus:prometheus /etc/prometheus/alertmanager /var/lib/prometheus/alertmanager /usr/local/bin/alertmanager /usr/local/bin/amtool
Создайте сценарий запуска systemd сервиса alertmanager. Для этого создайте файл /etc/systemd/system/alertmanager.service со следующим содержимым
ini
[Unit]
Description=Prometheus Alertmanager
After=network.target
[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/alertmanager \
--config.file=/etc/prometheus/alertmanager/alertmanager.yml \
--storage.path=/var/lib/prometheus/alertmanager \
--cluster.advertise-address=127.0.0.1:9093
ExecReload=/bin/kill -HUP $MAINPID
[Install]
WantedBy=multi-user.target
Изменить порт
ini
[Unit]
Description=Prometheus Alertmanager
After=network.target
[Service]
Type=simple
Restart=always
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/alertmanager \
--config.file=/etc/prometheus/alertmanager/alertmanager.yml \
--storage.path=/var/lib/prometheus/alertmanager \
--cluster.listen-address=:9904 \
--web.listen-address=:9905
ExecReload=/bin/kill -HUP $MAINPID
[Install]
WantedBy=multi-user.target
Запустите alertmanager
bash
systemctl daemon-reload
systemctl start alertmanager.service
systemctl enable alertmanager.service
systemctl status alertmanager.service
Создайте правило для мониторинга, при котором система будет считать, что виртуальный сервер работает неправильно /etc/prometheus/alert.rules.yml
yaml
groups:
  - name: alert.rules
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds. '
          summary: Instance {{ $labels.instance }} down
Поместите правило в config /etc/prometheus/prometheus.yml Prometheus и Alertmanager
yaml
rule_files:
  - 'alert.rules.yml'
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:9093'
Настройка уведомления на почту /etc/prometheus/alertmanager/alertmanager.yml
yaml
global:
  resolve_timeout: 5m
  smtp_from: bajdakov@runway-agency.ru
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
  routes:
    - receiver: 'web.hook'
      continue: true
    - receiver: 'email'
      continue: true
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
  - name: 'email'
    email_configs:
      - to: 'bajdakov@runway-agency.ru'
        from: 'bajdakov@runway-agency.ru'
        smarthost: 'smtp.yandex.ru:587'
        require_tls: true
        auth_username: "bajdakov@runway-agency.ru"
        auth_identity: "bajdakov@runway-agency.ru"
        auth_password: "user877"
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
Проверка конфига
bash
amtool check-config /etc/prometheus/alertmanager/alertmanager.yml
Закрыть порт 9093
bash
iptables -A INPUT -p tcp --dport 9093 -j DROP
Закрыть порт кроме localhost
bash
iptables -A INPUT -p tcp ! -s 127.0.0.1 --dport 9905 -j DROP
Установка Grafana
Установите необходимое дополнительное ПО
bash
apt-get install -y software-properties-common wget apt-transport-https
Добавьте ключ репозитория Grafana
bash
wget -q -O - https://packages.grafana.com/gpg.key | apt-key add -
Добавьте репозиторий Grafana
bash
add-apt-repository "deb https://packages.grafana.com/oss/deb stable main"
Обновите репозитории и установите Grafana:
bash
apt-get update && apt-get -y install grafana
Смена порта /etc/grafana/grafana.ini
ini
[server]
http_port=9906
Запустите Grafana
bash
systemctl start grafana-server.service
systemctl enable grafana-server.service
systemctl status grafana-server.service
Dashboard Node Exporter Full
1860
Nginx config Grafana
nginx
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 8092;
listen [::]:8092;
server_name 95.213.159.131;
root /usr/share/nginx/html;
index index.html index.htm;
access_log /var/log/nginx/grafana_access.log;
error_log /var/log/nginx/grafana_error.log;
location / {
proxy_pass http://localhost:9906/;
}
# Proxy Grafana Live WebSocket connections.
location /api/live {
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "Upgrade";
proxy_set_header Host $http_host;
proxy_pass http://localhost:9906/;
}
}
Проверка портов
bash
sudo lsof -i -P -n
Чтобы удалить развернутые инструменты:
Удалите Grafana:
bash
systemctl stop grafana-server.service
systemctl disable grafana-server.service
apt -y remove grafana
Удалите Node_exporter:
bash
systemctl stop node_exporter.service
systemctl disable node_exporter.service
rm /etc/systemd/system/node_exporter.service
rm -rf /opt/node_exporter
Удалите AlertManager:
bash
systemctl stop alertmanager.service
systemctl disable alertmanager.service
rm /etc/systemd/system/alertmanager.service
rm -rf /opt/alertmanager
Удалите Prometheus:
bash
systemctl stop prometheus.service
systemctl disable prometheus.service
rm /etc/systemd/system/prometheus.service
rm -rf /opt/prometheus
Удалите пользователя и группу:
bash
userdel prometheus
groupdel prometheus
Примеры правил
yaml
groups:
- name: alert.rules
rules:
- alert: InstanceDown
expr: up == 0
for: 30s
labels:
severity: critical
annotations:
description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds.'
summary: Instance {{ $labels.instance }} down
- alert: HostHighCpuLoad
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
for: 0m
labels:
severity: warning
annotations:
summary: Host high CPU load (instance {{ $labels.instance }})
description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostOutOfMemory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 2m
labels:
severity: warning
annotations:
summary: Host out of memory (instance {{ $labels.instance }})
description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostUnusualDiskReadRate
expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50
for: 20m
labels:
severity: warning
annotations:
summary: Host unusual disk read rate (instance {{ $labels.instance }})
description: "Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostUnusualDiskWriteRate
expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50
for: 2m
labels:
severity: warning
annotations:
summary: Host unusual disk write rate (instance {{ $labels.instance }})
description: "Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostOutOfDiskSpace
expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
for: 2m
labels:
severity: warning
annotations:
summary: Host out of disk space (instance {{ $labels.instance }})
description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostSystemdServiceCrashed
expr: node_systemd_unit_state{state="failed"} == 1
for: 0m
labels:
severity: warning
annotations:
summary: Host systemd service crashed (instance {{ $labels.instance }})
description: "systemd service crashed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostSystemdServiceSiteInactive
expr: node_systemd_unit_state{name=~"nginx.service|php7.4-fpm.service|php7.2-fpm.service|php5.6-fpm.service|mysql.service", state="inactive"} == 1
for: 0m
labels:
severity: warning
annotations:
summary: Host systemd service inactive (instance {{ $labels.instance }})
description: "systemd service inactive\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxProbeFailed
expr: probe_success{job="blackbox_exporter"} == 0
for: 0m
labels:
severity: critical
annotations:
summary: Blackbox probe failed (instance {{ $labels.instance }})
description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxSlowProbe
expr: avg_over_time(probe_duration_seconds{job="blackbox_exporter"}[1m]) > 3
for: 1m
labels:
severity: warning
annotations:
summary: Blackbox slow probe (instance {{ $labels.instance }})
description: "Blackbox probe took more than 3s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxSslCertificateWillExpireSoon
expr: probe_ssl_earliest_cert_expiry{job="blackbox_exporter"} - time() < 86400 * 7
for: 0m
labels:
severity: warning
annotations:
summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
description: "SSL certificate expires in 7 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"