多页打印视图 点击此处打印.

返回常规视图.

参考

Pigsty详细参考信息

1 - 配置文件

配置参数详细介绍

以下是用于沙箱环境的默认配置文件:pigsty.yml

---
######################################################################
# File      :   pigsty.yml
# Desc      :   Pigsty Configuration Example
# Note      :   Pigsty Sandbox Demo
# Link      :   https://pigsty.cc/zh/docs/config/
# Ctime     :   2020-05-22
# Mtime     :   2021-04-19
# Copyright (C) 2018-2021 Ruohang Feng
######################################################################


######################################################################
#               Development Environment Inventory                    #
######################################################################


all: # top-level namespace

  #==================================================================#
  #                           Clusters                               #
  #==================================================================#
  # postgres database clusters are defined as kv pairs in `all.children`,
  # where the key is the cluster name and the value is an object consisting
  # of cluster members (hosts) and cluster-specific variables (vars).
  # meta nodes are defined in the special group "meta" with `meta_node=true`

  children:

    #-----------------------------
    # meta controller
    #-----------------------------
    meta:
      # special group 'meta' defines the main controller machine
      hosts:
        10.10.10.10: {}
      vars:
        # mark node as meta controller
        meta_node: true
        # meta group has top priority
        ansible_group_priority: 99

    #-----------------------------
    # cluster: pg-meta
    #-----------------------------
    # pg-meta is a single-node pgsql cluster deployed on meta node (10.10.10.10)
    pg-meta:
      # - cluster members - #
      hosts:
        10.10.10.10: {pg_seq: 1, pg_role: primary, pg_offline_query: true}

      # - cluster configs - #
      vars:
        pg_cluster: pg-meta                 # define actual cluster name
        pg_version: 13                      # define installed pgsql version
        node_tune: tiny                     # tune node into oltp|olap|crit|tiny mode
        pg_conf: tiny.yml                   # tune pgsql into oltp|olap|crit|tiny mode
        patroni_mode: pause                 # enter maintenance mode, {default|pause|remove}
        # quoted on purpose: YAML 1.1 loaders read a bare `off` as boolean
        # false, while the options documented below are plain strings
        patroni_watchdog_mode: 'off'        # disable watchdog (require|automatic|off)
        pg_lc_ctype: en_US.UTF8             # enabled pg_trgm i18n char support

        # - defining business users - #
        # each entry describes one user/role; only `name` is required
        pg_users:
          # default production read-write user dbuser_meta
          - name: dbuser_meta                              # user's name is required
            password: md5d3d10d8cad606308bdb180148bf663e1  # md5 password is acceptable
            pgbouncer: true                                # add user to pgbouncer userlist
            roles: [dbrole_readwrite]                      # grant roles to user
            comment: default production read-write user for meta database

          # default production read-only user for grafana direct access
          - name: dbuser_grafana
            password: DBUser.Grafana
            pgbouncer: true
            roles: [dbrole_readonly]
            comment: default readonly access for grafana datasource

          # complete example of user/role definition
          - name: dbuser_pigsty             # pigsty user have admin access (DDL|DML)
            password: DBUser.Pigsty         # example user's password, can be md5 encrypted
            login: true                     # can login, true by default (should be false for role)
            superuser: false                # is superuser? false by default
            createdb: false                 # can create database? false by default
            createrole: false               # can create role? false by default
            inherit: true                   # can this role use inherited privileges?
            replication: false              # can this role do replication? false by default
            bypassrls: false                # can this role bypass row level security? false by default
            pgbouncer: true                 # add this user to pgbouncer? false by default (true for production user)
            connlimit: -1                   # connection limit, -1 disable limit
            expire_in: 3650                 # now + n days when this role is expired (OVERWRITE expire_at)
            expire_at: '2030-12-31'         # 'timestamp' when this role is expired (OVERWRITTEN by expire_in)
            comment: pigsty admin user      # comment on user/role
            roles: [dbrole_admin]           # dbrole_{admin,readonly,readwrite,offline}
            parameters:                     # additional role level parameters with ALTER ROLE SET
              search_path: pigsty,public    # add pigsty schema into search_path

        # - defining business databases - #
        # commented keys below are the optional fields left unset for `meta`
        pg_databases:
          - name: meta                      # name is the only required field for a database
            # baseline: metadb/schema.sql   # pigsty meta database baseline
            # owner: postgres               # optional, database owner
            # template: template1           # optional, template1 by default
            # encoding: UTF8                # optional, UTF8 by default , must same as template database, leave blank to set to db default
            # locale: C                     # optional, C by default , must same as template database, leave blank to set to db default
            # lc_collate: C                 # optional, C by default , must same as template database, leave blank to set to db default
            # lc_ctype: C                   # optional, C by default , must same as template database, leave blank to set to db default
            # tablespace: pg_default        # optional, 'pg_default' is the default tablespace
            # allowconn: true               # optional, true by default, false disable connect at all
            # revokeconn: false             # optional, false by default, true revoke connect from public # (only default user and owner have connect privilege on database)
            # pgbouncer: true               # optional, add this database to pgbouncer list? true by default
            comment: pigsty meta database   # optional, comment string for database
            connlimit: -1                   # optional, connection limit, -1 or none disable limit (default)
            schemas: [pigsty]               # optional, create additional schema
            extensions:                     # optional, extension name and which schema to create
              - {name: adminpack, schema: pg_catalog}
            parameters:                       # optional, extra parameters with ALTER DATABASE
              search_path: 'pigsty,public'    # add pigsty to search_path

        pg_default_database: meta           # default database will be used as primary monitor target
        vip_mode: l2                        # none|l2|l4, l2 vip are used in sandbox demo
        vip_address: 10.10.10.2             # virtual ip address
        vip_cidrmask: 8                     # cidr network mask length
        vip_interface: eth1                 # interface to add virtual ip


    #-----------------------------
    # cluster: pg-test
    #-----------------------------
    # uncomment this for complete 4-node sandbox demo environment

    #pg-test: # define cluster named 'pg-test'
    #  # - cluster members - #
    #  hosts:
    #    10.10.10.11: {pg_seq: 1, pg_role: primary}
    #    10.10.10.12: {pg_seq: 2, pg_role: replica}
    #    10.10.10.13: {pg_seq: 3, pg_role: offline}
    #
    #  # - cluster configs - #
    #  vars:
    #    # basic settings
    #    pg_cluster: pg-test                 # define actual cluster name
    #    pg_version: 13                      # define installed pgsql version
    #    node_tune: tiny                     # tune node into oltp|olap|crit|tiny mode
    #    pg_conf: tiny.yml                   # tune pgsql into oltp|olap|crit|tiny mode
    #    pg_users:
    #      - name: test                      # admin user for pg-test, have DDL
    #        password: test
    #        roles: [dbrole_admin]
    #        pgbouncer: true
    #        comment: default admin user for test database
    #
    #      - name: dbuser_test               # production rw-user
    #        password: DBUser.Test
    #        roles: [dbrole_readwrite]
    #        pgbouncer: true
    #        comment: default test user for production usage
    #
    #    pg_databases:                       # create a business database 'test'
    #      - name: test                      # use the simplest form
    #        extensions:                     # install postgis to test database
    #          - {name: postgis, schema: public}
    #    pg_default_database: test           # default database will be used as primary monitor target
    #
    #    # extra service settings
    #    pg_services_extra:                  # extra services to be added
    #      - name: standby                   # service name pg-test-standby
    #        src_ip: "*"
    #        src_port: 5435                  # 5435 routes to sync replica
    #        dst_port: postgres
    #        check_url: /sync                # use /sync health check
    #        selector: "[]"                  # jmespath to filter instances
    #        selector_backup: "[? pg_role == `primary`]"  # primary used as backup server for standby service
    #
    #    # proxy settings
    #    vip_mode: l2                        # enable/disable vip (require members in same LAN)
    #    vip_address: 10.10.10.3             # virtual ip address
    #    vip_cidrmask: 8                     # cidr network mask length
    #    vip_interface: eth1                 # interface to add virtual ip


  #==================================================================#
  #                           Globals                                #
  #==================================================================#
  vars:

    #------------------------------------------------------------------------------
    # CONNECTION PARAMETERS
    #------------------------------------------------------------------------------
    # this section defines connection parameters

    # ansible_user: vagrant                       # admin user with ssh access and sudo privilege

    proxy_env: # global proxy env when downloading packages
      # comma-separated hosts, domains and networks that should not go through the proxy
      no_proxy: "localhost,127.0.0.1,10.0.0.0/8,192.168.0.0/16,*.pigsty,*.aliyun.com,mirrors.aliyuncs.com,mirrors.tuna.tsinghua.edu.cn,mirrors.zju.edu.cn,*.myqcloud.com"
      # proxy endpoints are left unset here; uncomment and fill in to use a proxy
      # http_proxy: ''
      # https_proxy: ''
      # all_proxy: ''


    #------------------------------------------------------------------------------
    # REPO PROVISION
    #------------------------------------------------------------------------------
    # this section defines how to build a local repo

    # - repo basic - #
    repo_enabled: true                            # build local yum repo on meta nodes?
    repo_name: pigsty                             # local repo name
    repo_address: yum.pigsty                      # repo external address (ip:port or url)
    repo_port: 80                                 # listen port, must be consistent with repo_address
    repo_home: /www                               # default repo dir location
    repo_rebuild: false                           # force re-download packages
    repo_remove: true                             # remove existing repos

    # - where to download - #
    # upstream yum repositories; every entry sets `name`, `description`,
    # `gpgcheck` and either `baseurl` (one url or a list) or `mirrorlist`
    repo_upstreams:
      # --- CentOS base / updates / extras, three Aliyun mirror urls each ---
      - name: base
        description: CentOS-$releasever - Base - Aliyun Mirror
        baseurl:
          - http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
          - http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
          - http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/
        gpgcheck: false
        failovermethod: priority

      - name: updates
        description: CentOS-$releasever - Updates - Aliyun Mirror
        baseurl:
          - http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
          - http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
          - http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/
        gpgcheck: false
        failovermethod: priority

      - name: extras
        description: CentOS-$releasever - Extras - Aliyun Mirror
        baseurl:
          - http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
          - http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
          - http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/
        gpgcheck: false
        failovermethod: priority

      # --- EPEL ---
      - name: epel
        description: CentOS $releasever - EPEL - Aliyun Mirror
        baseurl: http://mirrors.aliyun.com/epel/$releasever/$basearch
        gpgcheck: false
        failovermethod: priority

      # --- monitoring ---
      - name: grafana
        description: Grafana - TsingHua Mirror
        gpgcheck: false
        baseurl: https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm

      - name: prometheus
        description: Prometheus and exporters
        gpgcheck: false
        baseurl: https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch

      # --- PostgreSQL (PGDG) ---
      # the commented baseurl lines point at the ZJU mirror, an alternative
      # to consider in mainland china
      - name: pgdg-common
        description: PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch
        gpgcheck: false
        baseurl: https://download.postgresql.org/pub/repos/yum/common/redhat/rhel-$releasever-$basearch
        # baseurl: http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch

      - name: pgdg13
        description: PostgreSQL 13 for RHEL/CentOS $releasever - $basearch
        gpgcheck: false
        baseurl: https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-$releasever-$basearch
        # baseurl: http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch

      # --- CentOS software collections (mirrorlist urls pin release=7) ---
      - name: centos-sclo
        description: CentOS-$releasever - SCLo
        gpgcheck: false
        mirrorlist: http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo

      - name: centos-sclo-rh
        description: CentOS-$releasever - SCLo rh
        gpgcheck: false
        mirrorlist: http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh

      # --- repos marked skip_if_unavailable ---
      - name: nginx
        description: Nginx Official Yum Repo
        skip_if_unavailable: true
        gpgcheck: false
        baseurl: http://nginx.org/packages/centos/$releasever/$basearch/

      - name: haproxy
        description: Copr repo for haproxy
        skip_if_unavailable: true
        gpgcheck: false
        baseurl: https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/

      # for latest consul & kubernetes
      - name: harbottle
        description: Copr repo for main owned by harbottle
        skip_if_unavailable: true
        gpgcheck: false
        baseurl: https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/

    # - what to download - #
    # packages to download into the local repo; each entry is a
    # space-separated list of package names (globs such as `postgresql13*` are used)
    repo_packages:
      # repo bootstrap packages
      - epel-release nginx wget yum-utils yum createrepo sshpass unzip                        # bootstrap packages

      # node basic packages (wget is already listed with the bootstrap packages)
      - ntp chrony uuid lz4 nc pv jq vim-enhanced make patch bash lsof git tuned              # basic system util
      - readline zlib openssl libyaml libxml2 libxslt perl-ExtUtils-Embed ca-certificates     # basic pg dependency
      - numactl grubby sysstat dstat iotop bind-utils net-tools tcpdump socat ipvsadm telnet  # system utils

      # dcs & monitor packages
      - grafana prometheus2 pushgateway alertmanager                                          # monitor and ui
      - node_exporter postgres_exporter nginx_exporter blackbox_exporter                      # exporter
      - consul consul_exporter consul-template etcd                                           # dcs

      # python3 dependencies
      - ansible python python-pip python-psycopg2 audit                                       # ansible & python
      - python3 python3-psycopg2 python36-requests python3-etcd python3-consul                # python3
      - python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography               # patroni extra deps

      # proxy and load balancer
      - haproxy keepalived dnsmasq                                                            # proxy and dns

      # postgres common Packages
      - patroni patroni-consul patroni-etcd pgbouncer pg_cli pgbadger pg_activity               # major components
      - pgcenter boxinfo check_postgres emaj pgbconsole pg_bloat_check pgquarrel                # other common utils
      - barman barman-cli pgloader pgFormatter pitrery pspg pgxnclient PyGreSQL pgadmin4 tail_n_mail

      # postgres 13 packages
      - postgresql13* postgis31* citus_13 timescaledb_13 # pgrouting_13                         # postgres 13 and postgis 31
      - pg_repack13 pg_squeeze13                                                                # maintenance extensions
      - pg_qualstats13 pg_stat_kcache13 system_stats_13 bgw_replstatus13                        # stats extensions
      - plr13 plsh13 plpgsql_check_13 plproxy13 pldebugger13                                    # PL extensions
      - hdfs_fdw_13 mongo_fdw13 mysql_fdw_13 ogr_fdw13 redis_fdw_13 pgbouncer_fdw13             # FDW extensions
      - wal2json13 count_distinct13 ddlx_13 geoip13 orafce13                                    # MISC extensions
      - rum_13 hypopg_13 ip4r13 jsquery_13 logerrors_13 periods_13 pg_auto_failover_13 pg_catcheck13
      - pg_fkpart13 pg_jobmon13 pg_partman13 pg_prioritize_13 pg_track_settings13 pgaudit15_13
      - pgcryptokey13 pgexportdoc13 pgimportdoc13 pgmemcache-13 pgmp13 pgq-13
      - pguint13 pguri13 prefix13  safeupdate_13 semver13  table_version13 tdigest13

    repo_url_packages:
      # additional rpm packages
      - https://github.com/Vonng/pg_exporter/releases/download/v0.3.2/pg_exporter-0.3.2-1.el7.x86_64.rpm
      - https://github.com/cybertec-postgresql/vip-manager/releases/download/v0.6/vip-manager_0.6-1_amd64.rpm
      - http://guichaz.free.fr/polysh/files/polysh-0.4-1.noarch.rpm

      # tar.gz and zip binary packages
      - https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz # monitor binary
      - https://github.com/Vonng/pg_exporter/releases/download/v0.3.2/pg_exporter_v0.3.2_linux-amd64.tar.gz
      - https://github.com/grafana/loki/releases/download/v2.2.1/loki-linux-amd64.zip           # loki binary
      - https://github.com/grafana/loki/releases/download/v2.2.1/promtail-linux-amd64.zip
      - https://github.com/grafana/loki/releases/download/v2.2.1/logcli-linux-amd64.zip
      - https://github.com/grafana/loki/releases/download/v2.2.1/loki-canary-linux-amd64.zip

      # mirrors in mainland china for the three rpm packages listed first
      # (same file names; swap these in for the official urls if needed)
      # - http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg/pg_exporter-0.3.2-1.el7.x86_64.rpm
      # - http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg/vip-manager_0.6-1_amd64.rpm
      # - http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg/polysh-0.4-1.noarch.rpm

    #------------------------------------------------------------------------------
    # NODE PROVISION
    #------------------------------------------------------------------------------
    # this section defines how to provision nodes
    # nodename:                                   # if defined, node's hostname will be overwritten

    # - node dns - #
    # each entry is one "<ip> <hostname>" record
    node_dns_hosts: # static dns records in /etc/hosts
      - 10.10.10.10 yum.pigsty
    node_dns_server: add                          # add (default) | none (skip) | overwrite (remove old settings)
    node_dns_servers:                             # dynamic nameserver in /etc/resolv.conf
      - 10.10.10.10
    node_dns_options:                             # dns resolv options
      - options single-request-reopen timeout:1 rotate
      - domain service.consul

    # - node repo - #
    node_repo_method: local                       # none|local|public (use local repo for production env)
    node_repo_remove: true                        # whether to remove existing repo
    node_local_repo_url:                          # local repo url (if method=local, make sure firewall is configured or disabled)
      - http://yum.pigsty/pigsty.repo

    # - node packages - #
    # each entry is a comma-separated list of package names
    node_packages:                                # common packages for all nodes
      - wget,yum-utils,sshpass,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,unzip,git,readline,zlib,openssl
      - numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,pv,jq
      - python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul
      - python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography
      - node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager
    node_extra_packages:                          # extra packages for all nodes
      - patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity
    node_meta_packages:                           # packages for meta nodes only
      - grafana,prometheus2,alertmanager,nginx_exporter,blackbox_exporter,pushgateway
      - dnsmasq,nginx,ansible,pgbadger,polysh

    # build & devel packages (add to repo_packages too if you want build database & extensions from source)
    # - gcc,gcc-c++,clang,coreutils,diffutils,rpm-build,rpm-devel,rpmlint,rpmdevtools
    # - zlib-devel,openssl-libs,openssl-devel,pam-devel,libxml2-devel,libxslt-devel,openldap-devel,systemd-devel,tcl-devel,python-devel


    # - node features - #
    # boolean switches; in this config numa and swap are left enabled,
    # while firewall and selinux are disabled
    node_disable_numa: false                      # disable numa, important for production database, reboot required
    node_disable_swap: false                      # disable swap, important for production database
    node_disable_firewall: true                   # disable firewall (required if using kubernetes)
    node_disable_selinux: true                    # disable selinux  (required if using kubernetes)
    node_static_network: true                     # keep dns resolver settings after reboot
    node_disk_prefetch: false                     # setup disk prefetch on HDD to increase performance

    # - node kernel modules - #
    # kernel modules for nodes, one module name per entry (no duplicates)
    node_kernel_modules:
      - softdog
      - br_netfilter
      - ip_vs
      - ip_vs_rr
      - ip_vs_wrr
      - ip_vs_sh
      - nf_conntrack_ipv4

    # - node tuned - #
    node_tune: tiny                               # install and activate tuned profile: none|oltp|olap|crit|tiny
    node_sysctl_params: {}                        # set additional sysctl parameters, k:v format (empty mapping here)
    # example k:v entry for node_sysctl_params:
    #   net.bridge.bridge-nf-call-iptables: 1

    # - node user - #
    node_admin_setup: true                        # set up a default admin user?
    node_admin_uid: 88                            # uid and gid for admin user
    node_admin_username: dba                      # default admin user: dba
    node_admin_ssh_exchange: true                 # exchange admin's ssh key among cluster?
    node_admin_pk_current: false                  # add current user's ~/.ssh/id_rsa.pub to admin pk
    node_admin_pks:                               # ssh public keys to be added to admin user
      # one public key per entry, quoted as a single string
      - 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com'

    # - node ntp - #
    node_ntp_service: ntp                         # ntp or chrony
    node_ntp_config: true                         # overwrite existing ntp config?
    node_timezone: Asia/Shanghai                  # default node timezone
    node_ntp_servers:                             # default NTP servers
      # each entry is a complete `pool ...` or `server ...` directive
      - pool cn.pool.ntp.org iburst
      - pool pool.ntp.org iburst
      - pool time.pool.aliyun.com iburst
      - server 10.10.10.10 iburst


    #------------------------------------------------------------------------------
    # META PROVISION
    #------------------------------------------------------------------------------
    # - ca - #
    ca_method: create                             # create|copy|recreate
    ca_subject: "/CN=root-ca"                     # self-signed CA subject
    ca_homedir: /ca                               # ca cert directory
    # NOTE(review): ca_cert / ca_key look like file names inside ca_homedir - confirm
    ca_cert: ca.crt                               # ca public key/cert
    ca_key: ca.key                                # ca private key

    # - nginx - #
    # one entry per upstream: `name`, the `host` name it is served under,
    # and the backend `url` (ip:port); home and grafana share 127.0.0.1:3000
    nginx_upstream:
      - name: home
        host: pigsty
        url: "127.0.0.1:3000"
      - name: consul
        host: c.pigsty
        url: "127.0.0.1:8500"
      - name: grafana
        host: g.pigsty
        url: "127.0.0.1:3000"
      - name: prometheus
        host: p.pigsty
        url: "127.0.0.1:9090"
      - name: alertmanager
        host: a.pigsty
        url: "127.0.0.1:9093"
      - name: haproxy
        host: h.pigsty
        url: "127.0.0.1:9091"

    # - nameserver - #
    dns_records: # dynamic dns record resolved by dnsmasq
      # each entry is "<ip> <name> [<alias>]"
      # cluster vips and sandbox nodes
      - 10.10.10.2  pg-meta                       # sandbox vip for pg-meta
      - 10.10.10.3  pg-test                       # sandbox vip for pg-test
      - 10.10.10.10 meta-1                        # sandbox node meta-1 (node-0)
      - 10.10.10.11 node-1                        # sandbox node node-1
      - 10.10.10.12 node-2                        # sandbox node node-2
      - 10.10.10.13 node-3                        # sandbox node node-3
      # service domain names (short and long form), all pointing at 10.10.10.10
      - 10.10.10.10 pigsty
      - 10.10.10.10 y.pigsty yum.pigsty
      - 10.10.10.10 c.pigsty consul.pigsty
      - 10.10.10.10 g.pigsty grafana.pigsty
      - 10.10.10.10 p.pigsty prometheus.pigsty
      - 10.10.10.10 a.pigsty alertmanager.pigsty
      - 10.10.10.10 n.pigsty ntp.pigsty
      - 10.10.10.10 h.pigsty haproxy.pigsty

    # - prometheus - #
    prometheus_data_dir: /export/prometheus/data  # prometheus data dir
    # extra command line options; here a tsdb retention of 30d
    prometheus_options: '--storage.tsdb.retention=30d'
    prometheus_reload: false                      # reload prometheus instead of recreating it
    prometheus_sd_method: consul                  # service discovery method: static|consul|etcd
    prometheus_scrape_interval: 5s                # global scrape & evaluation interval
    prometheus_scrape_timeout: 4s                 # scrape timeout (set below the 5s scrape interval)
    prometheus_sd_interval: 5s                    # service discovery refresh interval

    # - grafana - #
    # NOTE(review): grafana_url embeds admin:admin, the same value as
    # grafana_admin_password - presumably both must be changed together; confirm
    grafana_url: http://admin:admin@10.10.10.10:3000 # grafana url
    grafana_admin_password: admin                    # default grafana admin user password
    grafana_plugin: install                          # none|install|reinstall
    grafana_cache: /www/pigsty/grafana/plugins.tgz   # path to grafana plugins tarball
    grafana_customize: false                         # customize grafana resources
    grafana_plugins: # default grafana plugins list
      # datasource plugins
      - redis-datasource
      - simpod-json-datasource
      - fifemon-graphql-datasource
      - sbueringer-consul-datasource
      - camptocamp-prometheus-alertmanager-datasource
      # panel plugins
      - ryantxu-ajax-panel
      - marcusolsson-hourly-heatmap-panel
      - michaeldmoore-multistat-panel
      - marcusolsson-treemap-panel
      - pr0ps-trackmap-panel
      - dalvany-image-panel
      - magnesium-wordcloud-panel
      - cloudspout-button-panel
      - speakyourcode-button-panel
      - jdbranham-diagram-panel
      - grafana-piechart-panel
      - snuids-radar-panel
      - digrich-bubblechart-panel
    # plugins given as git repository urls
    grafana_git_plugins:
      - https://github.com/Vonng/grafana-echarts

    # - loki - #
    loki_clean: false                 # whether to remove existing loki data
    loki_data_dir: /export/loki       # default loki data dir


    #------------------------------------------------------------------------------
    # DCS PROVISION
    #------------------------------------------------------------------------------
    service_registry: consul                      # where to register services: none | consul | etcd | both
    dcs_type: consul                              # consul | etcd | both
    dcs_name: pigsty                              # consul dc name | etcd initial cluster token
    dcs_servers:                                  # dcs server dict in name:ip format
      meta-1: 10.10.10.10                         # you could use existing dcs cluster
      # meta-2: 10.10.10.11                       # hosts whose IP is listed here will be initialized as servers
      # meta-3: 10.10.10.12                       # 3 or 5 dcs nodes are recommended for production environment
    dcs_exists_action: clean                      # abort|skip|clean if dcs server already exists
    dcs_disable_purge: false                      # set to true to disable purge functionality for good (force dcs_exists_action = abort)
    consul_data_dir: /var/lib/consul              # consul data dir (/var/lib/consul by default)
    etcd_data_dir: /var/lib/etcd                  # etcd data dir (set to /var/lib/etcd here)


    #------------------------------------------------------------------------------
    # POSTGRES INSTALLATION
    #------------------------------------------------------------------------------
    # - dbsu - #
    pg_dbsu: postgres                             # os user for database, postgres by default (changing it is not recommended!)
    pg_dbsu_uid: 26                               # os dbsu uid and gid, 26 for default postgres users and groups
    pg_dbsu_sudo: limit                           # none|limit|all|nopass (Privilege for dbsu, limit is recommended)
    pg_dbsu_home: /var/lib/pgsql                  # home directory of the dbsu
    pg_dbsu_ssh_exchange: false                   # exchange ssh key among same cluster

    # - postgres packages - #
    pg_version: 13                                # default postgresql version
    pgdg_repo: false                              # use official pgdg yum repo (disable if you have local mirror)
    pg_add_repo: false                            # add postgres related repo before install (useful if you want a simple install)
    pg_bin_dir: /usr/pgsql/bin                    # postgres binary dir
    # each entry is a space-separated list of package names
    # NOTE(review): `${pg_version}` is presumably substituted with pg_version
    # by the install task - confirm against the role that consumes this list
    pg_packages:
      - postgresql${pg_version}*
      - postgis31_${pg_version}*
      - pg_exporter                               # pgbouncer, patroni and pgbadger are listed once, in the next entry
      - patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity
      - python3 python3-psycopg2 python36-requests python3-etcd python3-consul
      - python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography

    # commented entries are optional extras; they use the same `${pg_version}`
    # placeholder as the live entries so they can be uncommented as-is
    pg_extensions:
      - pg_repack${pg_version} pg_qualstats${pg_version} pg_stat_kcache${pg_version} wal2json${pg_version}
      # - ogr_fdw${pg_version} mysql_fdw_${pg_version} redis_fdw_${pg_version} mongo_fdw${pg_version} hdfs_fdw_${pg_version}
      # - count_distinct${pg_version}  ddlx_${pg_version}  geoip${pg_version}  orafce${pg_version}                                   # popular features
      # - hypopg_${pg_version}  ip4r${pg_version}  jsquery_${pg_version}  logerrors_${pg_version}  periods_${pg_version}  pg_auto_failover_${pg_version}  pg_catcheck${pg_version}
      # - pg_fkpart${pg_version}  pg_jobmon${pg_version}  pg_partman${pg_version}  pg_prioritize_${pg_version}  pg_track_settings${pg_version}  pgaudit15_${pg_version}
      # - pgcryptokey${pg_version}  pgexportdoc${pg_version}  pgimportdoc${pg_version}  pgmemcache-${pg_version}  pgmp${pg_version}  pgq-${pg_version}  pgquarrel pgrouting_${pg_version}
      # - pguint${pg_version}  pguri${pg_version}  prefix${pg_version}   safeupdate_${pg_version}  semver${pg_version}   table_version${pg_version}  tdigest${pg_version}



    #------------------------------------------------------------------------------
    # POSTGRES PROVISION
    #------------------------------------------------------------------------------
    # - identity - #
    # pg_cluster:                                 # [REQUIRED] cluster name (cluster level,  validated during pg_preflight)
    # pg_seq: 0                                   # [REQUIRED] instance seq (instance level, validated during pg_preflight)
    # pg_role: replica                            # [REQUIRED] service role (instance level, validated during pg_preflight)
    # pg_shard:                                   # [OPTIONAL] shard name  (cluster level)
    # pg_sindex:                                  # [OPTIONAL] shard index (cluster level)

    # - identity option -#
    pg_hostname: false                            # overwrite node hostname with pg instance name
    pg_nodename: true                             # overwrite consul nodename with pg instance name

    # - retention - #
    # pg_exists_action, available options: abort|clean|skip
    #  - abort: abort entire play's execution (default)
    #  - clean: remove existing cluster (dangerous)
    #  - skip: end current play for this host
    # pg_exists: false                            # auxiliary flag variable (DO NOT SET THIS)
    pg_exists_action: clean
    pg_disable_purge: false                       # set to true to disable pg purge functionality for good (force pg_exists_action = abort)

    # - storage - #
    pg_data: /pg/data                             # postgres data directory
    pg_fs_main: /export                           # data disk mount point     /pg -> {{ pg_fs_main }}/postgres/{{ pg_instance }}
    pg_fs_bkup: /var/backups                      # backup disk mount point   /pg/* -> {{ pg_fs_bkup }}/postgres/{{ pg_instance }}/*

    # - connection - #
    pg_listen: '0.0.0.0'                          # postgres listen address, '0.0.0.0' by default (all ipv4 addr)
    pg_port: 5432                                 # postgres port (5432 by default)
    pg_localhost: /var/run/postgresql             # localhost unix socket dir for connection
    # pg_upstream:                                # [OPTIONAL] specify replication upstream (setting it on the primary turns the cluster into a standby cluster)


    # - patroni - #
    # patroni_mode, available options: default|pause|remove
    #   - default: default ha mode
    #   - pause:   into maintenance mode
    #   - remove:  remove patroni after bootstrap
    patroni_mode: default                         # pause|default|remove
    pg_namespace: /pg                             # top level key namespace in dcs
    patroni_port: 8008                            # default patroni port
    patroni_watchdog_mode: automatic              # watchdog mode: off|automatic|required
    pg_conf: tiny.yml                             # user provided patroni config template path

    # - flags - #
    pg_backup: false                              # store base backup on this node
    pg_delay: 0                                   # apply delay for offline|delayed instance

    # - localization - #
    pg_encoding: UTF8                             # default to UTF8
    pg_locale: C                                  # default to C
    pg_lc_collate: C                              # default to C
    pg_lc_ctype: en_US.UTF8                       # default to en_US.UTF8

    # - pgbouncer - #
    pgbouncer_port: 6432                          # pgbouncer port (6432 by default)
    pgbouncer_poolmode: transaction               # pooling mode: (transaction pooling by default)
    pgbouncer_max_db_conn: 100                    # important! do not set this larger than postgres max conn or conn limit


    #------------------------------------------------------------------------------
    # POSTGRES TEMPLATE
    #------------------------------------------------------------------------------
    # - template - #
    pg_init: pg-init                              # init script for cluster template

    # - system roles - #
    pg_replication_username: replicator           # system replication user
    pg_replication_password: DBUser.Replicator    # system replication password
    pg_monitor_username: dbuser_monitor           # system monitor user
    pg_monitor_password: DBUser.Monitor           # system monitor password
    pg_admin_username: dbuser_dba                 # system admin user
    pg_admin_password: DBUser.DBA                 # system admin password

    # - default roles - #
    # check http://pigsty.cc/zh/docs/concepts/provision/acl/ for more detail
    pg_default_roles:

      # common production readonly user
      - name: dbrole_readonly                 # production read-only roles
        login: false
        comment: role for global readonly access

      # common production read-write user
      - name: dbrole_readwrite                # production read-write roles
        login: false
        roles: [dbrole_readonly]             # read-write includes read-only access
        comment: role for global read-write access

      # offline have same privileges as readonly, but with limited hba access on offline instance only
      # for the purpose of running slow queries, interactive queries and perform ETL tasks
      - name: dbrole_offline
        login: false
        comment: role for restricted read-only access (offline instance)

      # admin have the privileges to issue DDL changes
      - name: dbrole_admin
        login: false
        bypassrls: true
        comment: role for object creation
        roles: [dbrole_readwrite,pg_monitor,pg_signal_backend]

      # dbsu, name is designated by `pg_dbsu`. It's not recommended to set password for dbsu
      - name: postgres
        superuser: true
        comment: system superuser

      # default replication user, name is designated by `pg_replication_username`, and password is set by `pg_replication_password`
      - name: replicator
        replication: true                          # for replication user
        bypassrls: true                            # logical replication require bypassrls
        roles: [pg_monitor, dbrole_readonly]       # logical replication require select privileges
        comment: system replicator

      # default monitor user, name is designated by `pg_monitor_username`, and password is set by `pg_monitor_password`
      - name: dbuser_monitor
        connlimit: 16
        comment: system monitor user
        roles: [pg_monitor, dbrole_readonly]
        parameters:
          log_min_duration_statement: 1000

      # default admin super user, name is designated by `pg_admin_username`, and password is set by `pg_admin_password`
      - name: dbuser_dba
        superuser: true
        comment: system admin user
        roles: [dbrole_admin]

      # default stats user, for ETL and slow queries
      - name: dbuser_stats
        password: DBUser.Stats
        comment: business offline user for offline queries and ETL
        roles: [dbrole_offline]


    # - privileges - #
    # object created by dbsu and admin will have their privileges properly set
    pg_default_privileges:
      - GRANT USAGE                         ON SCHEMAS   TO dbrole_readonly
      - GRANT SELECT                        ON TABLES    TO dbrole_readonly
      - GRANT SELECT                        ON SEQUENCES TO dbrole_readonly
      - GRANT EXECUTE                       ON FUNCTIONS TO dbrole_readonly
      - GRANT USAGE                         ON SCHEMAS   TO dbrole_offline
      - GRANT SELECT                        ON TABLES    TO dbrole_offline
      - GRANT SELECT                        ON SEQUENCES TO dbrole_offline
      - GRANT EXECUTE                       ON FUNCTIONS TO dbrole_offline
      - GRANT INSERT, UPDATE, DELETE        ON TABLES    TO dbrole_readwrite
      - GRANT USAGE,  UPDATE                ON SEQUENCES TO dbrole_readwrite
      - GRANT TRUNCATE, REFERENCES, TRIGGER ON TABLES    TO dbrole_admin
      - GRANT CREATE                        ON SCHEMAS   TO dbrole_admin

    # - schemas - #
    pg_default_schemas: [monitor]                 # default schemas to be created

    # - extension - #
    pg_default_extensions:                        # default extensions to be created
      - { name: 'pg_stat_statements',  schema: 'monitor' }
      - { name: 'pgstattuple',         schema: 'monitor' }
      - { name: 'pg_qualstats',        schema: 'monitor' }
      - { name: 'pg_buffercache',      schema: 'monitor' }
      - { name: 'pageinspect',         schema: 'monitor' }
      - { name: 'pg_prewarm',          schema: 'monitor' }
      - { name: 'pg_visibility',       schema: 'monitor' }
      - { name: 'pg_freespacemap',     schema: 'monitor' }
      - { name: 'pg_repack',           schema: 'monitor' }
      - name: postgres_fdw
      - name: file_fdw
      - name: btree_gist
      - name: btree_gin
      - name: pg_trgm
      - name: intagg
      - name: intarray

    # - hba - #
    pg_offline_query: false                       # set to true to enable offline query on instance
    pg_reload: true                               # reload postgres after hba changes
    pg_hba_rules:                                 # postgres host-based authentication rules
      - title: allow meta node password access
        role: common
        rules:
          - host    all     all                         10.10.10.10/32      md5

      - title: allow intranet admin password access
        role: common
        rules:
          - host    all     +dbrole_admin               10.0.0.0/8          md5
          - host    all     +dbrole_admin               172.16.0.0/12       md5
          - host    all     +dbrole_admin               192.168.0.0/16      md5

      - title: allow intranet password access
        role: common
        rules:
          - host    all             all                 10.0.0.0/8          md5
          - host    all             all                 172.16.0.0/12       md5
          - host    all             all                 192.168.0.0/16      md5

      - title: allow local read/write (local production user via pgbouncer)
        role: common
        rules:
          - local   all     +dbrole_readonly                                md5
          - host    all     +dbrole_readonly           127.0.0.1/32         md5

      - title: allow offline query (ETL,SAGA,Interactive) on offline instance
        role: offline
        rules:
          - host    all     +dbrole_offline               10.0.0.0/8        md5
          - host    all     +dbrole_offline               172.16.0.0/12     md5
          - host    all     +dbrole_offline               192.168.0.0/16    md5

    pg_hba_rules_extra: []                        # extra hba rules (for cluster/instance overwrite)

    pgbouncer_hba_rules:                          # pgbouncer host-based authentication rules
      - title: local password access
        role: common
        rules:
          - local  all          all                                     md5
          - host   all          all                     127.0.0.1/32    md5

      - title: intranet password access
        role: common
        rules:
          - host   all          all                     10.0.0.0/8      md5
          - host   all          all                     172.16.0.0/12   md5
          - host   all          all                     192.168.0.0/16  md5

    pgbouncer_hba_rules_extra: []                 # extra pgbouncer hba rules (for cluster/instance overwrite)
    # pg_users: []                                # business users
    # pg_databases: []                            # business databases

    #------------------------------------------------------------------------------
    # MONITOR PROVISION
    #------------------------------------------------------------------------------
    # - install - #
    exporter_install: none                        # none|yum|binary, none by default
    exporter_repo_url: ''                         # if set, repo will be added to /etc/yum.repos.d/ before yum installation

    # - collect - #
    exporter_metrics_path: /metrics               # default metric path for pg related exporter

    # - node exporter - #
    node_exporter_enabled: true                   # setup node_exporter on instance
    node_exporter_port: 9100                      # default port for node exporter
    node_exporter_options: '--no-collector.softnet --collector.systemd --collector.ntp --collector.tcpstat --collector.processes'

    # - pg exporter - #
    pg_exporter_config: pg_exporter-demo.yaml     # default config files for pg_exporter
    pg_exporter_enabled: true                     # setup pg_exporter on instance
    pg_exporter_port: 9630                        # default port for pg exporter
    pg_exporter_url: ''                           # optional, if not set, generate from reference parameters

    # - pgbouncer exporter - #
    pgbouncer_exporter_enabled: true              # setup pgbouncer_exporter on instance (if you don't have pgbouncer, disable it)
    pgbouncer_exporter_port: 9631                 # default port for pgbouncer exporter
    pgbouncer_exporter_url: ''                    # optional, if not set, generate from reference parameters

    # - promtail - #                              # promtail is a beta feature which requires manual deployment
    promtail_enabled: true                        # enable promtail logging collector?
    promtail_clean: false                         # remove promtail status file? false by default
    promtail_port: 9080                           # default listen port for promtail
    promtail_status_file: /tmp/promtail-status.yml
    promtail_send_url: http://10.10.10.10:3100/loki/api/v1/push  # loki url to receive logs

    #------------------------------------------------------------------------------
    # SERVICE PROVISION
    #------------------------------------------------------------------------------
    pg_weight: 100              # default load balance weight (instance level)

    # - service - #
    pg_services:                                  # how to expose postgres service in cluster?
      # primary service will route {ip|name}:5433 to primary pgbouncer (5433->6432 rw)
      - name: primary           # service name {{ pg_cluster }}-primary
        src_ip: "*"
        src_port: 5433
        dst_port: pgbouncer     # 5433 route to pgbouncer
        check_url: /primary     # primary health check, success when instance is primary
        selector: "[]"          # select all instance as primary service candidate

      # replica service will route {ip|name}:5434 to replica pgbouncer (5434->6432 ro)
      - name: replica           # service name {{ pg_cluster }}-replica
        src_ip: "*"
        src_port: 5434
        dst_port: pgbouncer
        check_url: /read-only   # read-only health check. (including primary)
        selector: "[]"          # select all instance as replica service candidate
        selector_backup: "[? pg_role == `primary`]"   # primary are used as backup server in replica service

      # default service will route {ip|name}:5436 to primary postgres (5436->5432 primary)
      - name: default           # service's actual name is {{ pg_cluster }}-default
        src_ip: "*"             # service bind ip address, * for all, vip for cluster virtual ip address
        src_port: 5436          # bind port, mandatory
        dst_port: postgres      # target port: postgres|pgbouncer|port_number , pgbouncer(6432) by default
        check_method: http      # health check method: only http is available for now
        check_port: patroni     # health check port:  patroni|pg_exporter|port_number , patroni by default
        check_url: /primary     # health check url path, / as default
        check_code: 200         # health check http code, 200 as default
        selector: "[]"          # instance selector
        haproxy:                # haproxy specific fields
          maxconn: 3000         # default front-end connection
          balance: roundrobin   # load balance algorithm (roundrobin by default)
          default_server_options: 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'

      # offline service will route {ip|name}:5438 to offline postgres (5438->5432 offline)
      - name: offline           # service name {{ pg_cluster }}-offline
        src_ip: "*"
        src_port: 5438
        dst_port: postgres
        check_url: /replica     # offline MUST be a replica
        selector: "[? pg_role == `offline` || pg_offline_query ]"         # instances with pg_role == 'offline' or instance marked with 'pg_offline_query == true'
        selector_backup: "[? pg_role == `replica` && !pg_offline_query]"  # replica are used as backup server in offline service

    pg_services_extra: []        # extra services to be added

    # - haproxy - #
    haproxy_enabled: true                         # enable haproxy among every cluster members
    haproxy_reload: true                          # reload haproxy after config
    haproxy_admin_auth_enabled: false             # enable authentication for haproxy admin?
    haproxy_admin_username: admin                 # default haproxy admin username
    haproxy_admin_password: admin                 # default haproxy admin password
    haproxy_exporter_port: 9101                   # default admin/exporter port
    haproxy_client_timeout: 3h                    # client side connection timeout
    haproxy_server_timeout: 3h                    # server side connection timeout

    # - vip - #
    vip_mode: none                                # none | l2 | l4
    vip_reload: true                              # whether reload service after config
    # vip_address: 127.0.0.1                      # virtual ip address ip (l2 or l4)
    # vip_cidrmask: 24                            # virtual ip address cidr mask (l2 only)
    # vip_interface: eth0                         # virtual ip network interface (l2 only)

    # - dns - #                                   # NOT IMPLEMENTED
    # dns_mode: vip                               # vip|all|selector: how to resolve cluster DNS?
    # dns_selector: '[]'                          # if dns_mode == vip, filter instances been resolved

...

2 - 内核优化

Pigsty针对操作系统内核进行的参数调整

Pigsty使用tuned调整操作系统配置,tuned是CentOS7自带的调参工具。

Pigsty Tuned配置

Pigsty默认会为操作系统安装四种tuned profile

  • OLTP:针对常规业务库,优化延迟
  • OLAP:针对分析库,优化吞吐量
  • CRIT:针对核心业务库,优化RPO
  • TINY:针对微型实例与虚拟机
tuned-adm profile oltp    # 启用OLTP模式
tuned-adm profile olap    # 启用OLAP模式
tuned-adm profile crit    # 启用CRIT模式
tuned-adm profile tiny    # 启用TINY模式

Tuned基本操作

# 如需启动 tuned,请以 root 身份运行下列指令:
systemctl start tuned

# 若要在每次计算机启动时激活 tuned,请输入以下指令:
systemctl enable tuned

# 其它的 tuned 控制,例如配置文件选择等,请使用:
tuned-adm

# 若要查看可用的已安装配置文件(此命令需要 tuned 服务正在运行),请运行:
tuned-adm list

# 若要查看目前已激活的配置文件,请运行:
tuned-adm active

# 若要选择或激活某一配置文件,请运行:
tuned-adm profile profile_name
# 例如
tuned-adm profile powersave

# 若要让 tuned 推荐最适合您的系统的配置文件,同时不改变任何现有的配置文件,也不使用安装期间使用过的逻辑,请运行以下指令:
tuned-adm recommend

# 要禁用所有微调:
tuned-adm off

要列出所有可用配置文件并识别目前激活的配置文件,请运行:
tuned-adm list
要只显示当前激活的配置文件请运行:
tuned-adm active
要切换到某个可用的配置文件请运行:
tuned-adm profile profile_name
例如:
tuned-adm profile server-powersave

OLTP配置

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to oltp mode
# Path      :   /etc/tuned/oltp/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLTP System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# max shmem segment size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms , depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

# vm.dirty_background_bytes=67108864 # 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=10                 # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

OLAP配置

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-09-18
# Desc      :   Tune operating system to olap mode
# Path      :   /etc/tuned/olap/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLAP System
include=network-throughput

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# max shmem segment size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms , depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
# vm.swappiness=10

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

vm.dirty_background_ratio = 10    # throughput-performance default
vm.dirty_ratio=80                 # throughput-performance default 40 -> 80

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

CRIT配置

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to crit mode
# Path      :   /etc/tuned/crit/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL CRIT System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# max shmem segment size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms , depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 100%
vm.overcommit_memory=2
vm.overcommit_ratio=100

# 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_bytes=67108864
# vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=6                    # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

TINY配置

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to tiny mode
# Path      :   /etc/tuned/tiny/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL TINY System
# inheriting from the virtual-guest profile is left disabled
# include=virtual-guest

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up.  Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40

# Filesystem I/O is usually much more efficient than swapping, so try to keep
# swapping low.  It's usually safe to go even lower than this on systems with
# server-grade storage.
vm.swappiness = 30

#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# tcp options
# NOTE(review): net.ipv4.tcp_tw_recycle was removed in Linux 4.12; confirm the
# target kernel still exposes this key before applying the profile.
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min (idle time before keepalive probing starts)
net.ipv4.tcp_keepalive_time=60

数据库内核调优参考

# Database kernel optimisation
# Reference sysctl settings for a PostgreSQL host. The shmmax/shmall numbers
# are example values for a host where _PHYS_PAGES / 2 = 485058 and
# PAGE_SIZE = 4096; recompute them with the formulas in the comments.
fs.aio-max-nr = 1048576                     # max concurrent in-flight async I/O requests; should not be below 1M
fs.file-max = 16777216                      # max open files: 16M

# kernel
# shmmax is a size in BYTES, shmall is a count of PAGES (shmmax = shmall * PAGE_SIZE)
kernel.shmmax = 1986797568                  # max size of one shared memory segment, in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
kernel.shmall = 485058                      # total shared memory, in pages: $(expr $(getconf _PHYS_PAGES) / 2)
kernel.shmmni = 16384                       # max number of shared memory segments system-wide: 4096 -> 16384
kernel.msgmni = 32768                       # number of message queues system-wide, limits how many agents can start; set to memory size in MB
kernel.msgmnb = 65536                       # affects the size of a message queue
kernel.msgmax = 65536                       # affects the size of a message that can be sent to a queue
kernel.numa_balancing = 0                   # disable NUMA balancing
kernel.sched_migration_cost_ns = 5000000    # scheduler still treats a task as cache-hot within 5ms
kernel.sem = 2048 134217728 2048 65536      # SEMMSL=2048 semaphores per set, SEMMNS=134217728 semaphores system-wide, SEMOPM=2048 ops per call, SEMMNI=65536 sets

# vm
vm.dirty_ratio = 80                         # hard limit: above 80% dirty memory, writers block and flush
vm.dirty_background_bytes = 268435456       # 256MB of dirty data wakes the background flusher
vm.dirty_expire_centisecs = 6000            # data dirty for more than 1 minute must be written back
vm.dirty_writeback_centisecs = 500          # flusher runs every 5 seconds
vm.mmap_min_addr = 65536                    # deny access to memory below 0x10000
vm.zone_reclaim_mode = 0                    # disable NUMA zone reclaim

# vm swap
vm.swappiness = 0                           # avoid swap; it can still happen at the high watermark
vm.overcommit_memory = 2                    # strict accounting: commit limit = swap + overcommit_ratio% of RAM
vm.overcommit_ratio = 50                    # percentage of RAM counted toward the commit limit: $((($mem - $swap) * 100 / $mem))

# tcp memory
# tcp_rmem / tcp_wmem are in bytes; tcp_mem is in pages
net.ipv4.tcp_rmem = 8192 65536 16777216     # tcp read buffer min/default/max: 8K/64K/16M
net.ipv4.tcp_wmem = 8192 65536 16777216     # tcp write buffer min/default/max: 8K/64K/16M
net.ipv4.tcp_mem = 131072 262144 16777216   # tcp memory usage low/pressure/high: 512M/1G/64G (with 4KiB pages)
net.core.rmem_default = 262144              # default receive buffer size: 256K
net.core.rmem_max = 4194304                 # max receive buffer size: 4M
net.core.wmem_default = 262144              # default send buffer size: 256K
net.core.wmem_max = 4194304                 # max send buffer size: 4M
# tcp keepalive
net.ipv4.tcp_keepalive_intvl = 20           # interval between probes when a probe is not acknowledged: 75s -> 20s
net.ipv4.tcp_keepalive_probes = 3           # 3 * 20s = connection dropped after 1 minute of failed probes
net.ipv4.tcp_keepalive_time = 60            # keepalive probe period: 1 minute
# tcp port reuse
net.ipv4.tcp_tw_reuse = 1                   # allow TIME_WAIT sockets to be reused for new TCP connections (default 0)
net.ipv4.tcp_tw_recycle = 0                 # fast TIME_WAIT recycling; deprecated
net.ipv4.tcp_fin_timeout = 5                # seconds a socket stays in FIN-WAIT-2
net.ipv4.tcp_timestamps = 1
# tcp anti-flood
net.ipv4.tcp_syncookies = 1                 # send SYN cookies once the SYN_RECV queue is full, to resist SYN floods
net.ipv4.tcp_synack_retries = 1             # SYN-ACK retries for a half-open connection: 5 -> 1
net.ipv4.tcp_syn_retries = 1                # number of SYN packets sent before the kernel gives up connecting
# tcp load-balancer
net.ipv4.ip_forward = 1                     # IP forwarding
net.ipv4.ip_nonlocal_bind = 1               # allow binding to non-local addresses
net.netfilter.nf_conntrack_max = 1048576    # max number of tracked connections
net.ipv4.ip_local_port_range = 10000 65535  # local port range
net.ipv4.tcp_max_tw_buckets = 262144        # 256k TIME_WAIT sockets
net.core.somaxconn = 65535                  # limit on the LISTEN queue; overflow triggers retransmission
net.ipv4.tcp_max_syn_backlog = 8192         # SYN queue size: 1024 -> 8192
net.core.netdev_max_backlog = 8192          # queue length allowed when the NIC receives faster than the kernel processes

3 - 指标清单

Pigsty可用监控指标清单

下面是Pigsty目前可用的监控指标列表。

衍生指标的定义规则,请查阅 衍生指标 一节。

监控指标列表

name
go_gc_duration_seconds
go_gc_duration_seconds_count
go_gc_duration_seconds_sum
go_goroutines
go_info
go_memstats_alloc_bytes
go_memstats_alloc_bytes_total
go_memstats_buck_hash_sys_bytes
go_memstats_frees_total
go_memstats_gc_cpu_fraction
go_memstats_gc_sys_bytes
go_memstats_heap_alloc_bytes
go_memstats_heap_idle_bytes
go_memstats_heap_inuse_bytes
go_memstats_heap_objects
go_memstats_heap_released_bytes
go_memstats_heap_sys_bytes
go_memstats_last_gc_time_seconds
go_memstats_lookups_total
go_memstats_mallocs_total
go_memstats_mcache_inuse_bytes
go_memstats_mcache_sys_bytes
go_memstats_mspan_inuse_bytes
go_memstats_mspan_sys_bytes
go_memstats_next_gc_bytes
go_memstats_other_sys_bytes
go_memstats_stack_inuse_bytes
go_memstats_stack_sys_bytes
go_memstats_sys_bytes
go_threads
haproxy_backend_active_servers
haproxy_backend_backup_servers
haproxy_backend_bytes_in_total
haproxy_backend_bytes_out_total
haproxy_backend_check_last_change_seconds
haproxy_backend_check_up_down_total
haproxy_backend_client_aborts_total
haproxy_backend_connect_time_average_seconds
haproxy_backend_connection_attempts_total
haproxy_backend_connection_errors_total
haproxy_backend_connection_reuses_total
haproxy_backend_current_queue
haproxy_backend_current_sessions
haproxy_backend_downtime_seconds_total
haproxy_backend_failed_header_rewriting_total
haproxy_backend_http_cache_hits_total
haproxy_backend_http_cache_lookups_total
haproxy_backend_http_comp_bytes_bypassed_total
haproxy_backend_http_comp_bytes_in_total
haproxy_backend_http_comp_bytes_out_total
haproxy_backend_http_comp_responses_total
haproxy_backend_http_requests_total
haproxy_backend_http_responses_total
haproxy_backend_internal_errors_total
haproxy_backend_last_session_seconds
haproxy_backend_limit_sessions
haproxy_backend_loadbalanced_total
haproxy_backend_max_connect_time_seconds
haproxy_backend_max_queue
haproxy_backend_max_queue_time_seconds
haproxy_backend_max_response_time_seconds
haproxy_backend_max_session_rate
haproxy_backend_max_sessions
haproxy_backend_max_total_time_seconds
haproxy_backend_queue_time_average_seconds
haproxy_backend_redispatch_warnings_total
haproxy_backend_requests_denied_total
haproxy_backend_response_errors_total
haproxy_backend_response_time_average_seconds
haproxy_backend_responses_denied_total
haproxy_backend_retry_warnings_total
haproxy_backend_server_aborts_total
haproxy_backend_sessions_total
haproxy_backend_status
haproxy_backend_total_time_average_seconds
haproxy_backend_weight
haproxy_frontend_bytes_in_total
haproxy_frontend_bytes_out_total
haproxy_frontend_connections_rate_max
haproxy_frontend_connections_total
haproxy_frontend_current_sessions
haproxy_frontend_denied_connections_total
haproxy_frontend_denied_sessions_total
haproxy_frontend_failed_header_rewriting_total
haproxy_frontend_http_cache_hits_total
haproxy_frontend_http_cache_lookups_total
haproxy_frontend_http_comp_bytes_bypassed_total
haproxy_frontend_http_comp_bytes_in_total
haproxy_frontend_http_comp_bytes_out_total
haproxy_frontend_http_comp_responses_total
haproxy_frontend_http_requests_rate_max
haproxy_frontend_http_requests_total
haproxy_frontend_http_responses_total
haproxy_frontend_intercepted_requests_total
haproxy_frontend_internal_errors_total
haproxy_frontend_limit_session_rate
haproxy_frontend_limit_sessions
haproxy_frontend_max_session_rate
haproxy_frontend_max_sessions
haproxy_frontend_request_errors_total
haproxy_frontend_requests_denied_total
haproxy_frontend_responses_denied_total
haproxy_frontend_sessions_total
haproxy_frontend_status
haproxy_process_active_peers
haproxy_process_busy_polling_enabled
haproxy_process_connected_peers
haproxy_process_connections_total
haproxy_process_current_backend_ssl_key_rate
haproxy_process_current_connection_rate
haproxy_process_current_connections
haproxy_process_current_frontend_ssl_key_rate
haproxy_process_current_run_queue
haproxy_process_current_session_rate
haproxy_process_current_ssl_connections
haproxy_process_current_ssl_rate
haproxy_process_current_tasks
haproxy_process_current_zlib_memory
haproxy_process_dropped_logs_total
haproxy_process_frontent_ssl_reuse
haproxy_process_hard_max_connections
haproxy_process_http_comp_bytes_in_total
haproxy_process_http_comp_bytes_out_total
haproxy_process_idle_time_percent
haproxy_process_jobs
haproxy_process_limit_connection_rate
haproxy_process_limit_http_comp
haproxy_process_limit_session_rate
haproxy_process_limit_ssl_rate
haproxy_process_listeners
haproxy_process_max_backend_ssl_key_rate
haproxy_process_max_connection_rate
haproxy_process_max_connections
haproxy_process_max_fds
haproxy_process_max_frontend_ssl_key_rate
haproxy_process_max_memory_bytes
haproxy_process_max_pipes
haproxy_process_max_session_rate
haproxy_process_max_sockets
haproxy_process_max_ssl_connections
haproxy_process_max_ssl_rate
haproxy_process_max_zlib_memory
haproxy_process_nbproc
haproxy_process_nbthread
haproxy_process_pipes_free_total
haproxy_process_pipes_used_total
haproxy_process_pool_allocated_bytes
haproxy_process_pool_failures_total
haproxy_process_pool_used_bytes
haproxy_process_relative_process_id
haproxy_process_requests_total
haproxy_process_ssl_cache_lookups_total
haproxy_process_ssl_cache_misses_total
haproxy_process_ssl_connections_total
haproxy_process_start_time_seconds
haproxy_process_stopping
haproxy_process_unstoppable_jobs
haproxy_server_bytes_in_total
haproxy_server_bytes_out_total
haproxy_server_check_code
haproxy_server_check_duration_seconds
haproxy_server_check_failures_total
haproxy_server_check_last_change_seconds
haproxy_server_check_status
haproxy_server_check_up_down_total
haproxy_server_client_aborts_total
haproxy_server_connect_time_average_seconds
haproxy_server_connection_attempts_total
haproxy_server_connection_errors_total
haproxy_server_connection_reuses_total
haproxy_server_current_queue
haproxy_server_current_sessions
haproxy_server_current_throttle
haproxy_server_downtime_seconds_total
haproxy_server_failed_header_rewriting_total
haproxy_server_internal_errors_total
haproxy_server_last_session_seconds
haproxy_server_limit_sessions
haproxy_server_loadbalanced_total
haproxy_server_max_connect_time_seconds
haproxy_server_max_queue
haproxy_server_max_queue_time_seconds
haproxy_server_max_response_time_seconds
haproxy_server_max_session_rate
haproxy_server_max_sessions
haproxy_server_max_total_time_seconds
haproxy_server_queue_limit
haproxy_server_queue_time_average_seconds
haproxy_server_redispatch_warnings_total
haproxy_server_response_errors_total
haproxy_server_response_time_average_seconds
haproxy_server_responses_denied_total
haproxy_server_retry_warnings_total
haproxy_server_server_aborts_total
haproxy_server_server_idle_connections_current
haproxy_server_server_idle_connections_limit
haproxy_server_sessions_total
haproxy_server_status
haproxy_server_total_time_average_seconds
haproxy_server_weight
node:cls:cpu_count
node:cls:cpu_mode
node:cls:cpu_usage
node:cls:cpu_usage_avg5m
node:cls:disk_io_rate
node:cls:disk_iops
node:cls:disk_read_iops
node:cls:disk_read_rate
node:cls:disk_write_iops
node:cls:disk_write_rate
node:cls:mem_usage
node:cls:network_io
node:cls:network_rx
node:cls:network_tx
node:cls:ntp_offset_range
node:cls:sched_timeslicesa
node:cpu:cpu_mode
node:cpu:cpu_usage
node:cpu:cpu_usage_avg5m
node:cpu:sched_timeslices
node:dev:disk_io_rate
node:dev:disk_iops
node:dev:disk_read_iops
node:dev:disk_read_rate
node:dev:disk_read_rt
node:dev:disk_read_time
node:dev:disk_write_iops
node:dev:disk_write_rate
node:dev:disk_write_rt
node:dev:disk_write_time
node:dev:network_io_rate
node:dev:network_rx
node:dev:network_tx
node:fs:avail_bytes
node:fs:free_bytes
node:fs:free_inode
node:fs:inode_usage
node:fs:size_bytes
node:fs:space_deriv_1h
node:fs:space_exhaust
node:fs:space_usage
node:fs:total_inode
node:ins:cpu_count
node:ins:cpu_mode
node:ins:cpu_usage
node:ins:cpu_usage_avg5m
node:ins:ctx_switch
node:ins:disk_io_rate
node:ins:disk_iops
node:ins:disk_read_iops
node:ins:disk_read_rate
node:ins:disk_write_iops
node:ins:disk_write_rate
node:ins:fd_usage
node:ins:forks
node:ins:intrrupt
node:ins:mem_app
node:ins:mem_free
node:ins:mem_usage
node:ins:network_io
node:ins:network_rx
node:ins:network_tx
node:ins:pagefault
node:ins:pagein
node:ins:pageout
node:ins:sched_timeslices
node:ins:stdload1
node:ins:stdload15
node:ins:stdload5
node:ins:swap_usage
node:ins:swapin
node:ins:swapout
node:ins:tcp_active_opens
node:ins:tcp_dropped
node:ins:tcp_insegs
node:ins:tcp_outsegs
node:ins:tcp_overflow
node:ins:tcp_overflow_rate
node:ins:tcp_passive_opens
node:ins:tcp_retrans_rate
node:ins:tcp_retranssegs
node:ins:tcp_segs
node:uptime
node_arp_entries
node_boot_time_seconds
node_context_switches_total
node_cooling_device_cur_state
node_cooling_device_max_state
node_cpu_guest_seconds_total
node_cpu_seconds_total
node_disk_io_now
node_disk_io_time_seconds_total
node_disk_io_time_weighted_seconds_total
node_disk_read_bytes_total
node_disk_read_time_seconds_total
node_disk_reads_completed_total
node_disk_reads_merged_total
node_disk_write_time_seconds_total
node_disk_writes_completed_total
node_disk_writes_merged_total
node_disk_written_bytes_total
node_entropy_available_bits
node_exporter_build_info
node_filefd_allocated
node_filefd_maximum
node_filesystem_avail_bytes
node_filesystem_device_error
node_filesystem_files
node_filesystem_files_free
node_filesystem_free_bytes
node_filesystem_readonly
node_filesystem_size_bytes
node_forks_total
node_intr_total
node_ipvs_connections_total
node_ipvs_incoming_bytes_total
node_ipvs_incoming_packets_total
node_ipvs_outgoing_bytes_total
node_ipvs_outgoing_packets_total
node_load1
node_load15
node_load5
node_memory_Active_anon_bytes
node_memory_Active_bytes
node_memory_Active_file_bytes
node_memory_AnonHugePages_bytes
node_memory_AnonPages_bytes
node_memory_Bounce_bytes
node_memory_Buffers_bytes
node_memory_Cached_bytes
node_memory_CmaFree_bytes
node_memory_CmaTotal_bytes
node_memory_CommitLimit_bytes
node_memory_Committed_AS_bytes
node_memory_DirectMap2M_bytes
node_memory_DirectMap4k_bytes
node_memory_Dirty_bytes
node_memory_HardwareCorrupted_bytes
node_memory_HugePages_Free
node_memory_HugePages_Rsvd
node_memory_HugePages_Surp
node_memory_HugePages_Total
node_memory_Hugepagesize_bytes
node_memory_Inactive_anon_bytes
node_memory_Inactive_bytes
node_memory_Inactive_file_bytes
node_memory_KernelStack_bytes
node_memory_Mapped_bytes
node_memory_MemAvailable_bytes
node_memory_MemFree_bytes
node_memory_MemTotal_bytes
node_memory_Mlocked_bytes
node_memory_NFS_Unstable_bytes
node_memory_PageTables_bytes
node_memory_Percpu_bytes
node_memory_SReclaimable_bytes
node_memory_SUnreclaim_bytes
node_memory_Shmem_bytes
node_memory_Slab_bytes
node_memory_SwapCached_bytes
node_memory_SwapFree_bytes
node_memory_SwapTotal_bytes
node_memory_Unevictable_bytes
node_memory_VmallocChunk_bytes
node_memory_VmallocTotal_bytes
node_memory_VmallocUsed_bytes
node_memory_WritebackTmp_bytes
node_memory_Writeback_bytes
node_netstat_Icmp6_InErrors
node_netstat_Icmp6_InMsgs
node_netstat_Icmp6_OutMsgs
node_netstat_Icmp_InErrors
node_netstat_Icmp_InMsgs
node_netstat_Icmp_OutMsgs
node_netstat_Ip6_InOctets
node_netstat_Ip6_OutOctets
node_netstat_IpExt_InOctets
node_netstat_IpExt_OutOctets
node_netstat_Ip_Forwarding
node_netstat_TcpExt_ListenDrops
node_netstat_TcpExt_ListenOverflows
node_netstat_TcpExt_SyncookiesFailed
node_netstat_TcpExt_SyncookiesRecv
node_netstat_TcpExt_SyncookiesSent
node_netstat_TcpExt_TCPSynRetrans
node_netstat_Tcp_ActiveOpens
node_netstat_Tcp_CurrEstab
node_netstat_Tcp_InErrs
node_netstat_Tcp_InSegs
node_netstat_Tcp_OutSegs
node_netstat_Tcp_PassiveOpens
node_netstat_Tcp_RetransSegs
node_netstat_Udp6_InDatagrams
node_netstat_Udp6_InErrors
node_netstat_Udp6_NoPorts
node_netstat_Udp6_OutDatagrams
node_netstat_Udp6_RcvbufErrors
node_netstat_Udp6_SndbufErrors
node_netstat_UdpLite6_InErrors
node_netstat_UdpLite_InErrors
node_netstat_Udp_InDatagrams
node_netstat_Udp_InErrors
node_netstat_Udp_NoPorts
node_netstat_Udp_OutDatagrams
node_netstat_Udp_RcvbufErrors
node_netstat_Udp_SndbufErrors
node_network_address_assign_type
node_network_carrier
node_network_carrier_changes_total
node_network_device_id
node_network_dormant
node_network_flags
node_network_iface_id
node_network_iface_link
node_network_iface_link_mode
node_network_info
node_network_mtu_bytes
node_network_net_dev_group
node_network_protocol_type
node_network_receive_bytes_total
node_network_receive_compressed_total
node_network_receive_drop_total
node_network_receive_errs_total
node_network_receive_fifo_total
node_network_receive_frame_total
node_network_receive_multicast_total
node_network_receive_packets_total
node_network_transmit_bytes_total
node_network_transmit_carrier_total
node_network_transmit_colls_total
node_network_transmit_compressed_total
node_network_transmit_drop_total
node_network_transmit_errs_total
node_network_transmit_fifo_total
node_network_transmit_packets_total
node_network_transmit_queue_length
node_network_up
node_nf_conntrack_entries
node_nf_conntrack_entries_limit
node_ntp_leap
node_ntp_offset_seconds
node_ntp_reference_timestamp_seconds
node_ntp_root_delay_seconds
node_ntp_root_dispersion_seconds
node_ntp_rtt_seconds
node_ntp_sanity
node_ntp_stratum
node_power_supply_capacity
node_power_supply_cyclecount
node_power_supply_energy_full
node_power_supply_energy_full_design
node_power_supply_energy_watthour
node_power_supply_info
node_power_supply_online
node_power_supply_power_watt
node_power_supply_present
node_power_supply_voltage_min_design
node_power_supply_voltage_volt
node_processes_max_processes
node_processes_max_threads
node_processes_pids
node_processes_state
node_processes_threads
node_procs_blocked
node_procs_running
node_schedstat_running_seconds_total
node_schedstat_timeslices_total
node_schedstat_waiting_seconds_total
node_scrape_collector_duration_seconds
node_scrape_collector_success
node_sockstat_FRAG6_inuse
node_sockstat_FRAG6_memory
node_sockstat_FRAG_inuse
node_sockstat_FRAG_memory
node_sockstat_RAW6_inuse
node_sockstat_RAW_inuse
node_sockstat_TCP6_inuse
node_sockstat_TCP_alloc
node_sockstat_TCP_inuse
node_sockstat_TCP_mem
node_sockstat_TCP_mem_bytes
node_sockstat_TCP_orphan
node_sockstat_TCP_tw
node_sockstat_UDP6_inuse
node_sockstat_UDPLITE6_inuse
node_sockstat_UDPLITE_inuse
node_sockstat_UDP_inuse
node_sockstat_UDP_mem
node_sockstat_UDP_mem_bytes
node_sockstat_sockets_used
node_systemd_socket_accepted_connections_total
node_systemd_socket_current_connections
node_systemd_system_running
node_systemd_timer_last_trigger_seconds
node_systemd_unit_state
node_systemd_units
node_systemd_version
node_tcp_connection_states
node_textfile_scrape_error
node_time_seconds
node_timex_estimated_error_seconds
node_timex_frequency_adjustment_ratio
node_timex_loop_time_constant
node_timex_maxerror_seconds
node_timex_offset_seconds
node_timex_pps_calibration_total
node_timex_pps_error_total
node_timex_pps_frequency_hertz
node_timex_pps_jitter_seconds
node_timex_pps_jitter_total
node_timex_pps_shift_seconds
node_timex_pps_stability_exceeded_total
node_timex_pps_stability_hertz
node_timex_status
node_timex_sync_status
node_timex_tai_offset_seconds
node_timex_tick_seconds
node_udp_queues
node_uname_info
node_vmstat_pgfault
node_vmstat_pgmajfault
node_vmstat_pgpgin
node_vmstat_pgpgout
node_vmstat_pswpin
node_vmstat_pswpout
node_xfs_allocation_btree_compares_total
node_xfs_allocation_btree_lookups_total
node_xfs_allocation_btree_records_deleted_total
node_xfs_allocation_btree_records_inserted_total
node_xfs_block_map_btree_compares_total
node_xfs_block_map_btree_lookups_total
node_xfs_block_map_btree_records_deleted_total
node_xfs_block_map_btree_records_inserted_total
node_xfs_block_mapping_extent_list_compares_total
node_xfs_block_mapping_extent_list_deletions_total
node_xfs_block_mapping_extent_list_insertions_total
node_xfs_block_mapping_extent_list_lookups_total
node_xfs_block_mapping_reads_total
node_xfs_block_mapping_unmaps_total
node_xfs_block_mapping_writes_total
node_xfs_directory_operation_create_total
node_xfs_directory_operation_getdents_total
node_xfs_directory_operation_lookup_total
node_xfs_directory_operation_remove_total
node_xfs_extent_allocation_blocks_allocated_total
node_xfs_extent_allocation_blocks_freed_total
node_xfs_extent_allocation_extents_allocated_total
node_xfs_extent_allocation_extents_freed_total
node_xfs_read_calls_total
node_xfs_vnode_active_total
node_xfs_vnode_allocate_total
node_xfs_vnode_get_total
node_xfs_vnode_hold_total
node_xfs_vnode_reclaim_total
node_xfs_vnode_release_total
node_xfs_vnode_remove_total
node_xfs_write_calls_total
pg:all:active_backends
pg:all:age
pg:all:backends
pg:all:buf_alloc
pg:all:buf_flush
pg:all:commits
pg:all:commits_realtime
pg:all:ixact_backends
pg:all:lag_bytes
pg:all:lag_seconds
pg:all:qps_realtime
pg:all:rollbacks
pg:all:rollbacks_realtime
pg:all:sessions
pg:all:tps_realtime
pg:all:tup_deleted
pg:all:tup_inserted
pg:all:tup_modified
pg:all:tup_selected
pg:all:tup_touched
pg:all:tup_updated
pg:all:wal_rate
pg:all:xacts
pg:all:xacts_avg30m
pg:all:xacts_mu
pg:all:xacts_realtime
pg:all:xacts_sigma
pg:cls:active_backends
pg:cls:age
pg:cls:backends
pg:cls:buf_alloc
pg:cls:buf_flush
pg:cls:ckpt_1h
pg:cls:commits
pg:cls:commits_realtime
pg:cls:ixact_backends
pg:cls:lag_bytes
pg:cls:lag_seconds
pg:cls:leader
pg:cls:load0
pg:cls:load1
pg:cls:load15
pg:cls:load5
pg:cls:lock_count
pg:cls:locks
pg:cls:primarys
pg:cls:qps_realtime
pg:cls:replicas
pg:cls:rlock
pg:cls:rollbacks
pg:cls:rollbacks_realtime
pg:cls:saturation0
pg:cls:saturation1
pg:cls:saturation15
pg:cls:saturation5
pg:cls:sessions
pg:cls:size
pg:cls:synchronous
pg:cls:temp_bytes
pg:cls:temp_files
pg:cls:timeline
pg:cls:tps_realtime
pg:cls:tup_deleted
pg:cls:tup_inserted
pg:cls:tup_modified
pg:cls:tup_selected
pg:cls:tup_touched
pg:cls:tup_updated
pg:cls:wal_rate
pg:cls:wlock
pg:cls:xacts
pg:cls:xacts_avg30m
pg:cls:xacts_mu
pg:cls:xacts_realtime
pg:cls:xacts_sigma
pg:cls:xlock
pg:db:age_deriv_1h
pg:db:age_exhaust
pg:db:backends
pg:db:blks_access_1m
pg:db:blks_hit_1m
pg:db:blks_read_1m
pg:db:buffer_hit_rate
pg:db:commits
pg:db:commits_realtime
pg:db:io_time_usage
pg:db:lock_count
pg:db:locks
pg:db:pool_current_conn
pg:db:pool_disabled
pg:db:pool_max_conn
pg:db:pool_paused
pg:db:pool_reserve_size
pg:db:pool_size
pg:db:qps_realtime
pg:db:read_time_usage
pg:db:rlock
pg:db:rollbacks
pg:db:rollbacks_realtime
pg:db:sessions
pg:db:temp_bytes
pg:db:temp_files
pg:db:tps_realtime
pg:db:tup_deleted
pg:db:tup_inserted
pg:db:tup_modified
pg:db:tup_selected
pg:db:tup_touched
pg:db:tup_updated
pg:db:wlock
pg:db:write_time_usage
pg:db:xacts
pg:db:xacts_avg30m
pg:db:xacts_mu
pg:db:xacts_realtime
pg:db:xacts_sigma
pg:db:xlock
pg:ins:active_backends
pg:ins:age
pg:ins:backends
pg:ins:buf_alloc
pg:ins:buf_flush
pg:ins:buf_flush_backend
pg:ins:buf_flush_checkpoint
pg:ins:checkpoint_lsn
pg:ins:ckpt_req
pg:ins:ckpt_timed
pg:ins:commits
pg:ins:commits_realtime
pg:ins:free_clients
pg:ins:free_servers
pg:ins:hit_rate
pg:ins:ixact_backends
pg:ins:lag_bytes
pg:ins:lag_seconds
pg:ins:last_ckpt
pg:ins:load0
pg:ins:load1
pg:ins:load15
pg:ins:load5
pg:ins:lock_count
pg:ins:locks
pg:ins:login_clients
pg:ins:pool_databases
pg:ins:pool_users
pg:ins:pools
pg:ins:qps_realtime
pg:ins:query_rt
pg:ins:query_rt_avg30m
pg:ins:query_rt_mu
pg:ins:query_rt_sigma
pg:ins:query_time_rate15m
pg:ins:query_time_rate1m
pg:ins:query_time_rate5m
pg:ins:recv_init_lsn
pg:ins:recv_init_tli
pg:ins:recv_last_lsn
pg:ins:recv_last_tli
pg:ins:redo_lsn
pg:ins:rlock
pg:ins:rollbacks
pg:ins:rollbacks_realtime
pg:ins:saturation0
pg:ins:saturation1
pg:ins:saturation15
pg:ins:saturation5
pg:ins:sessions
pg:ins:slot_retained_bytes
pg:ins:temp_bytes
pg:ins:temp_files
pg:ins:tps_realtime
pg:ins:tup_deleted
pg:ins:tup_inserted
pg:ins:tup_modified
pg:ins:tup_selected
pg:ins:tup_touched
pg:ins:tup_updated
pg:ins:used_clients
pg:ins:wal_rate
pg:ins:wlock
pg:ins:xact_rt
pg:ins:xact_rt_avg30m
pg:ins:xact_rt_mu
pg:ins:xact_rt_sigma
pg:ins:xact_time_rate15m
pg:ins:xact_time_rate1m
pg:ins:xact_time_rate5m
pg:ins:xacts
pg:ins:xacts_avg30m
pg:ins:xacts_mu
pg:ins:xacts_realtime
pg:ins:xacts_sigma
pg:ins:xlock
pg:query:call
pg:query:rt
pg:svc:active_backends
pg:svc:backends
pg:svc:buf_alloc
pg:svc:buf_flush
pg:svc:commits
pg:svc:commits_realtime
pg:svc:ixact_backends
pg:svc:load0
pg:svc:load1
pg:svc:load15
pg:svc:load5
pg:svc:lock_count
pg:svc:locks
pg:svc:qps_realtime
pg:svc:query_rt
pg:svc:query_rt_avg30m
pg:svc:query_rt_mu
pg:svc:query_rt_sigma
pg:svc:rlock
pg:svc:rollbacks
pg:svc:rollbacks_realtime
pg:svc:sessions
pg:svc:temp_bytes
pg:svc:temp_files
pg:svc:tps_realtime
pg:svc:tup_deleted
pg:svc:tup_inserted
pg:svc:tup_modified
pg:svc:tup_selected
pg:svc:tup_touched
pg:svc:tup_updated
pg:svc:wlock
pg:svc:xact_rt
pg:svc:xact_rt_avg30m
pg:svc:xact_rt_mu
pg:svc:xact_rt_sigma
pg:svc:xacts
pg:svc:xacts_avg30m
pg:svc:xacts_mu
pg:svc:xacts_realtime
pg:svc:xacts_sigma
pg:svc:xlock
pg_activity_count
pg_activity_max_conn_duration
pg_activity_max_duration
pg_activity_max_tx_duration
pg_backend_count
pg_backup_time
pg_bgwriter_buffers_alloc
pg_bgwriter_buffers_backend
pg_bgwriter_buffers_backend_fsync
pg_bgwriter_buffers_checkpoint
pg_bgwriter_buffers_clean
pg_bgwriter_checkpoint_sync_time
pg_bgwriter_checkpoint_write_time
pg_bgwriter_checkpoints_req
pg_bgwriter_checkpoints_timed
pg_bgwriter_maxwritten_clean
pg_bgwriter_stats_reset
pg_boot_time
pg_checkpoint_checkpoint_lsn
pg_checkpoint_elapse
pg_checkpoint_full_page_writes
pg_checkpoint_newest_commit_ts_xid
pg_checkpoint_next_multi_offset
pg_checkpoint_next_multixact_id
pg_checkpoint_next_oid
pg_checkpoint_next_xid
pg_checkpoint_next_xid_epoch
pg_checkpoint_oldest_active_xid
pg_checkpoint_oldest_commit_ts_xid
pg_checkpoint_oldest_multi_dbid
pg_checkpoint_oldest_multi_xid
pg_checkpoint_oldest_xid
pg_checkpoint_oldest_xid_dbid
pg_checkpoint_prev_tli
pg_checkpoint_redo_lsn
pg_checkpoint_time
pg_checkpoint_tli
pg_class_relage
pg_class_relpages
pg_class_relsize
pg_class_reltuples
pg_conf_reload_time
pg_database_age
pg_database_allow_conn
pg_database_conn_limit
pg_database_frozen_xid
pg_database_is_template
pg_db_blk_read_time
pg_db_blk_write_time
pg_db_blks_access
pg_db_blks_hit
pg_db_blks_read
pg_db_checksum_failures
pg_db_checksum_last_failure
pg_db_confl_bufferpin
pg_db_confl_deadlock
pg_db_confl_lock
pg_db_confl_snapshot
pg_db_confl_tablespace
pg_db_conflicts
pg_db_deadlocks
pg_db_numbackends
pg_db_stats_reset
pg_db_temp_bytes
pg_db_temp_files
pg_db_tup_deleted
pg_db_tup_fetched
pg_db_tup_inserted
pg_db_tup_modified
pg_db_tup_returned
pg_db_tup_updated
pg_db_xact_commit
pg_db_xact_rollback
pg_db_xact_total
pg_downstream_count
pg_exporter_last_scrape_time
pg_exporter_query_cache_ttl
pg_exporter_query_scrape_duration
pg_exporter_query_scrape_error_count
pg_exporter_query_scrape_hit_count
pg_exporter_query_scrape_metric_count
pg_exporter_query_scrape_total_count
pg_exporter_scrape_duration
pg_exporter_scrape_error_count
pg_exporter_scrape_total_count
pg_exporter_server_scrape_duration
pg_exporter_server_scrape_total_count
pg_exporter_server_scrape_total_seconds
pg_exporter_up
pg_exporter_uptime
pg_flush_lsn
pg_func_calls
pg_func_self_time
pg_func_total_time
pg_in_recovery
pg_index_bloat_ratio
pg_index_bloat_size
pg_index_idx_blks_hit
pg_index_idx_blks_read
pg_index_idx_scan
pg_index_idx_tup_fetch
pg_index_idx_tup_read
pg_insert_lsn
pg_is_in_backup
pg_is_in_recovery
pg_is_primary
pg_is_replica
pg_is_wal_replay_paused
pg_lag
pg_last_replay_time
pg_lock_count
pg_lsn
pg_meta_info
pg_query_blk_io_time
pg_query_calls
pg_query_max_time
pg_query_mean_time
pg_query_min_time
pg_query_rows
pg_query_stddev_time
pg_query_total_time
pg_query_wal_bytes
pg_receive_lsn
pg_replay_lsn
pg_setting_block_size
pg_setting_data_checksums
pg_setting_max_connections
pg_setting_max_locks_per_transaction
pg_setting_max_prepared_transactions
pg_setting_max_replication_slots
pg_setting_max_wal_senders
pg_setting_max_worker_processes
pg_setting_wal_log_hints
pg_shmem_allocated_size
pg_shmem_offset
pg_shmem_size
pg_size_bytes
pg_slru_blks_exists
pg_slru_blks_hit
pg_slru_blks_read
pg_slru_blks_written
pg_slru_blks_zeroed
pg_slru_flushes
pg_slru_stats_reset
pg_slru_truncates
pg_status
pg_sync_standby_disabled
pg_sync_standby_enabled
pg_table_analyze_count
pg_table_autoanalyze_count
pg_table_autovacuum_count
pg_table_bloat_ratio
pg_table_bloat_size
pg_table_heap_blks_hit
pg_table_heap_blks_read
pg_table_idx_blks_hit
pg_table_idx_blks_read
pg_table_idx_scan
pg_table_idx_tup_fetch
pg_table_last_analyze
pg_table_last_autoanalyze
pg_table_last_autovacuum
pg_table_last_vacuum
pg_table_n_dead_tup
pg_table_n_live_tup
pg_table_n_mod_since_analyze
pg_table_n_tup_del
pg_table_n_tup_hot_upd
pg_table_n_tup_ins
pg_table_n_tup_mod
pg_table_n_tup_upd
pg_table_seq_scan
pg_table_seq_tup_read
pg_table_size_bytes
pg_table_size_indexsize
pg_table_size_relsize
pg_table_size_toastsize
pg_table_tbl_scan
pg_table_tidx_blks_hit
pg_table_tidx_blks_read
pg_table_toast_blks_hit
pg_table_toast_blks_read
pg_table_tup_read
pg_table_vacuum_count
pg_timeline
pg_timestamp
pg_up
pg_uptime
pg_version
pg_write_lsn
pg_xact_xmax
pg_xact_xmin
pg_xact_xnum
pgbouncer_database_current_connections
pgbouncer_database_disabled
pgbouncer_database_max_connections
pgbouncer_database_paused
pgbouncer_database_pool_size
pgbouncer_database_reserve_pool
pgbouncer_exporter_last_scrape_time
pgbouncer_exporter_query_cache_ttl
pgbouncer_exporter_query_scrape_duration
pgbouncer_exporter_query_scrape_error_count
pgbouncer_exporter_query_scrape_hit_count
pgbouncer_exporter_query_scrape_metric_count
pgbouncer_exporter_query_scrape_total_count
pgbouncer_exporter_scrape_duration
pgbouncer_exporter_scrape_error_count
pgbouncer_exporter_scrape_total_count
pgbouncer_exporter_server_scrape_duration
pgbouncer_exporter_server_scrape_total_count
pgbouncer_exporter_server_scrape_total_seconds
pgbouncer_exporter_up
pgbouncer_exporter_uptime
pgbouncer_in_recovery
pgbouncer_list_items
pgbouncer_pool_active_clients
pgbouncer_pool_active_servers
pgbouncer_pool_idle_servers
pgbouncer_pool_login_servers
pgbouncer_pool_maxwait
pgbouncer_pool_maxwait_us
pgbouncer_pool_tested_servers
pgbouncer_pool_used_servers
pgbouncer_pool_waiting_clients
pgbouncer_stat_avg_query_count
pgbouncer_stat_avg_query_time
pgbouncer_stat_avg_recv
pgbouncer_stat_avg_sent
pgbouncer_stat_avg_wait_time
pgbouncer_stat_avg_xact_count
pgbouncer_stat_avg_xact_time
pgbouncer_stat_total_query_count
pgbouncer_stat_total_query_time
pgbouncer_stat_total_received
pgbouncer_stat_total_sent
pgbouncer_stat_total_wait_time
pgbouncer_stat_total_xact_count
pgbouncer_stat_total_xact_time
pgbouncer_up
pgbouncer_version
process_cpu_seconds_total
process_max_fds
process_open_fds
process_resident_memory_bytes
process_start_time_seconds
process_virtual_memory_bytes
process_virtual_memory_max_bytes
promhttp_metric_handler_errors_total
promhttp_metric_handler_requests_in_flight
promhttp_metric_handler_requests_total
scrape_duration_seconds
scrape_samples_post_metric_relabeling
scrape_samples_scraped
scrape_series_added
up

4 - 衍生指标

Pigsty衍生监控指标的定义详情

这里是Pigsty所有衍生指标的定义规则。

机器节点聚合指标

---
# Recording rules for machine (node exporter) metrics.
# A Prometheus rule file is a mapping with a top-level `groups` list;
# each group has a name and an ordered list of rules.
groups:
  - name: node-rules
    rules:
      #==============================================================#
      #                         Aliveness                            #
      #==============================================================#
      # TODO: change this to your node exporter port
      # `up` of every scrape target whose instance label ends in :9099.
      - record: node_exporter_up
        expr: up{instance=~".*:9099"}
      # Seconds elapsed since the boot timestamp reported by the node.
      - record: node:uptime
        expr: time() - node_boot_time_seconds{}


      #==============================================================#
      #                             CPU                              #
      #==============================================================#
      # Naming: node:cpu:* keeps the `cpu` label, node:ins:* aggregates it
      # away, node:cls:* groups by the `cls` label.
      # Rules inside one group are evaluated in order, so each record is
      # declared before the records that read it.

      # cpu mode time ratio
      - record: node:cpu:cpu_mode
        expr: irate(node_cpu_seconds_total{}[1m])
      - record: node:ins:cpu_mode
        expr: sum without (cpu) (node:cpu:cpu_mode)
      - record: node:cls:cpu_mode
        expr: sum by (cls, mode) (node:ins:cpu_mode)

      # cpu schedule time-slices
      - record: node:cpu:sched_timeslices
        expr: irate(node_schedstat_timeslices_total{}[1m])
      - record: node:ins:sched_timeslices
        expr: sum without (cpu) (node:cpu:sched_timeslices)
      - record: node:cls:sched_timeslices
        expr: sum by (cls) (node:ins:sched_timeslices)
      # Legacy misspelled name (trailing "a"); kept so existing dashboards
      # or alerts that query it keep working. Prefer the record above.
      - record: node:cls:sched_timeslicesa
        expr: sum by (cls) (node:ins:sched_timeslices)

      # per-core cpu usage: 1 minus the idle share.
      # Declared before the cpu count because the count is derived from it.
      - record: node:cpu:cpu_usage
        expr: 1 - sum without (mode) (node:cpu:cpu_mode{mode="idle"})

      # cpu count: number of per-core usage series on each instance
      - record: node:ins:cpu_count
        expr: count without (cpu) (node:cpu:cpu_usage)
      - record: node:cls:cpu_count
        expr: sum by (cls) (node:ins:cpu_count)

      # cpu usage: instance level is the mean over cores; cluster level is
      # the core-count weighted mean over instances
      - record: node:ins:cpu_usage
        expr: sum without (cpu) (node:cpu:cpu_usage) / node:ins:cpu_count
      - record: node:cls:cpu_usage
        expr: sum by (cls) (node:ins:cpu_usage * node:ins:cpu_count) / sum by (cls) (node:ins:cpu_count)

      # cpu usage avg5m
      - record: node:cpu:cpu_usage_avg5m
        expr: avg_over_time(node:cpu:cpu_usage[5m])
      - record: node:ins:cpu_usage_avg5m
        expr: avg_over_time(node:ins:cpu_usage[5m])
      - record: node:cls:cpu_usage_avg5m
        expr: avg_over_time(node:cls:cpu_usage[5m])

      #==============================================================#
      #                            Memory                            #
      #==============================================================#
      # Application memory: total minus free, buffers, page cache, slab,
      # page tables and swap cache.
      - record: node:ins:mem_app
        expr: >-
          node_memory_MemTotal_bytes
          - node_memory_MemFree_bytes
          - node_memory_Buffers_bytes
          - node_memory_Cached_bytes
          - node_memory_Slab_bytes
          - node_memory_PageTables_bytes
          - node_memory_SwapCached_bytes
      # Free memory plus page cache.
      - record: node:ins:mem_free
        expr: node_memory_MemFree_bytes + node_memory_Cached_bytes
      # Application memory as a fraction of total memory.
      - record: node:ins:mem_usage
        expr: node:ins:mem_app / node_memory_MemTotal_bytes
      # Same fraction computed over the sums of each `cls` group.
      - record: node:cls:mem_usage
        expr: sum by (cls) (node:ins:mem_app) / sum by (cls) (node_memory_MemTotal_bytes)
      # Share of swap that is not free.
      - record: node:ins:swap_usage
        expr: 1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes


      #==============================================================#
      #                            Disk                              #
      #==============================================================#
      # All raw selectors in this section keep only devices whose name
      # fully matches [a-zA-Z-_]+ (letters, '-' and '_'); device names
      # containing digits are therefore excluded.
      # node:dev:* is per device, node:ins:* sums away the `device` label,
      # node:cls:* sums per `cls` label.
      # disk read iops
      - record: node:dev:disk_read_iops
        expr: irate(node_disk_reads_completed_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_read_iops
        expr: sum without (device) (node:dev:disk_read_iops)
      - record: node:cls:disk_read_iops
        expr: sum by (cls) (node:ins:disk_read_iops)

      # disk write iops
      - record: node:dev:disk_write_iops
        expr: irate(node_disk_writes_completed_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_write_iops
        expr: sum without (device) (node:dev:disk_write_iops)
      - record: node:cls:disk_write_iops
        expr: sum by (cls) (node:ins:disk_write_iops)

      # disk iops: read + write at each level
      - record: node:dev:disk_iops
        expr: node:dev:disk_read_iops + node:dev:disk_write_iops
      - record: node:ins:disk_iops
        expr: node:ins:disk_read_iops + node:ins:disk_write_iops
      - record: node:cls:disk_iops
        expr: node:cls:disk_read_iops + node:cls:disk_write_iops

      # read bandwidth (rate1m), bytes per second
      - record: node:dev:disk_read_rate
        expr: rate(node_disk_read_bytes_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_read_rate
        expr: sum without (device) (node:dev:disk_read_rate)
      - record: node:cls:disk_read_rate
        expr: sum by (cls) (node:ins:disk_read_rate)

      # write bandwidth (rate1m), bytes per second
      - record: node:dev:disk_write_rate
        expr: rate(node_disk_written_bytes_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_write_rate
        expr: sum without (device) (node:dev:disk_write_rate)
      - record: node:cls:disk_write_rate
        expr: sum by (cls) (node:ins:disk_write_rate)

      # io bandwidth (rate1m): read + write at each level
      - record: node:dev:disk_io_rate
        expr: node:dev:disk_read_rate + node:dev:disk_write_rate
      - record: node:ins:disk_io_rate
        expr: node:ins:disk_read_rate + node:ins:disk_write_rate
      - record: node:cls:disk_io_rate
        expr: node:cls:disk_read_rate + node:cls:disk_write_rate

      # read/write total time: seconds spent on reads / writes per second
      # (rate1m), per device
      - record: node:dev:disk_read_time
        expr: rate(node_disk_read_time_seconds_total{device=~"[a-zA-Z-_]+"}[1m])
      # Uses the WRITE time counter; reusing the read counter here would
      # make write time (and write response time) a copy of the read one.
      - record: node:dev:disk_write_time
        expr: rate(node_disk_write_time_seconds_total{device=~"[a-zA-Z-_]+"}[1m])

      # read/write response time: time spent divided by operations completed
      - record: node:dev:disk_read_rt
        expr: node:dev:disk_read_time / node:dev:disk_read_iops
      - record: node:dev:disk_write_rt
        expr: node:dev:disk_write_time / node:dev:disk_write_iops
      # Combined response time; the divisor is the recorded per-device
      # total iops series (node:dev:disk_iops).
      - record: node:dev:disk_rt
        expr: (node:dev:disk_read_time + node:dev:disk_write_time) / node:dev:disk_iops


      #==============================================================#
      #                            Network                           #
      #==============================================================#
      # Outbound bytes per second. The device level covers every device;
      # the instance roll-up leaves out `lo` and `bond*` devices.
      - record: node:dev:network_tx
        expr: irate(node_network_transmit_bytes_total[1m])
      - record: node:ins:network_tx
        expr: sum without (device) (node:dev:network_tx{device!~"lo|bond.*"})
      - record: node:cls:network_tx
        expr: sum by (cls) (node:ins:network_tx)

      # Inbound bytes per second, same roll-up as above.
      - record: node:dev:network_rx
        expr: irate(node_network_receive_bytes_total[1m])
      - record: node:ins:network_rx
        expr: sum without (device) (node:dev:network_rx{device!~"lo|bond.*"})
      - record: node:cls:network_rx
        expr: sum by (cls) (node:ins:network_rx)

      # Total traffic = tx + rx at each level.
      # NOTE(review): the device-level record is named *_io_rate while the
      # instance and cluster levels are named *_io.
      - record: node:dev:network_io_rate
        expr: node:dev:network_tx + node:dev:network_rx
      - record: node:ins:network_io
        expr: node:ins:network_tx + node:ins:network_rx
      - record: node:cls:network_io
        expr: node:cls:network_tx + node:cls:network_rx


      #==============================================================#
      #                           Schedule                           #
      #==============================================================#
      # normalized load: load average divided by the instance cpu count
      - record: node:ins:stdload1
        expr: node_load1 / node:ins:cpu_count
      - record: node:ins:stdload5
        expr: node_load5 / node:ins:cpu_count
      - record: node:ins:stdload15
        expr: node_load15 / node:ins:cpu_count

      # process: forks per second
      - record: node:ins:forks
        expr: irate(node_forks_total[1m])
      # interrupt & context switch, per second
      - record: node:ins:interrupt
        expr: irate(node_intr_total[1m])
      # Legacy misspelled name ("intrrupt"); kept so existing queries that
      # reference it keep working. Prefer node:ins:interrupt.
      - record: node:ins:intrrupt
        expr: irate(node_intr_total[1m])
      - record: node:ins:ctx_switch
        expr: irate(node_context_switches_total{}[1m])


      #==============================================================#
      #                              VM                              #
      #==============================================================#
      # Per-second change (irate over a 1m window) of the node_vmstat_*
      # counters: page faults, pages paged in/out, pages swapped in/out.
      - record: node:ins:pagefault
        expr: irate(node_vmstat_pgfault[1m])
      - record: node:ins:pagein
        expr: irate(node_vmstat_pgpgin[1m])
      - record: node:ins:pageout
        expr: irate(node_vmstat_pgpgout[1m])
      - record: node:ins:swapin
        expr: irate(node_vmstat_pswpin[1m])
      - record: node:ins:swapout
        expr: irate(node_vmstat_pswpout[1m])


      #==============================================================#
      #                              FS                              #
      #==============================================================#
      # Space and inode figures. Filesystems whose fstype matches
      # (n|root|tmp)fs.* are skipped, and the `device` / `fstype` labels
      # are collapsed by taking the maximum.
      - record: node:fs:free_bytes
        expr: max without (device, fstype) (node_filesystem_free_bytes{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:avail_bytes
        expr: max without (device, fstype) (node_filesystem_avail_bytes{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:size_bytes
        expr: max without (device, fstype) (node_filesystem_size_bytes{fstype!~"(n|root|tmp)fs.*"})
      # used fraction = 1 - available / size
      - record: node:fs:space_usage
        expr: 1 - (node:fs:avail_bytes / node:fs:size_bytes)
      - record: node:fs:free_inode
        expr: max without (device, fstype) (node_filesystem_files_free{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:total_inode
        expr: max without (device, fstype) (node_filesystem_files{fstype!~"(n|root|tmp)fs.*"})

      # Consumption speed and time-to-full. Both read the raw
      # node_filesystem_avail_bytes series (no fstype filter, labels kept).
      # space_deriv_1h: negated 1h derivative, positive while space shrinks.
      - record: node:fs:space_deriv_1h
        expr: 0 - deriv(node_filesystem_avail_bytes[1h])
      # space_exhaust: available bytes / consumption speed, positive only.
      - record: node:fs:space_exhaust
        expr: (node_filesystem_avail_bytes / node:fs:space_deriv_1h) > 0

      # inode used fraction
      - record: node:fs:inode_usage
        expr: 1 - (node:fs:free_inode / node:fs:total_inode)
      # allocated file descriptors as a fraction of the maximum
      - record: node:ins:fd_usage
        expr: node_filefd_allocated / node_filefd_maximum


      #==============================================================#
      #                             TCP                              #
      #==============================================================#
      # Segment throughput (rate1m): received, sent, retransmitted.
      - record: node:ins:tcp_insegs
        expr: rate(node_netstat_Tcp_InSegs[1m])
      - record: node:ins:tcp_outsegs
        expr: rate(node_netstat_Tcp_OutSegs[1m])
      - record: node:ins:tcp_retranssegs
        expr: rate(node_netstat_Tcp_RetransSegs[1m])
      # received + sent
      - record: node:ins:tcp_segs
        expr: node:ins:tcp_insegs + node:ins:tcp_outsegs
      # retransmitted segments as a fraction of sent segments
      - record: node:ins:tcp_retrans_rate
        expr: node:ins:tcp_retranssegs / node:ins:tcp_outsegs
      # listen queue overflows per second
      - record: node:ins:tcp_overflow_rate
        expr: rate(node_netstat_TcpExt_ListenOverflows[1m])


      #==============================================================#
      #                           Netstat                            #
      #==============================================================#
      # Every record here is the 1m rate of one node_netstat_* counter.
      # tcp open (rate1m): passive and active opens
      - record: node:ins:tcp_passive_opens
        expr: rate(node_netstat_Tcp_PassiveOpens[1m])
      - record: node:ins:tcp_active_opens
        expr: rate(node_netstat_Tcp_ActiveOpens[1m])
      # tcp close: failed attempts and resets of established connections
      - record: node:ins:tcp_attempt_fails
        expr: rate(node_netstat_Tcp_AttemptFails[1m])
      - record: node:ins:tcp_estab_resets
        expr: rate(node_netstat_Tcp_EstabResets[1m])
      # tcp drop: listen overflows and listen drops
      # (tcp_overflow uses the same expression as node:ins:tcp_overflow_rate)
      - record: node:ins:tcp_overflow
        expr: rate(node_netstat_TcpExt_ListenOverflows[1m])
      - record: node:ins:tcp_dropped
        expr: rate(node_netstat_TcpExt_ListenDrops[1m])


      #==============================================================#
      #                             NTP                              #
      #==============================================================#
      # Spread between the largest and smallest NTP offset inside each
      # `cls` group.
      - record: node:cls:ntp_offset_range
        expr: max by (cls) (node_ntp_offset_seconds) - min by (cls) (node_ntp_offset_seconds)

...

数据库与连接池聚合指标

---
#==============================================================#
# File      :   pgsql.yml
# Ctime     :   2020-04-22
# Mtime     :   2020-12-03
# Desc      :   Record and alert rules for postgres
# Path      :   /etc/prometheus/rules/pgsql.yml
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

groups:

  ################################################################
  #                         PgSQL Rules                          #
  ################################################################
  # Recording rules for postgres and pgbouncer metrics; rules in the
  # group are evaluated in the order they are listed.
  - name: pgsql-rules
    rules:

      #==============================================================#
      #                        Aliveness                             #
      #==============================================================#
      # TODO: change these to your pg_exporter & pgbouncer_exporter port
      # `up` of scrape targets whose instance label ends in :9185
      - record: pg_exporter_up
        expr: up{instance=~".*:9185"}

      # `up` of scrape targets whose instance label ends in :9127
      - record: pgbouncer_exporter_up
        expr: up{instance=~".*:9127"}


      #==============================================================#
      #                        Identity                              #
      #==============================================================#
      # The primary flag is the complement of pg_in_recovery; the replica
      # flag is pg_in_recovery itself.
      - record: pg_is_primary
        expr: 1 - pg_in_recovery
      - record: pg_is_replica
        expr: pg_in_recovery
      # pg_status packs liveness and role into a single number:
      #   0 = replica [DOWN], 1 = primary [DOWN], 2 = replica, 3 = primary
      - record: pg_status
        expr: (pg_up * 2) + (1 - pg_in_recovery)


      #==============================================================#
      #                            Age                               #
      #==============================================================#
      # Largest database age per instance (databases named template<digit>
      # are skipped), then per `cls`, then overall.
      - record: pg:ins:age
        expr: max without (datname) (pg_database_age{datname!~"template[0-9]"})
      - record: pg:cls:age
        expr: max by (cls) (pg:ins:age)
      - record: pg:all:age
        expr: max(pg:cls:age)

      # Growth and projection, per database.
      # age_deriv_1h: derivative of the age over the last hour.
      - record: pg:db:age_deriv_1h
        expr: deriv(pg_database_age[1h])
      # age_exhaust: distance from 2147483648 (2^31) divided by that
      # derivative.
      - record: pg:db:age_exhaust
        expr: (2147483648 - pg_database_age) / pg:db:age_deriv_1h



      #==============================================================#
      #                         Sessions                             #
      #==============================================================#
      # Naming used below: pg:db:* keeps `datname`, pg:ins:* sums it away,
      # pg:svc:* groups by (cls, role), pg:cls:* groups by cls, pg:all:* is
      # the grand total.
      # session count (by state): the `state` label is preserved at every level
      - record: pg:db:sessions
        expr: pg_activity_count
      - record: pg:ins:sessions
        expr: sum without (datname) (pg:db:sessions)
      - record: pg:svc:sessions
        expr: sum by (cls, role, state) (pg:ins:sessions)
      - record: pg:cls:sessions
        expr: sum by (cls, state) (pg:ins:sessions)
      - record: pg:all:sessions
        expr: sum by (state) (pg:cls:sessions)

      # backends: taken from pg_db_numbackends
      # (the instance level reads the raw metric rather than pg:db:backends)
      - record: pg:db:backends
        expr: pg_db_numbackends
      - record: pg:ins:backends
        expr: sum without (datname) (pg_db_numbackends)
      - record: pg:svc:backends
        expr: sum by (cls, role) (pg:ins:backends)
      - record: pg:cls:backends
        expr: sum by (cls) (pg:ins:backends)
      - record: pg:all:backends
        expr: sum(pg:cls:backends)

      # active backends: sessions whose state label is exactly "active"
      - record: pg:ins:active_backends
        expr: pg:ins:sessions{state="active"}
      - record: pg:svc:active_backends
        expr: sum by (cls, role) (pg:ins:active_backends)
      - record: pg:cls:active_backends
        expr: sum by (cls) (pg:ins:active_backends)
      - record: pg:all:active_backends
        expr: sum(pg:cls:active_backends)

      # idle in xact backends (including abort): sessions whose state label
      # starts with "idle in". The svc/cls/all levels aggregate the
      # ixact series itself, not the active-backend series.
      - record: pg:ins:ixact_backends
        expr: pg:ins:sessions{state=~"idle in.*"}
      - record: pg:svc:ixact_backends
        expr: sum by (cls, role) (pg:ins:ixact_backends)
      - record: pg:cls:ixact_backends
        expr: sum by (cls) (pg:ins:ixact_backends)
      - record: pg:all:ixact_backends
        expr: sum(pg:cls:ixact_backends)


      #==============================================================#
      #                    Servers (Pgbouncer)                       #
      #==============================================================#
      # Each pgbouncer_pool_*_servers gauge is rolled up the same way:
      #   pool = raw series without the "pgbouncer" database
      #   db   = summed over `user`
      #   ins  = summed over `user` and `datname`
      #   svc  = per (cls, role), cls = per cls, all = grand total

      # servers in the "active" state
      - record: pg:pool:active_servers
        expr: pgbouncer_pool_active_servers{datname!="pgbouncer"}
      - record: pg:db:active_servers
        expr: sum without (user) (pg:pool:active_servers)
      - record: pg:ins:active_servers
        expr: sum without (user, datname) (pg:pool:active_servers)
      - record: pg:svc:active_servers
        expr: sum by (cls, role) (pg:ins:active_servers)
      - record: pg:cls:active_servers
        expr: sum by (cls) (pg:ins:active_servers)
      - record: pg:all:active_servers
        expr: sum(pg:cls:active_servers)

      # servers in the "idle" state
      - record: pg:pool:idle_servers
        expr: pgbouncer_pool_idle_servers{datname!="pgbouncer"}
      - record: pg:db:idle_servers
        expr: sum without (user) (pg:pool:idle_servers)
      - record: pg:ins:idle_servers
        expr: sum without (user, datname) (pg:pool:idle_servers)
      - record: pg:svc:idle_servers
        expr: sum by (cls, role) (pg:ins:idle_servers)
      - record: pg:cls:idle_servers
        expr: sum by (cls) (pg:ins:idle_servers)
      - record: pg:all:idle_servers
        expr: sum(pg:cls:idle_servers)

      # servers in the "used" state
      - record: pg:pool:used_servers
        expr: pgbouncer_pool_used_servers{datname!="pgbouncer"}
      - record: pg:db:used_servers
        expr: sum without (user) (pg:pool:used_servers)
      - record: pg:ins:used_servers
        expr: sum without (user, datname) (pg:pool:used_servers)
      - record: pg:svc:used_servers
        expr: sum by (cls, role) (pg:ins:used_servers)
      - record: pg:cls:used_servers
        expr: sum by (cls) (pg:ins:used_servers)
      - record: pg:all:used_servers
        expr: sum(pg:cls:used_servers)

      # servers in the "tested" state
      - record: pg:pool:tested_servers
        expr: pgbouncer_pool_tested_servers{datname!="pgbouncer"}
      - record: pg:db:tested_servers
        expr: sum without (user) (pg:pool:tested_servers)
      - record: pg:ins:tested_servers
        expr: sum without (user, datname) (pg:pool:tested_servers)
      - record: pg:svc:tested_servers
        expr: sum by (cls, role) (pg:ins:tested_servers)
      - record: pg:cls:tested_servers
        expr: sum by (cls) (pg:ins:tested_servers)
      - record: pg:all:tested_servers
        expr: sum(pg:cls:tested_servers)

      # servers in the "login" state
      - record: pg:pool:login_servers
        expr: pgbouncer_pool_login_servers{datname!="pgbouncer"}
      - record: pg:db:login_servers
        expr: sum without (user) (pg:pool:login_servers)
      - record: pg:ins:login_servers
        expr: sum without (user, datname) (pg:pool:login_servers)
      - record: pg:svc:login_servers
        expr: sum by (cls, role) (pg:ins:login_servers)
      - record: pg:cls:login_servers
        expr: sum by (cls) (pg:ins:login_servers)
      - record: pg:all:login_servers
        expr: sum(pg:cls:login_servers)



      #==============================================================#
      #                   Clients (Pgbouncer)                        #
      #==============================================================#
      # Client-side pool gauges, excluding the "pgbouncer" database, rolled
      # up over `user`, then `datname`, then per (cls, role), per cls, and
      # in total.

      # clients in the "active" state
      - record: pg:pool:active_clients
        expr: pgbouncer_pool_active_clients{datname!="pgbouncer"}
      - record: pg:db:active_clients
        expr: sum without (user) (pg:pool:active_clients)
      - record: pg:ins:active_clients
        expr: sum without (user, datname) (pg:pool:active_clients)
      - record: pg:svc:active_clients
        expr: sum by (cls, role) (pg:ins:active_clients)
      - record: pg:cls:active_clients
        expr: sum by (cls) (pg:ins:active_clients)
      - record: pg:all:active_clients
        expr: sum(pg:cls:active_clients)

      # clients in the "waiting" state
      - record: pg:pool:waiting_clients
        expr: pgbouncer_pool_waiting_clients{datname!="pgbouncer"}
      - record: pg:db:waiting_clients
        expr: sum without (user) (pg:pool:waiting_clients)
      - record: pg:ins:waiting_clients
        expr: sum without (user, datname) (pg:pool:waiting_clients)
      - record: pg:svc:waiting_clients
        expr: sum by (cls, role) (pg:ins:waiting_clients)
      - record: pg:cls:waiting_clients
        expr: sum by (cls) (pg:ins:waiting_clients)
      - record: pg:all:waiting_clients
        expr: sum(pg:cls:waiting_clients)


      #==============================================================#
      #                       Transactions                           #
      #==============================================================#
      # "*_realtime" records use irate over a 1m window; the plain records
      # use rate over the same window. Each is rolled up from database to
      # instance, (cls, role), cls and the grand total.

      # commits per second (irate)
      - record: pg:db:commits_realtime
        expr: irate(pg_db_xact_commit[1m])
      - record: pg:ins:commits_realtime
        expr: sum without (datname) (pg:db:commits_realtime)
      - record: pg:svc:commits_realtime
        expr: sum by (cls, role) (pg:ins:commits_realtime)
      - record: pg:cls:commits_realtime
        expr: sum by (cls) (pg:ins:commits_realtime)
      - record: pg:all:commits_realtime
        expr: sum(pg:cls:commits_realtime)

      # commits per second (rate1m)
      - record: pg:db:commits
        expr: rate(pg_db_xact_commit[1m])
      - record: pg:ins:commits
        expr: sum without (datname) (pg:db:commits)
      - record: pg:svc:commits
        expr: sum by (cls, role) (pg:ins:commits)
      - record: pg:cls:commits
        expr: sum by (cls) (pg:ins:commits)
      - record: pg:all:commits
        expr: sum(pg:cls:commits)

      # rollbacks per second (irate)
      - record: pg:db:rollbacks_realtime
        expr: irate(pg_db_xact_rollback[1m])
      - record: pg:ins:rollbacks_realtime
        expr: sum without (datname) (pg:db:rollbacks_realtime)
      - record: pg:svc:rollbacks_realtime
        expr: sum by (cls, role) (pg:ins:rollbacks_realtime)
      - record: pg:cls:rollbacks_realtime
        expr: sum by (cls) (pg:ins:rollbacks_realtime)
      - record: pg:all:rollbacks_realtime
        expr: sum(pg:cls:rollbacks_realtime)
      # rollbacks per second (rate1m)
      - record: pg:db:rollbacks
        expr: rate(pg_db_xact_rollback[1m])
      - record: pg:ins:rollbacks
        expr: sum without (datname) (pg:db:rollbacks)
      - record: pg:svc:rollbacks
        expr: sum by (cls, role) (pg:ins:rollbacks)
      - record: pg:cls:rollbacks
        expr: sum by (cls) (pg:ins:rollbacks)
      - record: pg:all:rollbacks
        expr: sum(pg:cls:rollbacks)

      # xacts (realtime): all finished transactions per second, i.e.
      # commits plus rollbacks (irate over a 1m window). Counting commits
      # only would make these records duplicates of pg:*:commits_realtime.
      - record: pg:db:xacts_realtime
        expr: irate(pg_db_xact_commit{}[1m]) + irate(pg_db_xact_rollback{}[1m])
      - record: pg:ins:xacts_realtime
        expr: sum without (datname) (pg:db:xacts_realtime)
      - record: pg:svc:xacts_realtime
        expr: sum by (cls, role) (pg:ins:xacts_realtime)
      - record: pg:cls:xacts_realtime
        expr: sum by (cls) (pg:ins:xacts_realtime)
      - record: pg:all:xacts_realtime
        expr: sum(pg:cls:xacts_realtime)
      # xacts (rate1m): commits plus rollbacks per second
      - record: pg:db:xacts
        expr: rate(pg_db_xact_commit{}[1m]) + rate(pg_db_xact_rollback{}[1m])
      - record: pg:ins:xacts
        expr: sum without (datname) (pg:db:xacts)
      - record: pg:svc:xacts
        expr: sum by (cls, role) (pg:ins:xacts)
      - record: pg:cls:xacts
        expr: sum by (cls) (pg:ins:xacts)
      - record: pg:all:xacts
        expr: sum(pg:cls:xacts)
      # xacts avg30m: mean of the rate1m series over the last 30 minutes
      - record: pg:db:xacts_avg30m
        expr: avg_over_time(pg:db:xacts[30m])
      - record: pg:ins:xacts_avg30m
        expr: avg_over_time(pg:ins:xacts[30m])
      - record: pg:svc:xacts_avg30m
        expr: avg_over_time(pg:svc:xacts[30m])
      - record: pg:cls:xacts_avg30m
        expr: avg_over_time(pg:cls:xacts[30m])
      - record: pg:all:xacts_avg30m
        expr: avg_over_time(pg:all:xacts[30m])
      # xacts µ: 30-minute mean of the avg30m series (a mean of means)
      - record: pg:db:xacts_mu
        expr: avg_over_time(pg:db:xacts_avg30m[30m])
      - record: pg:ins:xacts_mu
        expr: avg_over_time(pg:ins:xacts_avg30m[30m])
      - record: pg:svc:xacts_mu
        expr: avg_over_time(pg:svc:xacts_avg30m[30m])
      - record: pg:cls:xacts_mu
        expr: avg_over_time(pg:cls:xacts_avg30m[30m])
      - record: pg:all:xacts_mu
        expr: avg_over_time(pg:all:xacts_avg30m[30m])
      # xacts σ: sigma, standard deviation of the rate1m series over 30 minutes
      - record: pg:db:xacts_sigma
        expr: stddev_over_time(pg:db:xacts[30m])
      - record: pg:ins:xacts_sigma
        expr: stddev_over_time(pg:ins:xacts[30m])
      - record: pg:svc:xacts_sigma
        expr: stddev_over_time(pg:svc:xacts[30m])
      - record: pg:cls:xacts_sigma
        expr: stddev_over_time(pg:cls:xacts[30m])
      - record: pg:all:xacts_sigma
        expr: stddev_over_time(pg:all:xacts[30m])


      #==============================================================#
      #                      TPS (Pgbouncer)                         #
      #==============================================================#
      # Realtime TPS: irate (1m window) of pgbouncer's total transaction
      # counter. No datname filter is applied at this level.
      - record: pg:db:tps_realtime
        expr: irate(pgbouncer_stat_total_xact_count[1m])
      - record: pg:ins:tps_realtime
        expr: sum without (datname) (pg:db:tps_realtime)
      - record: pg:svc:tps_realtime
        expr: sum by (cls, role) (pg:ins:tps_realtime)
      - record: pg:cls:tps_realtime
        expr: sum by (cls) (pg:ins:tps_realtime)
      - record: pg:all:tps_realtime
        expr: sum(pg:cls:tps_realtime)

      # TPS: read straight from pgbouncer_stat_avg_xact_count (no PromQL
      # rate is applied here), leaving out the "pgbouncer" database.
      - record: pg:db:tps
        expr: pgbouncer_stat_avg_xact_count{datname!="pgbouncer"}
      - record: pg:ins:tps
        expr: sum without (datname) (pg:db:tps)
      - record: pg:svc:tps
        expr: sum by (cls, role) (pg:ins:tps)
      - record: pg:cls:tps
        expr: sum by (cls) (pg:ins:tps)
      - record: pg:all:tps
        expr: sum(pg:cls:tps)
      # 30-minute mean of TPS
      - record: pg:db:tps_avg30m
        expr: avg_over_time(pg:db:tps[30m])
      - record: pg:ins:tps_avg30m
        expr: avg_over_time(pg:ins:tps[30m])
      - record: pg:svc:tps_avg30m
        expr: avg_over_time(pg:svc:tps[30m])
      - record: pg:cls:tps_avg30m
        expr: avg_over_time(pg:cls:tps[30m])
      - record: pg:all:tps_avg30m
        expr: avg_over_time(pg:all:tps[30m])
      # µ: 30-minute mean of the avg30m series
      - record: pg:db:tps_mu
        expr: avg_over_time(pg:db:tps_avg30m[30m])
      - record: pg:ins:tps_mu
        expr: avg_over_time(pg:ins:tps_avg30m[30m])
      - record: pg:svc:tps_mu
        expr: avg_over_time(pg:svc:tps_avg30m[30m])
      - record: pg:cls:tps_mu
        expr: avg_over_time(pg:cls:tps_avg30m[30m])
      - record: pg:all:tps_mu
        expr: avg_over_time(pg:all:tps_avg30m[30m])
      # σ: 30-minute standard deviation of TPS
      - record: pg:db:tps_sigma
        expr: stddev_over_time(pg:db:tps[30m])
      - record: pg:ins:tps_sigma
        expr: stddev_over_time(pg:ins:tps[30m])
      - record: pg:svc:tps_sigma
        expr: stddev_over_time(pg:svc:tps[30m])
      - record: pg:cls:tps_sigma
        expr: stddev_over_time(pg:cls:tps[30m])
      - record: pg:all:tps_sigma
        expr: stddev_over_time(pg:all:tps[30m])

      # Transaction response time. The database level scales pgbouncer's
      # own average by 1/1000000 (presumably microseconds to seconds,
      # confirm against pgbouncer's unit). The ins/svc levels divide the
      # 1m rate of total transaction time by the 1m rate of transaction
      # count, with the same scaling.
      - record: pg:db:xact_rt
        expr: pgbouncer_stat_avg_xact_time{datname!="pgbouncer"} / 1000000
      - record: pg:ins:xact_rt
        expr: >-
          sum without (datname) (rate(pgbouncer_stat_total_xact_time[1m]))
          / sum without (datname) (rate(pgbouncer_stat_total_xact_count[1m]))
          / 1000000
      - record: pg:svc:xact_rt
        expr: >-
          sum by (cls, role) (rate(pgbouncer_stat_total_xact_time[1m]))
          / sum by (cls, role) (rate(pgbouncer_stat_total_xact_count[1m]))
          / 1000000
      # 30-minute mean of xact_rt
      - record: pg:db:xact_rt_avg30m
        expr: avg_over_time(pg:db:xact_rt[30m])
      - record: pg:ins:xact_rt_avg30m
        expr: avg_over_time(pg:ins:xact_rt[30m])
      - record: pg:svc:xact_rt_avg30m
        expr: avg_over_time(pg:svc:xact_rt[30m])
      # µ: 30-minute mean of the avg30m series
      - record: pg:db:xact_rt_mu
        expr: avg_over_time(pg:db:xact_rt_avg30m[30m])
      - record: pg:ins:xact_rt_mu
        expr: avg_over_time(pg:ins:xact_rt_avg30m[30m])
      - record: pg:svc:xact_rt_mu
        expr: avg_over_time(pg:svc:xact_rt_avg30m[30m])

      # σ: 30-minute standard deviation of xact_rt
      - record: pg:db:xact_rt_sigma
        expr: stddev_over_time(pg:db:xact_rt[30m])
      - record: pg:ins:xact_rt_sigma
        expr: stddev_over_time(pg:ins:xact_rt[30m])
      - record: pg:svc:xact_rt_sigma
        expr: stddev_over_time(pg:svc:xact_rt[30m])



      #==============================================================#
      #                     QPS (Pgbouncer)                          #
      #==============================================================#
      # Realtime QPS: irate (1m window) of pgbouncer's total query counter.
      # No datname filter is applied at this level.
      - record: pg:db:qps_realtime
        expr: irate(pgbouncer_stat_total_query_count[1m])
      - record: pg:ins:qps_realtime
        expr: sum without (datname) (pg:db:qps_realtime)
      - record: pg:svc:qps_realtime
        expr: sum by (cls, role) (pg:ins:qps_realtime)
      - record: pg:cls:qps_realtime
        expr: sum by (cls) (pg:ins:qps_realtime)
      - record: pg:all:qps_realtime
        expr: sum(pg:cls:qps_realtime)
      # QPS: read straight from pgbouncer_stat_avg_query_count (no PromQL
      # rate is applied here), leaving out the "pgbouncer" database.
      - record: pg:db:qps
        expr: pgbouncer_stat_avg_query_count{datname!="pgbouncer"}
      - record: pg:ins:qps
        expr: sum without (datname) (pg:db:qps)
      - record: pg:svc:qps
        expr: sum by (cls, role) (pg:ins:qps)
      - record: pg:cls:qps
        expr: sum by (cls) (pg:ins:qps)
      - record: pg:all:qps
        expr: sum(pg:cls:qps)

      # 30-minute mean of QPS
      - record: pg:db:qps_avg30m
        expr: avg_over_time(pg:db:qps[30m])
      - record: pg:ins:qps_avg30m
        expr: avg_over_time(pg:ins:qps[30m])
      - record: pg:svc:qps_avg30m
        expr: avg_over_time(pg:svc:qps[30m])
      - record: pg:cls:qps_avg30m
        expr: avg_over_time(pg:cls:qps[30m])
      - record: pg:all:qps_avg30m
        expr: avg_over_time(pg:all:qps[30m])
      # µ: 30-minute mean of the avg30m series
      - record: pg:db:qps_mu
        expr: avg_over_time(pg:db:qps_avg30m[30m])
      - record: pg:ins:qps_mu
        expr: avg_over_time(pg:ins:qps_avg30m[30m])
      - record: pg:svc:qps_mu
        expr: avg_over_time(pg:svc:qps_avg30m[30m])
      - record: pg:cls:qps_mu
        expr: avg_over_time(pg:cls:qps_avg30m[30m])
      - record: pg:all:qps_mu
        expr: avg_over_time(pg:all:qps_avg30m[30m])
      # σ: 30-minute standard deviation of QPS
      - record: pg:db:qps_sigma
        expr: stddev_over_time(pg:db:qps[30m])
      - record: pg:ins:qps_sigma
        expr: stddev_over_time(pg:ins:qps[30m])
      - record: pg:svc:qps_sigma
        expr: stddev_over_time(pg:svc:qps[30m])
      - record: pg:cls:qps_sigma
        expr: stddev_over_time(pg:cls:qps[30m])
      - record: pg:all:qps_sigma
        expr: stddev_over_time(pg:all:qps[30m])
      # Query response time. The database level scales pgbouncer's own
      # average by 1/1000000 (presumably microseconds to seconds, confirm
      # against pgbouncer's unit). The ins/svc levels divide the 1m rate of
      # total query time by the 1m rate of query count, with the same
      # scaling.
      - record: pg:db:query_rt
        expr: pgbouncer_stat_avg_query_time{datname!="pgbouncer"} / 1000000
      - record: pg:ins:query_rt
        expr: >-
          sum without (datname) (rate(pgbouncer_stat_total_query_time[1m]))
          / sum without (datname) (rate(pgbouncer_stat_total_query_count[1m]))
          / 1000000
      - record: pg:svc:query_rt
        expr: >-
          sum by (cls, role) (rate(pgbouncer_stat_total_query_time[1m]))
          / sum by (cls, role) (rate(pgbouncer_stat_total_query_count[1m]))
          / 1000000
      # 30-minute mean of query_rt
      - record: pg:db:query_rt_avg30m
        expr: avg_over_time(pg:db:query_rt[30m])
      - record: pg:ins:query_rt_avg30m
        expr: avg_over_time(pg:ins:query_rt[30m])
      - record: pg:svc:query_rt_avg30m
        expr: avg_over_time(pg:svc:query_rt[30m])
      # µ: 30-minute mean of the avg30m series
      - record: pg:db:query_rt_mu
        expr: avg_over_time(pg:db:query_rt_avg30m[30m])
      - record: pg:ins:query_rt_mu
        expr: avg_over_time(pg:ins:query_rt_avg30m[30m])
      - record: pg:svc:query_rt_mu
        expr: avg_over_time(pg:svc:query_rt_avg30m[30m])
      # σ: 30-minute standard deviation of query_rt
      - record: pg:db:query_rt_sigma
        expr: stddev_over_time(pg:db:query_rt[30m])
      - record: pg:ins:query_rt_sigma
        expr: stddev_over_time(pg:ins:query_rt[30m])
      - record: pg:svc:query_rt_sigma
        expr: stddev_over_time(pg:svc:query_rt[30m])


      #==============================================================#
      #                        PG Load                               #
      #==============================================================#
      # time spent in transactions per second, per instance, averaged over
      # 1m / 5m / 15m windows; `/ 1000000` rescales the pgbouncer counter
      # (presumably microseconds -> seconds; confirm against pgbouncer docs)
      - record: pg:ins:xact_time_rate1m
        expr: sum without (datname) (rate(pgbouncer_stat_total_xact_time{}[1m])) / 1000000
      - record: pg:ins:xact_time_rate5m
        expr: sum without (datname) (rate(pgbouncer_stat_total_xact_time{}[5m])) / 1000000
      - record: pg:ins:xact_time_rate15m
        expr: sum without (datname) (rate(pgbouncer_stat_total_xact_time{}[15m])) / 1000000

      # time spent in queries per second, per instance, over the same three
      # windows and with the same rescale
      - record: pg:ins:query_time_rate1m
        expr: sum without (datname) (rate(pgbouncer_stat_total_query_time{}[1m])) / 1000000
      - record: pg:ins:query_time_rate5m
        expr: sum without (datname) (rate(pgbouncer_stat_total_query_time{}[5m])) / 1000000
      - record: pg:ins:query_time_rate15m
        expr: sum without (datname) (rate(pgbouncer_stat_total_query_time{}[15m])) / 1000000

      # instance level load: transaction time per second divided by the CPU
      # count of the node with the same `ip` label
      # load0 uses irate on the raw counter, so it carries its own `/ 1000000`;
      # load1/5/15 reuse pg:ins:xact_time_rate*, which are already rescaled
      - record: pg:ins:load0
        expr: sum without (datname) (irate(pgbouncer_stat_total_xact_time{}[1m])) / on (ip) group_left()  node:ins:cpu_count / 1000000
      - record: pg:ins:load1
        expr: pg:ins:xact_time_rate1m  / on (ip) group_left()  node:ins:cpu_count
      - record: pg:ins:load5
        expr: pg:ins:xact_time_rate5m  / on (ip) group_left()  node:ins:cpu_count
      - record: pg:ins:load15
        expr: pg:ins:xact_time_rate15m  / on (ip) group_left()  node:ins:cpu_count

      # service level load: transaction time per second summed over the
      # instances of a service, divided by the total CPU count of that service
      # load0 starts from the raw pgbouncer counter, so it applies the
      # `/ 1000000` rescale itself
      - record: pg:svc:load0
        expr: sum by (svc, cls, role) (irate(pgbouncer_stat_total_xact_time{}[1m])) / on (svc) group_left() sum by (svc) (node:ins:cpu_count{}) / 1000000
      # load1/5/15 start from pg:ins:xact_time_rate*, which are already divided
      # by 1000000; dividing again would shrink the result by a factor of 1e6
      - record: pg:svc:load1
        expr: sum by (svc, cls, role) (pg:ins:xact_time_rate1m)  / on (svc) group_left() sum by (svc) (node:ins:cpu_count{})
      - record: pg:svc:load5
        expr: sum by (svc, cls, role) (pg:ins:xact_time_rate5m)  / on (svc) group_left() sum by (svc) (node:ins:cpu_count{})
      - record: pg:svc:load15
        expr: sum by (svc, cls, role) (pg:ins:xact_time_rate15m)  / on (svc) group_left() sum by (svc) (node:ins:cpu_count{})

      # cluster level load: transaction time per second summed per cluster,
      # divided by the cluster's CPU count (matched on `cls`)
      # load0 rescales the raw counter with `/ 1000000`; load1/5/15 reuse the
      # already-rescaled pg:ins:xact_time_rate* series
      - record: pg:cls:load0
        expr: sum by (cls) (irate(pgbouncer_stat_total_xact_time{}[1m])) / on (cls) node:cls:cpu_count{} / 1000000
      - record: pg:cls:load1
        expr: sum by (cls) (pg:ins:xact_time_rate1m)  / on (cls) node:cls:cpu_count
      - record: pg:cls:load5
        expr: sum by (cls) (pg:ins:xact_time_rate5m)  / on (cls) node:cls:cpu_count
      - record: pg:cls:load15
        expr: sum by (cls) (pg:ins:xact_time_rate15m)  / on (cls) node:cls:cpu_count


      #==============================================================#
      #                     PG Saturation                            #
      #==============================================================#
      # saturation = max(pg load, cpu usage)
      # `load > cpu_usage` keeps the load sample only where it exceeds cpu
      # usage; `or cpu_usage` then supplies cpu usage for every series the
      # comparison dropped (including series that have no load sample at all)

      # instance level saturation
      - record: pg:ins:saturation0
        expr: pg:ins:load0 > node:ins:cpu_usage or node:ins:cpu_usage
      - record: pg:ins:saturation1
        expr: pg:ins:load1 > node:ins:cpu_usage or node:ins:cpu_usage
      - record: pg:ins:saturation5
        expr: pg:ins:load5 > node:ins:cpu_usage or node:ins:cpu_usage
      - record: pg:ins:saturation15
        expr: pg:ins:load15 > node:ins:cpu_usage or node:ins:cpu_usage

      # cluster level saturation (same construction on the cls-level series)
      - record: pg:cls:saturation0
        expr: pg:cls:load0 > node:cls:cpu_usage or node:cls:cpu_usage
      - record: pg:cls:saturation1
        expr: pg:cls:load1 > node:cls:cpu_usage or node:cls:cpu_usage
      - record: pg:cls:saturation5
        expr: pg:cls:load5 > node:cls:cpu_usage or node:cls:cpu_usage
      - record: pg:cls:saturation15
        expr: pg:cls:load15 > node:cls:cpu_usage or node:cls:cpu_usage


      #==============================================================#
      #                          CRUD                                #
      #==============================================================#
      # rows touched: per-second rate taken from the last two samples (irate)
      # NOTE(review): "touched" is derived from pg_db_tup_fetched and "selected"
      # from pg_db_tup_returned -- confirm the naming matches the intended
      # PostgreSQL counters
      - record: pg:db:tup_touched
        expr: irate(pg_db_tup_fetched{}[1m])
      - record: pg:ins:tup_touched
        expr: sum without(datname) (pg:db:tup_touched)
      - record: pg:svc:tup_touched
        expr: sum by (cls, role) (pg:ins:tup_touched)
      - record: pg:cls:tup_touched
        expr: sum by (cls) (pg:ins:tup_touched)
      - record: pg:all:tup_touched
        expr: sum(pg:cls:tup_touched)

      # rows selected: cluster level sums every role (no role filter)
      - record: pg:db:tup_selected
        expr: irate(pg_db_tup_returned{}[1m])
      - record: pg:ins:tup_selected
        expr: sum without(datname) (pg:db:tup_selected)
      - record: pg:svc:tup_selected
        expr: sum by (cls, role) (pg:ins:tup_selected)
      - record: pg:cls:tup_selected
        expr: sum by (cls) (pg:ins:tup_selected)
      - record: pg:all:tup_selected
        expr: sum(pg:cls:tup_selected)

      # rows inserted per second
      # the cluster-level rule counts role="primary" instances only, so rows
      # reported by other roles are not added to the cluster total
      - record: pg:db:tup_inserted
        expr: irate(pg_db_tup_inserted{}[1m])
      - record: pg:ins:tup_inserted
        expr: sum without(datname) (pg:db:tup_inserted)
      - record: pg:svc:tup_inserted
        expr: sum by (cls, role) (pg:ins:tup_inserted)
      - record: pg:cls:tup_inserted
        expr: sum by (cls) (pg:ins:tup_inserted{role="primary"})
      - record: pg:all:tup_inserted
        expr: sum(pg:cls:tup_inserted)

      # rows updated per second (cluster level: primary only)
      - record: pg:db:tup_updated
        expr: irate(pg_db_tup_updated{}[1m])
      - record: pg:ins:tup_updated
        expr: sum without(datname) (pg:db:tup_updated)
      - record: pg:svc:tup_updated
        expr: sum by (cls, role) (pg:ins:tup_updated)
      - record: pg:cls:tup_updated
        expr: sum by (cls) (pg:ins:tup_updated{role="primary"})
      - record: pg:all:tup_updated
        expr: sum(pg:cls:tup_updated)

      # rows deleted per second (cluster level: primary only)
      - record: pg:db:tup_deleted
        expr: irate(pg_db_tup_deleted{}[1m])
      - record: pg:ins:tup_deleted
        expr: sum without(datname) (pg:db:tup_deleted)
      - record: pg:svc:tup_deleted
        expr: sum by (cls, role) (pg:ins:tup_deleted)
      - record: pg:cls:tup_deleted
        expr: sum by (cls) (pg:ins:tup_deleted{role="primary"})
      - record: pg:all:tup_deleted
        expr: sum(pg:cls:tup_deleted)

      # rows modified per second (cluster level: primary only)
      - record: pg:db:tup_modified
        expr: irate(pg_db_tup_modified{}[1m])
      - record: pg:ins:tup_modified
        expr: sum without(datname) (pg:db:tup_modified)
      - record: pg:svc:tup_modified
        expr: sum by (cls, role) (pg:ins:tup_modified)
      - record: pg:cls:tup_modified
        expr: sum by (cls) (pg:ins:tup_modified{role="primary"})
      # global total must aggregate the modified series, not tup_deleted
      - record: pg:all:tup_modified
        expr: sum(pg:cls:tup_modified)


      #==============================================================#
      #                      Object Access                           #
      #==============================================================#
      # table access: 1m average scan rates, plus an instantaneous (irate)
      # index-scan rate recorded as qps_realtime
      - record: pg:table:idx_scan
        expr: rate(pg_table_idx_scan{}[1m])
      - record: pg:table:seq_scan
        expr: rate(pg_table_seq_scan{}[1m])
      - record: pg:table:qps_realtime
        expr: irate(pg_table_idx_scan{}[1m])

      # index access: 1m average and instantaneous index-scan rates
      - record: pg:index:idx_scan
        expr: rate(pg_index_idx_scan{}[1m])
      - record: pg:index:qps_realtime
        expr: irate(pg_index_idx_scan{}[1m])

      # func access: calls per second and average time per call
      # NOTE(review): pg:func:rt has no unit rescale while pg:query:rt below
      # divides by 1000 -- confirm both end up in the intended unit
      - record: pg:func:call
        expr: rate(pg_func_calls{}[1m])
      - record: pg:func:rt
        expr: rate(pg_func_total_time{}[1m]) / pg:func:call

      # query access: calls per second and average time per call
      # (`/ 1000` presumably converts milliseconds to seconds; confirm)
      - record: pg:query:call
        expr: rate(pg_query_calls{}[1m])
      - record: pg:query:rt
        expr: rate(pg_query_total_time{}[1m]) / pg:query:call / 1000



      #==============================================================#
      #                        Blocks IO                             #
      #==============================================================#
      # blocks read / hit / accessed during the last minute (counter increase)
      - record: pg:db:blks_read_1m
        expr: increase(pg_db_blks_read{}[1m])
      - record: pg:db:blks_hit_1m
        expr: increase(pg_db_blks_hit{}[1m])
      - record: pg:db:blks_access_1m
        expr: increase(pg_db_blks_access{}[1m])

      # buffer hit rate (1m) = hits / accesses
      # the instance-level rule sums both sides across databases before
      # dividing; note it is recorded as `hit_rate`, not `buffer_hit_rate`
      - record: pg:db:buffer_hit_rate
        expr: pg:db:blks_hit_1m / pg:db:blks_access_1m
      - record: pg:ins:hit_rate
        expr: sum without(datname) (pg:db:blks_hit_1m) / sum without(datname) (pg:db:blks_access_1m)

      # read/write time usage: 1m rate of the block read/write time counters;
      # io_time_usage is their sum
      - record: pg:db:read_time_usage
        expr: rate(pg_db_blk_read_time[1m])
      - record: pg:db:write_time_usage
        expr: rate(pg_db_blk_write_time[1m])
      - record: pg:db:io_time_usage
        expr: pg:db:read_time_usage + pg:db:write_time_usage



      #==============================================================#
      #                  Traffic IO (Pgbouncer)                      #
      #==============================================================#
      # transmit bandwidth (sent, out); pgbouncer's own admin database is excluded
      # NOTE(review): pg:ins:tx aggregates away (user, datname) while pg:ins:rx
      # below aggregates away datname only -- confirm whether a `user` label
      # exists on these series
      - record: pg:db:tx
        expr: irate(pgbouncer_stat_total_sent{datname!="pgbouncer"}[1m])
      - record: pg:ins:tx
        expr: sum without (user, datname) (pg:db:tx)
      - record: pg:svc:tx
        expr: sum by (cls, role) (pg:ins:tx)
      - record: pg:cls:tx
        expr: sum by (cls) (pg:ins:tx)
      - record: pg:all:tx
        expr: sum(pg:cls:tx)

      # receive bandwidth (received, in); pgbouncer's own admin database is excluded
      - record: pg:db:rx
        expr: irate(pgbouncer_stat_total_received{datname!="pgbouncer"}[1m])
      - record: pg:ins:rx
        expr: sum without (datname) (pg:db:rx)
      - record: pg:svc:rx
        expr: sum by (cls, role) (pg:ins:rx)
      - record: pg:cls:rx
        expr: sum by (cls) (pg:ins:rx)
      - record: pg:all:rx
        expr: sum(pg:cls:rx)



      #==============================================================#
      #                          Lock                                #
      #==============================================================#
      # lock count by mode: the `mode` label is kept at every level
      # (svc and cls rules list it explicitly in their `by` clause)
      - record: pg:db:locks
        expr: pg_lock_count
      - record: pg:ins:locks
        expr: sum without(datname) (pg:db:locks)
      - record: pg:svc:locks
        expr: sum by (cls, role, mode) (pg:ins:locks)
      - record: pg:cls:locks
        expr: sum by (cls, mode) (pg:ins:locks)

      # total lock count: all modes summed together
      - record: pg:db:lock_count
        expr: sum without (mode) (pg_lock_count{})
      - record: pg:ins:lock_count
        expr: sum without(datname) (pg:db:lock_count)
      - record: pg:svc:lock_count
        expr: sum by (cls, role) (pg:ins:lock_count)
      - record: pg:cls:lock_count
        expr: sum by (cls) (pg:ins:lock_count)

      # read category lock: AccessShareLock only
      - record: pg:db:rlock
        expr: sum without (mode) (pg_lock_count{mode="AccessShareLock"})
      - record: pg:ins:rlock
        expr: sum without(datname) (pg:db:rlock)
      - record: pg:svc:rlock
        expr: sum by (cls, role) (pg:ins:rlock)
      - record: pg:cls:rlock
        expr: sum by (cls) (pg:ins:rlock)

      # write category lock (insert|update|delete): RowShareLock and
      # RowExclusiveLock
      - record: pg:db:wlock
        expr: sum without (mode) (pg_lock_count{mode=~"RowShareLock|RowExclusiveLock"})
      - record: pg:ins:wlock
        expr: sum without(datname) (pg:db:wlock)
      - record: pg:svc:wlock
        expr: sum by (cls, role) (pg:ins:wlock)
      - record: pg:cls:wlock
        expr: sum by (cls) (pg:ins:wlock)

      # exclusive category lock: the five remaining modes listed in the matcher
      - record: pg:db:xlock
        expr: sum without (mode) (pg_lock_count{mode=~"AccessExclusiveLock|ExclusiveLock|ShareRowExclusiveLock|ShareLock|ShareUpdateExclusiveLock"})
      - record: pg:ins:xlock
        expr: sum without(datname) (pg:db:xlock)
      - record: pg:svc:xlock
        expr: sum by (cls, role) (pg:ins:xlock)
      - record: pg:cls:xlock
        expr: sum by (cls) (pg:ins:xlock)


      #==============================================================#
      #                          Temp                                #
      #==============================================================#
      # temp bytes written per second (1m rate), rolled up db -> ins -> svc -> cls
      - record: pg:db:temp_bytes
        expr: rate(pg_db_temp_bytes{}[1m])
      - record: pg:ins:temp_bytes
        expr: sum without(datname) (pg:db:temp_bytes)
      - record: pg:svc:temp_bytes
        expr: sum by (cls, role) (pg:ins:temp_bytes)
      - record: pg:cls:temp_bytes
        expr: sum by (cls) (pg:ins:temp_bytes)

      # temp files created during the last minute (counter increase, not a rate)
      - record: pg:db:temp_files
        expr: increase(pg_db_temp_files{}[1m])
      - record: pg:ins:temp_files
        expr: sum without(datname) (pg:db:temp_files)
      - record: pg:svc:temp_files
        expr: sum by (cls, role) (pg:ins:temp_files)
      - record: pg:cls:temp_files
        expr: sum by (cls) (pg:ins:temp_files)



      #==============================================================#
      #                           Size                               #
      #==============================================================#
      # database size: instance value is the exporter gauge as-is,
      # cluster value is the sum over the cluster's instances
      - record: pg:ins:db_size
        expr: pg_size_database
      - record: pg:cls:db_size
        expr: sum by (cls) (pg:ins:db_size)
      # wal size
      - record: pg:ins:wal_size
        expr: pg_size_wal
      - record: pg:cls:wal_size
        expr: sum by (cls) (pg:ins:wal_size)
      # log size
      # the cluster rule reads pg_size_log directly rather than
      # pg:ins:log_size; both refer to the same underlying gauge
      - record: pg:ins:log_size
        expr: pg_size_log
      - record: pg:cls:log_size
        expr: sum by (cls) (pg_size_log)



      #==============================================================#
      #                        Checkpoint                            #
      #==============================================================#
      # checkpoint stats
      # last_ckpt re-exports the exporter's pg_checkpoint_elapse gauge
      - record: pg:ins:last_ckpt
        expr: pg_checkpoint_elapse
      # timed / requested checkpoints observed within the last 30s
      - record: pg:ins:ckpt_timed
        expr: increase(pg_bgwriter_checkpoints_timed{}[30s])
      - record: pg:ins:ckpt_req
        expr: increase(pg_bgwriter_checkpoints_req{}[30s])
      # checkpoints (timed + requested) during the last hour
      # increase() is only meaningful on counters, so it is applied to the raw
      # pg_bgwriter_* counters rather than to the 30s-increase gauges above
      # NOTE(review): despite the `cls` level in its name this series is not
      # aggregated by cluster; the label set is left unchanged
      - record: pg:cls:ckpt_1h
        expr: increase(pg_bgwriter_checkpoints_timed{}[1h]) + increase(pg_bgwriter_checkpoints_req{}[1h])

      # buffer flush & alloc
      # buffer counts are multiplied by 8192 to express them in bytes
      # (assumes the default 8 KiB PostgreSQL block size -- TODO confirm)
      - record: pg:ins:buf_flush_backend
        expr: irate(pg_bgwriter_buffers_backend{}[1m]) * 8192
      - record: pg:ins:buf_flush_checkpoint
        expr: irate(pg_bgwriter_buffers_checkpoint{}[1m]) * 8192

      # total flush = backend flush + checkpoint flush, then rolled up
      - record: pg:ins:buf_flush
        expr: pg:ins:buf_flush_backend + pg:ins:buf_flush_checkpoint
      - record: pg:svc:buf_flush
        expr: sum by (cls, role) (pg:ins:buf_flush)
      - record: pg:cls:buf_flush
        expr: sum by (cls) (pg:ins:buf_flush)
      - record: pg:all:buf_flush
        expr: sum(pg:cls:buf_flush)

      # buffer allocation, same byte conversion and roll-up
      - record: pg:ins:buf_alloc
        expr: irate(pg_bgwriter_buffers_alloc{}[1m]) * 8192
      - record: pg:svc:buf_alloc
        expr: sum by (cls, role) (pg:ins:buf_alloc)
      - record: pg:cls:buf_alloc
        expr: sum by (cls) (pg:ins:buf_alloc)
      - record: pg:all:buf_alloc
        expr: sum(pg:cls:buf_alloc)




      #==============================================================#
      #                           LSN                                #
      #==============================================================#
      # timeline & LSN: plain re-exports of the checkpoint gauges
      # note: `pg_timeline` is the one record name in this section that does
      # not use the level:metric naming pattern
      - record: pg_timeline
        expr: pg_checkpoint_tli
      - record: pg:ins:redo_lsn
        expr: pg_checkpoint_redo_lsn
      - record: pg:ins:checkpoint_lsn
        expr: pg_checkpoint_checkpoint_lsn

      # wal rate: 1m rate of pg_lsn per instance
      # the cluster value takes the max over role="primary" instances,
      # the global value sums the cluster values
      - record: pg:ins:wal_rate
        expr: rate(pg_lsn[1m])
      - record: pg:cls:wal_rate
        expr: max by (cls) (pg:ins:wal_rate{role="primary"})
      - record: pg:all:wal_rate
        expr: sum(pg:cls:wal_rate)



      #==============================================================#
      #                       Replication                            #
      #==============================================================#
      # lag time from the replica's view; cluster and global values take the
      # worst (max) lag
      - record: pg:ins:lag_seconds
        expr: pg_lag
      - record: pg:cls:lag_seconds
        expr: max by (cls) (pg:ins:lag_seconds)
      - record: pg:all:lag_seconds
        expr: max(pg:cls:lag_seconds)

      # sync status
      # label_replace copies application_name into the `ins` label, so the
      # series is keyed by the replica named in application_name
      - record: pg:ins:sync_status # application_name must be set to the replica ins name
        expr: max by (ins, svc, cls) (label_replace(pg_replication_sync_status, "ins", "$1", "application_name", "(.+)"))

      # lag of self (application_name must be set to the standby ins name)
      # lag bytes = pg_replication_lsn - pg_replication_replay_lsn, re-keyed by
      # application_name in the same way as sync_status above
      - record: pg:ins:lag_bytes
        expr: max by (ins, svc, cls, role) (label_replace(pg_replication_lsn{} - pg_replication_replay_lsn{}, "ins", "$1", "application_name", "(.+)"))
      - record: pg:cls:lag_bytes
        expr: max by (cls) (pg:ins:lag_bytes)
      - record: pg:all:lag_bytes
        expr: max(pg:cls:lag_bytes)

      # replication slot retained bytes
      - record: pg:ins:slot_retained_bytes
        expr: pg_slot_retained_bytes

      # replica walreceiver: plain re-exports of the walreceiver gauges
      - record: pg:ins:recv_init_lsn
        expr: pg_walreceiver_init_lsn
      - record: pg:ins:recv_last_lsn
        expr: pg_walreceiver_last_lsn
      - record: pg:ins:recv_init_tli
        expr: pg_walreceiver_init_tli
      - record: pg:ins:recv_last_tli
        expr: pg_walreceiver_last_tli




      #==============================================================#
      # Cluster Level Metrics
      #==============================================================#
      # cluster member count
      # each rule first collapses duplicate series with `max by (cls, ins)`,
      # then counts the instances that satisfy the condition
      # leader: instances whose pg_status equals 3
      # (presumably the exporter's code for a primary -- TODO confirm)
      - record: pg:cls:leader
        expr: count by (cls, ins) (max by (cls, ins) (pg_status{}) == 3)
      # size: number of instances reporting pg_up, whatever its value
      - record: pg:cls:size
        expr: count by (cls) (max by (cls, ins) (pg_up{}))
      - record: pg:cls:timeline
        expr: max by (cls) (pg_checkpoint_tli{})
      # primarys / replicas: instances with pg_in_recovery == 0 / == 1
      - record: pg:cls:primarys
        expr: count by (cls) (max by (cls, ins) (pg_in_recovery{}) == 0)
      - record: pg:cls:replicas
        expr: count by (cls) (max by (cls, ins) (pg_in_recovery{}) == 1)
      # synchronous: 1 if any instance reports pg_sync_standby_enabled > 0, else 0
      - record: pg:cls:synchronous
        expr: max by (cls) (pg_sync_standby_enabled) > bool 0
      # bridging: non-primary instances that themselves stream to a downstream
      - record: pg:cls:bridging_instances
        expr: count by (cls, role, ins, ip) (pg_replication_lsn{state="streaming", role!="primary"} > 0)
      - record: pg:cls:bridging
        expr: count by (cls) (pg:cls:bridging_instances)
      # cascading: streaming replication series served by non-primary instances
      - record: pg:cls:cascading
        expr: count by (cls) (pg_replication_lsn{state="streaming", role!="primary"})





      #==============================================================#
      #                    Pgbouncer List                            #
      #==============================================================#
      # object list: one series per value of the `list` label on
      # pgbouncer_list_items, re-exported under a descriptive name
      - record: pg:ins:pools
        expr: pgbouncer_list_items{list="pools"}
      - record: pg:ins:pool_databases
        expr: pgbouncer_list_items{list="databases"}
      - record: pg:ins:pool_users
        expr: pgbouncer_list_items{list="users"}
      - record: pg:ins:login_clients
        expr: pgbouncer_list_items{list="login_clients"}
      - record: pg:ins:free_clients
        expr: pgbouncer_list_items{list="free_clients"}
      - record: pg:ins:used_clients
        expr: pgbouncer_list_items{list="used_clients"}
      - record: pg:ins:free_servers
        expr: pgbouncer_list_items{list="free_servers"}



      #==============================================================#
      #                  DBConfig (Pgbouncer)                        #
      #==============================================================#
      # per-database pool settings; pgbouncer's own admin database is excluded
      # pool_max_conn = pool size + reserve pool
      - record: pg:db:pool_max_conn
        expr: pgbouncer_database_pool_size{datname!="pgbouncer"} + pgbouncer_database_reserve_pool{datname!="pgbouncer"}
      - record: pg:db:pool_size
        expr: pgbouncer_database_pool_size{datname!="pgbouncer"}
      - record: pg:db:pool_reserve_size
        expr: pgbouncer_database_reserve_pool{datname!="pgbouncer"}
      - record: pg:db:pool_current_conn
        expr: pgbouncer_database_current_connections{datname!="pgbouncer"}
      - record: pg:db:pool_paused
        expr: pgbouncer_database_paused{datname!="pgbouncer"}
      - record: pg:db:pool_disabled
        expr: pgbouncer_database_disabled{datname!="pgbouncer"}



      #==============================================================#
      #                  Waiting (Pgbouncer)                         #
      #==============================================================#
      # average wait time, rescaled by 1000000
      # (presumably microseconds -> seconds; confirm against pgbouncer docs)
      - record: pg:db:wait_rt
        expr: pgbouncer_stat_avg_wait_time{datname!="pgbouncer"} / 1000000

      # max wait time among all clients
      # `/` binds tighter than `+`, so this is maxwait + (maxwait_us / 1000000):
      # the whole part plus the scaled-down `_us` part
      - record: pg:pool:maxwait
        expr: pgbouncer_pool_maxwait{datname!="pgbouncer"} + pgbouncer_pool_maxwait_us{datname!="pgbouncer"} / 1000000
      # each level keeps the worst (max) wait of the level below
      - record: pg:db:maxwait
        expr: max without(user) (pg:pool:maxwait)
      - record: pg:ins:maxwait
        expr: max without(user, datname) (pg:db:maxwait)
      - record: pg:svc:maxwait
        expr: max by (cls, role) (pg:ins:maxwait)
      - record: pg:cls:maxwait
        expr: max by (cls) (pg:ins:maxwait)
      - record: pg:all:maxwait
        expr: max(pg:cls:maxwait)

...

5 - 报警规则

Pigsty报警规则定义

Prometheus报警规则

机器节点报警规则

################################################################
#                          Node Alert                          #
################################################################
- name: node-alert
  rules:

    # node exporter down for 1m triggers a P1 alert
    # the matcher selects scrape targets whose instance label ends in :9100
    - alert: NODE_EXPORTER_DOWN
      expr: up{instance=~"^.*:(9100)$"} == 0
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Exporter Down: {{ $labels.ins }} {{ $value }}"
        # dashboard link uses the same g.pigsty host as every other alert
        # NOTE(review): var-ip is fed $labels.instance here while the other
        # node alerts pass $labels.ip -- confirm which the dashboard expects
        description: |
          up[instance={{ $labels.instance }}] = {{ $value }} == 0
          http://g.pigsty/d/node?var-ip={{ $labels.instance }}&from=now-5m&to=now&refresh=10s



    #==============================================================#
    #                          CPU & Load                          #
    #==============================================================#
    # node avg CPU usage > 90% for 1m triggers a P1 alert
    - alert: NODE_CPU_HIGH
      expr: node:ins:cpu_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node CPU High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:ins:cpu_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=28&fullscreen&var-ip={{ $labels.ip }}

    # node load5 > 100% for 3m triggers a P2 alert
    - alert: NODE_LOAD_HIGH
      expr: node:ins:stdload5 > 1
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node Load High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:ins:stdload5[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 100%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=37&fullscreen&var-ip={{ $labels.ip }}



    #==============================================================#
    #                      Disk & Filesystem                       #
    #==============================================================#
    # main fs readonly triggers an immediate P0 alert (no `for` clause)
    # filesystems whose type matches (n|root|tmp)fs.* are ignored
    - alert: NODE_FS_READONLY
      expr: node_filesystem_readonly{fstype!~"(n|root|tmp)fs.*"} == 1
      labels:
        severity: P0
      annotations:
        summary: "P0 Node Filesystem Readonly: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node_filesystem_readonly{ins={{ $labels.ins }}, ip={{ $labels.ip }},fstype!~"(n|root|tmp)fs.*"} == 1
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=110&fullscreen&var-ip={{ $labels.ip }}

    # main fs usage > 90% for 1m triggers a P1 alert
    - alert: NODE_FS_SPACE_FULL
      expr: node:fs:space_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Filesystem Space Full: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:fs:space_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=110&fullscreen&var-ip={{ $labels.ip }}

    # main fs inode usage > 90% for 1m triggers a P1 alert
    - alert: NODE_FS_INODE_FULL
      expr: node:fs:inode_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Filesystem iNode Full: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:fs:inode_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=110&fullscreen&var-ip={{ $labels.ip }}

    # file descriptor usage > 90% for 1m triggers a P1 alert
    - alert: NODE_FD_FULL
      expr: node:fs:fd_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node File Descriptor Full: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:fs:fd_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=58&fullscreen&var-ip={{ $labels.ip }}


    # ssd read latency > 32ms for 3m triggers a P2 alert
    # samples of 10000 or more are excluded (the "long-read" exception)
    - alert: NODE_READ_LATENCY_HIGH
      expr: node:dev:disk_read_rt  < 10000 and node:dev:disk_read_rt  > 0.032
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node Read Latency High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:dev:disk_read_rt[ins={{ $labels.ins }}, ip={{ $labels.ip }}, device={{ $labels.device }}] = {{ $value }} > 32ms
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=29&fullscreen&var-ip={{ $labels.ip }}

    # ssd write latency > 16ms for 3m triggers a P2 alert
    # samples of 10000 or more are excluded, as in the read-latency rule
    - alert: NODE_WRITE_LATENCY_HIGH
      expr: node:dev:disk_write_rt  < 10000 and node:dev:disk_write_rt  > 0.016
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node Write Latency High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:dev:disk_write_rt[ins={{ $labels.ins }}, ip={{ $labels.ip }}, device={{ $labels.device }}] = {{ $value }} > 16ms
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=29&fullscreen&var-ip={{ $labels.ip }}



    #==============================================================#
    #                           Memory                             #
    #==============================================================#
    # node memory usage > 80% for 1m triggers a P1 alert
    - alert: NODE_MEM_HIGH
      expr: node:ins:mem_usage > 0.80
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Mem High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:ins:mem_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 80%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=40&fullscreen&var-ip={{ $labels.ip }}



    #==============================================================#
    #                      Network & TCP                           #
    #==============================================================#
    # node tcp listen overflow rate > 2 for 3m triggers a P1 alert
    - alert: NODE_TCP_LISTEN_OVERFLOW
      expr: node:ins:tcp_overflow_rate > 2
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node TCP Listen Overflow: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:ins:tcp_overflow_rate[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 2
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=55&fullscreen&var-ip={{ $labels.ip }}

    # node tcp retrans > 32 per sec for 3m triggers a P2 alert
    - alert: NODE_TCP_RETRANS_HIGH
      expr: node:ins:tcp_retranssegs > 32
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node TCP Retrans High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node:ins:tcp_retranssegs[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 32
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=52&fullscreen&var-ip={{ $labels.ip }}

    # node tcp established connections > 32768 for 3m triggers a P2 alert
    - alert: NODE_TCP_CONN_HIGH
      expr: node_netstat_Tcp_CurrEstab > 32768
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node TCP Connection High: {{ $labels.ins }} {{ $labels.ip }}"
        # no trailing spaces inside the block scalar: they would become part
        # of the rendered annotation
        description: |
          node_netstat_Tcp_CurrEstab[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 32768
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=54&fullscreen&var-ip={{ $labels.ip }}



    #==============================================================#
    #                          Misc                                #
    #==============================================================#
    # node ntp offset > 1s (in either direction) for 1m triggers a P1 alert
    - alert: NODE_NTP_OFFSET_HIGH
      expr: abs(node_ntp_offset_seconds) > 1
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node NTP Offset High: {{ $labels.ins }} {{ $labels.ip }}"
        # the description states the threshold from `expr` (1 second);
        # $value is the absolute offset because expr wraps the metric in abs()
        description: |
          abs(node_ntp_offset_seconds)[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 1
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=70&fullscreen&var-ip={{ $labels.ip }}


数据库与连接池报警规则

---
################################################################
#                         PgSQL Alert                          #
################################################################
- name: pgsql-alert
  rules:

    #==============================================================#
    #                     Error / Aliveness                        #
    #==============================================================#
    # a cluster losing members triggers a P1 alert (warn: auto heal in 5min)
    # delta over 5m goes negative when pg:cls:size is lower than 5 minutes
    # ago, and returns to 0 once the drop leaves the window
    # NOTE(review): the original comment said P0 while the label says P1 --
    # confirm the intended severity
    - alert: PGSQL_CLUSTER_SHRINK
      expr: delta(pg:cls:size{}[5m]) < 0
      for: 15s
      labels:
        severity: P1
      annotations:
        # the window shown in the summary matches the [5m] window in expr
        summary: 'delta(pg:cls:size{cls={{ $labels.cls }}}[5m]) = {{ $value | printf "%.0f" }} < 0'
        # query string starts with `?` so the dashboard parameters are parsed
        description: |
          http://g.pigsty/d/pg-cluster?from=now-10m&to=now&var-cls={{ $labels.cls }}


    # postgres down triggers a P0 alert as soon as a rule evaluation sees
    # pg_up == 0 (there is no `for` clause)
    # the metric is `pg_up`, the same series the pg:cls:size recording rule
    # reads
    - alert: PGSQL_DOWN
      expr: pg_up{} == 0
      labels:
        severity: P0
      annotations:
        summary: "[P0] PGSQL_DOWN: {{ $labels.ins }} {{ $value }}"
        # query string starts with `?` so the dashboard parameters are parsed
        description: |
          pg_up[ins={{ $labels.ins }}] = {{ $value }} == 0
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&var-ins={{ $labels.ins }}

    # pgbouncer down triggers a P0 alert as soon as a rule evaluation sees
    # pgbouncer_up == 0 (there is no `for` clause)
    - alert: PGBOUNCER_DOWN
      expr: pgbouncer_up{} == 0
      labels:
        severity: P0
      annotations:
        summary: "P0 Pgbouncer Down: {{ $labels.ins }} {{ $value }}"
        # query string starts with `?` so the dashboard parameters are parsed
        description: |
          pgbouncer_up[ins={{ $labels.ins }}] = {{ $value }} == 0
          http://g.pigsty/d/pg-pgbouncer?from=now-10m&to=now&var-ins={{ $labels.ins }}

    # pg / pgbouncer exporter down for 1m triggers a P1 alert
    # (exporter targets are selected by scrape port: 9630 or 9631)
    - alert: PGSQL_EXPORTER_DOWN
      expr: up{instance=~"^.*:(9630|9631)$"} == 0
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG/PGB Exporter Down: {{ $labels.ins }} {{ $labels.instance }} {{ $value }}"
        description: |
          up[instance={{ $labels.instance }}] = {{ $value }} == 0
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=262&fullscreen&var-ins={{ $labels.ins }}



    #==============================================================#
    #                         Latency                              #
    #==============================================================#
    # a drop in the number of streaming downstreams over the last 5m, held for
    # 1m, triggers a P1 alert (warn: clears by itself once the drop leaves the
    # 5m delta() window)
    # NOTE(review): metric name restored to `pg_downstream_count` - confirm
    # against the metrics exposed by pg_exporter.
    - alert: PGSQL_REPLICATION_BREAK
      expr: delta(pg_downstream_count{state="streaming"}[5m]) < 0
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Replication Break: {{ $labels.ins }} {{ $value }}"
        description: |
          pg_downstream_count_delta[ins={{ $labels.ins }}] = {{ $value }} < 0
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=180&fullscreen&var-ins={{ $labels.ins }}

    # replication replay lag greater than 8 seconds for 3m triggers a P1 alert
    # pg_receivewal clients are excluded via their application_name
    # (`pg_receivewal` is the default application name of that tool).
    # NOTE(review): metric name restored to `pg_replication_replay_lag` -
    # confirm against the metrics exposed by pg_exporter.
    - alert: PGSQL_REPLICATION_LAG
      expr: pg_replication_replay_lag{application_name!='pg_receivewal'} > 8
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Replication Lagged: {{ $labels.ins }} {{ $value }}"
        description: |
          pg_replication_replay_lag[ins={{ $labels.ins }}] = {{ $value }} > 8s
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=384&fullscreen&var-ins={{ $labels.ins }}

    # pg average query response time above 16ms (0.016) for 1m triggers a P1 alert
    - alert: PGSQL_QUERY_RT_HIGH
      expr: pg:ins:query_rt > 0.016
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Query Response Time High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:query_rt[ins={{ $labels.ins }}] = {{ $value }} > 16ms
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=137&fullscreen&var-ins={{ $labels.ins }}


    #==============================================================#
    #                        Saturation                            #
    #==============================================================#
    # pg load1 higher than 70% (0.70) for 3m triggers a P1 alert
    - alert: PGSQL_LOAD_HIGH
      expr: pg:ins:load1{} > 0.70
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Load High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:load1[ins={{ $labels.ins }}] = {{ $value }} > 70%
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=210&fullscreen&var-ins={{ $labels.ins }}

    # active backends exceeding 2x the node's cpu count for 3m triggers a P1 alert
    # (the two series are joined on the `ins` label)
    - alert: PGSQL_BACKEND_HIGH
      expr: pg:ins:active_backends / on(ins) node:ins:cpu_count > 2
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Backend High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:active_backends/node:ins:cpu_count[ins={{ $labels.ins }}] = {{ $value }} > 2
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=150&fullscreen&var-ins={{ $labels.ins }}

    # more than 1 idle-in-transaction backend for 3m triggers a P2 alert
    - alert: PGSQL_IDLE_XACT_BACKEND_HIGH
      expr: pg:ins:ixact_backends > 1
      for: 3m
      labels:
        severity: P2
      annotations:
        # the priority shown in the summary must match the `severity` label
        summary: "P2 PG Idle In Transaction Backend High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:ixact_backends[ins={{ $labels.ins }}] = {{ $value }} > 1
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=161&fullscreen&var-ins={{ $labels.ins }}


    # more than 2 waiting clients for 3m triggers a P1 alert
    - alert: PGSQL_CLIENT_QUEUING
      expr: pg:ins:waiting_clients > 2
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Client Queuing: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:waiting_clients[ins={{ $labels.ins }}] = {{ $value }} > 2
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=159&fullscreen&var-ins={{ $labels.ins }}

    # age above 1 billion (near half way to wrap-around) for 3m triggers a P1 alert
    - alert: PGSQL_AGE_HIGH
      expr: pg:ins:age > 1000000000
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Age High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:age[ins={{ $labels.ins }}] = {{ $value }} > 1000000000
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=172&fullscreen&var-ins={{ $labels.ins }}



    #==============================================================#
    #                         Traffic                              #
    #==============================================================#
    # more than 30k TPS sustained for 3m triggers a P1 alert (pgbouncer bottleneck)
    - alert: PGSQL_TPS_HIGH
      expr: pg:ins:xacts > 30000
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 Postgres TPS High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:xacts[ins={{ $labels.ins }}] = {{ $value }} > 30000
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=125&fullscreen&var-ins={{ $labels.ins }}

...

6 - 标准输出

完成沙箱环境初始化剧本所执行的具体步骤与输出结果

在本地拉起沙箱时所执行的Makefile快捷命令,以及其输出结果。

命令概览

# 下载本项目代码
cd /tmp && git clone git@github.com:Vonng/pigsty.git && cd pigsty

make up         # 拉起vagrant虚拟机
make ssh        # 配置虚拟机ssh访问      【单次,下次启动无需再次执行】
sudo make dns   # 写入Pigsty静态DNS域名  【sudo输入密码,可选,单次】
make download   # 下载最新离线软件包      【可选,可显著加速初始化】
make upload     # 将离线软件包上传至元节点
make init       # 初始化Pigsty
make mon-view   # 打开Pigsty监控首页(默认用户密码:admin:admin)

clone

克隆并进入项目目录,后续操作均位于项目根目录中(以/tmp/pigsty为例)

cd /tmp && git clone git@github.com:Vonng/pigsty.git && cd pigsty

clean

清理所有的沙箱痕迹(如果有)

$ make clean
cd vagrant && vagrant destroy -f --parallel; exit 0
==> vagrant: A new version of Vagrant is available: 2.2.14 (installed version: 2.2.13)!
==> vagrant: To upgrade visit: https://www.vagrantup.com/downloads.html

==> node-3: Forcing shutdown of VM...
==> node-3: Destroying VM and associated drives...
==> node-2: Forcing shutdown of VM...
==> node-2: Destroying VM and associated drives...
==> node-1: Forcing shutdown of VM...
==> node-1: Destroying VM and associated drives...
==> meta: Forcing shutdown of VM...
==> meta: Destroying VM and associated drives...

up

执行make up将调用vagrant up命令,根据Vagrantfile中的定义,使用Virtualbox创建四台虚拟机。

请注意第一次执行vagrant up时,软件会自动从官网下载 CentOS/7 的虚拟机镜像。如果您的网络状况不佳(例如没有可用的网络代理),则可能需要等待相当长的一段时间。您也可以选择自己创建虚拟机,并根据 部署 一章的说明进行Pigsty部署(不建议)。

$ make up
cd vagrant && vagrant up
Bringing machine 'meta' up with 'virtualbox' provider...
Bringing machine 'node-1' up with 'virtualbox' provider...
Bringing machine 'node-2' up with 'virtualbox' provider...
Bringing machine 'node-3' up with 'virtualbox' provider...
==> meta: Cloning VM...
==> meta: Matching MAC address for NAT networking...
==> meta: Setting the name of the VM: vagrant_meta_1614587906789_29514
==> meta: Clearing any previously set network interfaces...
==> meta: Preparing network interfaces based on configuration...
    meta: Adapter 1: nat
    meta: Adapter 2: hostonly
==> meta: Forwarding ports...
    meta: 22 (guest) => 2222 (host) (adapter 1)
==> meta: Running 'pre-boot' VM customizations...
==> meta: Booting VM...
==> meta: Waiting for machine to boot. This may take a few minutes...
    meta: SSH address: 127.0.0.1:2222
    meta: SSH username: vagrant
    meta: SSH auth method: private key
==> meta: Machine booted and ready!
==> meta: Checking for guest additions in VM...
    meta: No guest additions were detected on the base box for this VM! Guest
    meta: additions are required for forwarded ports, shared folders, host only
    meta: networking, and more. If SSH fails on this machine, please install
    meta: the guest additions and repackage the box to continue.
    meta:
    meta: This is not an error message; everything may continue to work properly,
    meta: in which case you may ignore this message.
==> meta: Setting hostname...
==> meta: Configuring and enabling network interfaces...
==> meta: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> meta: Running provisioner: shell...
    meta: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-1jv6obp.sh
    meta: [INFO] write ssh config to /home/vagrant/.ssh
==> node-1: Cloning VM...
==> node-1: Matching MAC address for NAT networking...
==> node-1: Setting the name of the VM: vagrant_node-1_1614587930603_84690
==> node-1: Fixed port collision for 22 => 2222. Now on port 2200.
==> node-1: Clearing any previously set network interfaces...
==> node-1: Preparing network interfaces based on configuration...
    node-1: Adapter 1: nat
    node-1: Adapter 2: hostonly
==> node-1: Forwarding ports...
    node-1: 22 (guest) => 2200 (host) (adapter 1)
==> node-1: Running 'pre-boot' VM customizations...
==> node-1: Booting VM...
==> node-1: Waiting for machine to boot. This may take a few minutes...
    node-1: SSH address: 127.0.0.1:2200
    node-1: SSH username: vagrant
    node-1: SSH auth method: private key
==> node-1: Machine booted and ready!
==> node-1: Checking for guest additions in VM...
    node-1: No guest additions were detected on the base box for this VM! Guest
    node-1: additions are required for forwarded ports, shared folders, host only
    node-1: networking, and more. If SSH fails on this machine, please install
    node-1: the guest additions and repackage the box to continue.
    node-1:
    node-1: This is not an error message; everything may continue to work properly,
    node-1: in which case you may ignore this message.
==> node-1: Setting hostname...
==> node-1: Configuring and enabling network interfaces...
==> node-1: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> node-1: Running provisioner: shell...
    node-1: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-5w83e1.sh
    node-1: [INFO] write ssh config to /home/vagrant/.ssh
==> node-2: Cloning VM...
==> node-2: Matching MAC address for NAT networking...
==> node-2: Setting the name of the VM: vagrant_node-2_1614587953786_32441
==> node-2: Fixed port collision for 22 => 2222. Now on port 2201.
==> node-2: Clearing any previously set network interfaces...
==> node-2: Preparing network interfaces based on configuration...
    node-2: Adapter 1: nat
    node-2: Adapter 2: hostonly
==> node-2: Forwarding ports...
    node-2: 22 (guest) => 2201 (host) (adapter 1)
==> node-2: Running 'pre-boot' VM customizations...
==> node-2: Booting VM...
==> node-2: Waiting for machine to boot. This may take a few minutes...
    node-2: SSH address: 127.0.0.1:2201
    node-2: SSH username: vagrant
    node-2: SSH auth method: private key
==> node-2: Machine booted and ready!
==> node-2: Checking for guest additions in VM...
    node-2: No guest additions were detected on the base box for this VM! Guest
    node-2: additions are required for forwarded ports, shared folders, host only
    node-2: networking, and more. If SSH fails on this machine, please install
    node-2: the guest additions and repackage the box to continue.
    node-2:
    node-2: This is not an error message; everything may continue to work properly,
    node-2: in which case you may ignore this message.
==> node-2: Setting hostname...
==> node-2: Configuring and enabling network interfaces...
==> node-2: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> node-2: Running provisioner: shell...
    node-2: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-1xljcde.sh
    node-2: [INFO] write ssh config to /home/vagrant/.ssh
==> node-3: Cloning VM...
==> node-3: Matching MAC address for NAT networking...
==> node-3: Setting the name of the VM: vagrant_node-3_1614587977533_52921
==> node-3: Fixed port collision for 22 => 2222. Now on port 2202.
==> node-3: Clearing any previously set network interfaces...
==> node-3: Preparing network interfaces based on configuration...
    node-3: Adapter 1: nat
    node-3: Adapter 2: hostonly
==> node-3: Forwarding ports...
    node-3: 22 (guest) => 2202 (host) (adapter 1)
==> node-3: Running 'pre-boot' VM customizations...
==> node-3: Booting VM...
==> node-3: Waiting for machine to boot. This may take a few minutes...
    node-3: SSH address: 127.0.0.1:2202
    node-3: SSH username: vagrant
    node-3: SSH auth method: private key
==> node-3: Machine booted and ready!
==> node-3: Checking for guest additions in VM...
    node-3: No guest additions were detected on the base box for this VM! Guest
    node-3: additions are required for forwarded ports, shared folders, host only
    node-3: networking, and more. If SSH fails on this machine, please install
    node-3: the guest additions and repackage the box to continue.
    node-3:
    node-3: This is not an error message; everything may continue to work properly,
    node-3: in which case you may ignore this message.
==> node-3: Setting hostname...
==> node-3: Configuring and enabling network interfaces...
==> node-3: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> node-3: Running provisioner: shell...
    node-3: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-1cykx8o.sh
    node-3: [INFO] write ssh config to /home/vagrant/.ssh

ssh

新拉起的虚拟机默认用户为vagrant,需要配置本机到虚拟机的免密ssh访问。 执行make ssh命令将调用vagrant的ssh-config命令,将pigsty虚拟机节点的ssh配置文件写入~/.ssh/pigsty_config,并在~/.ssh/config中通过Include引入该文件。

通常该命令只需要在首次启动沙箱时执行一次,后续重新拉起的虚拟机通常会保有相同的SSH配置。

执行完毕后,用户才可以使用类似ssh node-1的方式通过SSH别名连接至沙箱内的虚拟机节点。

$ make ssh
cd vagrant && vagrant ssh-config > ~/.ssh/pigsty_config 2>/dev/null; true
if ! grep --quiet "pigsty_config" ~/.ssh/config ; then (echo 'Include ~/.ssh/pigsty_config' && cat ~/.ssh/config) >  ~/.ssh/config.tmp; mv ~/.ssh/config.tmp ~/.ssh/config && chmod 0600 ~/.ssh/config; fi
if ! grep --quiet "StrictHostKeyChecking=no" ~/.ssh/config ; then (echo 'StrictHostKeyChecking=no' && cat ~/.ssh/config) >  ~/.ssh/config.tmp; mv ~/.ssh/config.tmp ~/.ssh/config && chmod 0600 ~/.ssh/config; fi

dns

此命令将Pigsty沙箱虚拟机的静态DNS配置写入/etc/hosts,通常该命令只需要在首次启动沙箱时执行一次。

执行完毕后,用户才可以从本地浏览器使用域名访问 http://g.pigsty 等WebUI。

注意DNS命令需要SUDO权限执行,需要输入密码,因为/etc/hosts文件需要特权方可修改。

$ sudo make dns
Password: #<在此输入用户密码>
if ! grep --quiet "pigsty dns records" /etc/hosts ; then cat files/dns >> /etc/hosts; fi

download

从CDN下载最新的Pigsty离线安装包至本地,大小约1GB,约1分钟下载完成。

$ make download
curl http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg.tgz -o files/pkg.tgz
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1067M  100 1067M    0     0  15.2M      0  0:01:10  0:01:10 --:--:-- 29.0M

Pigsty是一个复杂的软件系统,为了确保系统的稳定,Pigsty会在初始化过程中从互联网下载所有依赖的软件包并建立本地Yum源。

所有依赖的软件总大小约1GB左右,下载速度取决于您的网络情况。尽管Pigsty已经尽量使用镜像源以加速下载,但少量包的下载仍可能受到防火墙的阻挠,可能出现非常慢的情况。您可以通过proxy_env配置项设置下载代理以完成首次下载,或直接下载预先打包好的离线安装包。

最新的离线安装包地址为:

Github Release:https://github.com/Vonng/pigsty/releases

CDN Download:http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg.tgz

您也可以手工下载好后放置于files/pkg.tgz

upload

将下载的离线安装包上传元节点并解压,加速后续初始化。

$ make upload
ssh -t meta "sudo rm -rf /tmp/pkg.tgz"
Connection to 127.0.0.1 closed.
scp -r files/pkg.tgz meta:/tmp/pkg.tgz
pkg.tgz                                                                                                                                                                 100% 1068MB  53.4MB/s   00:19
ssh -t meta "sudo mkdir -p /www/pigsty/; sudo rm -rf /www/pigsty/*; sudo tar -xf /tmp/pkg.tgz --strip-component=1 -C /www/pigsty/"
Connection to 127.0.0.1 closed.

init

完成上述操作后,执行make init即会调用ansible完成Pigsty系统的初始化。

$ make init
./sandbox.yml   # 快速初始化,并行初始化元节点与普通数据库节点

sandbox.yml是专门为本地沙箱环境准备的初始化剧本,通过同时初始化元节点和数据库节点节省了一半时间。 生产环境建议使用infra.yml与pgsql.yml分别依次完成元节点与普通节点的初始化。

如果您已经将离线安装包上传至元节点,那么初始化环境会比较快,视机器配置可能总共需要5~10分钟不等。

若离线安装包不存在,那么Pigsty会在初始化过程中从互联网下载约1GB数据,视网络条件可能需要20分钟或更久。

$ make init
./sandbox.yml                       # interleave sandbox provisioning
[WARNING]: Invalid characters were found in group names but not replaced, use -vvvv to see details

PLAY [Init local repo] ***********************************************************************************************************************************************************************************

TASK [repo : Create local repo directory] ****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Backup & remove existing repos] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Add required upstream repos] ****************************************************************************************************************************************************************
[WARNING]: Using a variable for a task's 'args' is unsafe in some situations (see https://docs.ansible.com/ansible/devel/reference_appendices/faq.html#argsplat-unsafe)
changed: [10.10.10.10] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
changed: [10.10.10.10] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
changed: [10.10.10.10] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
changed: [10.10.10.10] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
changed: [10.10.10.10] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
changed: [10.10.10.10] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
changed: [10.10.10.10] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
changed: [10.10.10.10] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
changed: [10.10.10.10] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})

TASK [repo : Check repo pkgs cache exists] ***************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Set fact whether repo_exists] ***************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Move upstream repo to backup] ***************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Add local file system repos] ****************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Remake yum cache if not exists] *************************************************************************************************************************************************************
[WARNING]: Consider using the yum module rather than running 'yum'.  If you need to use command because yum is insufficient you can add 'warn: false' to this command task or set
'command_warnings=False' in ansible.cfg to get rid of this message.
changed: [10.10.10.10]

TASK [repo : Install repo bootstrap packages] ************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=['yum-utils', 'createrepo', 'ansible', 'nginx', 'wget'])

TASK [repo : Render repo nginx server files] *************************************************************************************************************************************************************
changed: [10.10.10.10] => (item={'src': 'index.html.j2', 'dest': '/www/index.html'})
changed: [10.10.10.10] => (item={'src': 'default.conf.j2', 'dest': '/etc/nginx/conf.d/default.conf'})
changed: [10.10.10.10] => (item={'src': 'local.repo.j2', 'dest': '/www/pigsty.repo'})
changed: [10.10.10.10] => (item={'src': 'nginx.conf.j2', 'dest': '/etc/nginx/nginx.conf'})

TASK [repo : Disable selinux for repo server] ************************************************************************************************************************************************************
[WARNING]: SELinux state temporarily changed from 'enforcing' to 'permissive'. State change will take effect next reboot.
changed: [10.10.10.10]

TASK [repo : Launch repo nginx server] *******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Waits repo server online] *******************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Download web url packages] ******************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=https://github.com/Vonng/pg_exporter/releases/download/v0.3.2/pg_exporter-0.3.2-1.el7.x86_64.rpm)
skipping: [10.10.10.10] => (item=https://github.com/cybertec-postgresql/vip-manager/releases/download/v0.6/vip-manager_0.6-1_amd64.rpm)
skipping: [10.10.10.10] => (item=http://guichaz.free.fr/polysh/files/polysh-0.4-1.noarch.rpm)

TASK [repo : Download repo packages] *********************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=epel-release nginx wget yum-utils yum createrepo)
skipping: [10.10.10.10] => (item=ntp chrony uuid lz4 nc pv jq vim-enhanced make patch bash lsof wget unzip git tuned)
skipping: [10.10.10.10] => (item=readline zlib openssl libyaml libxml2 libxslt perl-ExtUtils-Embed ca-certificates)
skipping: [10.10.10.10] => (item=numactl grubby sysstat dstat iotop bind-utils net-tools tcpdump socat ipvsadm telnet)
skipping: [10.10.10.10] => (item=grafana prometheus2 pushgateway alertmanager)
skipping: [10.10.10.10] => (item=node_exporter postgres_exporter nginx_exporter blackbox_exporter)
skipping: [10.10.10.10] => (item=consul consul_exporter consul-template etcd)
skipping: [10.10.10.10] => (item=ansible python python-pip python-psycopg2 audit)
skipping: [10.10.10.10] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.10] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.10] => (item=haproxy keepalived dnsmasq)
skipping: [10.10.10.10] => (item=patroni patroni-consul patroni-etcd pgbouncer pg_cli pgbadger pg_activity)
skipping: [10.10.10.10] => (item=pgcenter boxinfo check_postgres emaj pgbconsole pg_bloat_check pgquarrel)
skipping: [10.10.10.10] => (item=barman barman-cli pgloader pgFormatter pitrery pspg pgxnclient PyGreSQL pgadmin4 tail_n_mail)
skipping: [10.10.10.10] => (item=postgresql13* postgis31* citus_13 timescaledb_13)
skipping: [10.10.10.10] => (item=pg_repack13 pg_squeeze13)
skipping: [10.10.10.10] => (item=pg_qualstats13 pg_stat_kcache13 system_stats_13 bgw_replstatus13)
skipping: [10.10.10.10] => (item=plr13 plsh13 plpgsql_check_13 plproxy13 plr13 plsh13 plpgsql_check_13 pldebugger13)
skipping: [10.10.10.10] => (item=hdfs_fdw_13 mongo_fdw13 mysql_fdw_13 ogr_fdw13 redis_fdw_13 pgbouncer_fdw13)
skipping: [10.10.10.10] => (item=wal2json13 count_distinct13 ddlx_13 geoip13 orafce13)
skipping: [10.10.10.10] => (item=rum_13 hypopg_13 ip4r13 jsquery_13 logerrors_13 periods_13 pg_auto_failover_13 pg_catcheck13)
skipping: [10.10.10.10] => (item=pg_fkpart13 pg_jobmon13 pg_partman13 pg_prioritize_13 pg_track_settings13 pgaudit15_13)
skipping: [10.10.10.10] => (item=pgcryptokey13 pgexportdoc13 pgimportdoc13 pgmemcache-13 pgmp13 pgq-13)
skipping: [10.10.10.10] => (item=pguint13 pguri13 prefix13  safeupdate_13 semver13  table_version13 tdigest13)

TASK [repo : Download repo pkg deps] *********************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=epel-release nginx wget yum-utils yum createrepo)
skipping: [10.10.10.10] => (item=ntp chrony uuid lz4 nc pv jq vim-enhanced make patch bash lsof wget unzip git tuned)
skipping: [10.10.10.10] => (item=readline zlib openssl libyaml libxml2 libxslt perl-ExtUtils-Embed ca-certificates)
skipping: [10.10.10.10] => (item=numactl grubby sysstat dstat iotop bind-utils net-tools tcpdump socat ipvsadm telnet)
skipping: [10.10.10.10] => (item=grafana prometheus2 pushgateway alertmanager)
skipping: [10.10.10.10] => (item=node_exporter postgres_exporter nginx_exporter blackbox_exporter)
skipping: [10.10.10.10] => (item=consul consul_exporter consul-template etcd)
skipping: [10.10.10.10] => (item=ansible python python-pip python-psycopg2 audit)
skipping: [10.10.10.10] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.10] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.10] => (item=haproxy keepalived dnsmasq)
skipping: [10.10.10.10] => (item=patroni patroni-consul patroni-etcd pgbouncer pg_cli pgbadger pg_activity)
skipping: [10.10.10.10] => (item=pgcenter boxinfo check_postgres emaj pgbconsole pg_bloat_check pgquarrel)
skipping: [10.10.10.10] => (item=barman barman-cli pgloader pgFormatter pitrery pspg pgxnclient PyGreSQL pgadmin4 tail_n_mail)
skipping: [10.10.10.10] => (item=postgresql13* postgis31* citus_13 timescaledb_13)
skipping: [10.10.10.10] => (item=pg_repack13 pg_squeeze13)
skipping: [10.10.10.10] => (item=pg_qualstats13 pg_stat_kcache13 system_stats_13 bgw_replstatus13)
skipping: [10.10.10.10] => (item=plr13 plsh13 plpgsql_check_13 plproxy13 plr13 plsh13 plpgsql_check_13 pldebugger13)
skipping: [10.10.10.10] => (item=hdfs_fdw_13 mongo_fdw13 mysql_fdw_13 ogr_fdw13 redis_fdw_13 pgbouncer_fdw13)
skipping: [10.10.10.10] => (item=wal2json13 count_distinct13 ddlx_13 geoip13 orafce13)
skipping: [10.10.10.10] => (item=rum_13 hypopg_13 ip4r13 jsquery_13 logerrors_13 periods_13 pg_auto_failover_13 pg_catcheck13)
skipping: [10.10.10.10] => (item=pg_fkpart13 pg_jobmon13 pg_partman13 pg_prioritize_13 pg_track_settings13 pgaudit15_13)
skipping: [10.10.10.10] => (item=pgcryptokey13 pgexportdoc13 pgimportdoc13 pgmemcache-13 pgmp13 pgq-13)
skipping: [10.10.10.10] => (item=pguint13 pguri13 prefix13  safeupdate_13 semver13  table_version13 tdigest13)

TASK [repo : Create local repo index] ********************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [repo : Copy bootstrap scripts] *********************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [repo : Mark repo cache as valid] *******************************************************************************************************************************************************************
skipping: [10.10.10.10]

PLAY [Provision Node] ************************************************************************************************************************************************************************************

TASK [node : Update node hostname] ***********************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Add new hostname to /etc/hosts] *************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Write static dns records] *******************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=10.10.10.10 yum.pigsty)
changed: [10.10.10.11] => (item=10.10.10.10 yum.pigsty)
changed: [10.10.10.13] => (item=10.10.10.10 yum.pigsty)
changed: [10.10.10.12] => (item=10.10.10.10 yum.pigsty)

TASK [node : Get old nameservers] ************************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Truncate resolv file] ***********************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Write resolv options] ***********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.12] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.10] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.13] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.11] => (item=domain service.consul)
changed: [10.10.10.12] => (item=domain service.consul)
changed: [10.10.10.13] => (item=domain service.consul)
changed: [10.10.10.10] => (item=domain service.consul)

TASK [node : Add new nameservers] ************************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=10.10.10.10)
changed: [10.10.10.12] => (item=10.10.10.10)
changed: [10.10.10.10] => (item=10.10.10.10)
changed: [10.10.10.13] => (item=10.10.10.10)

TASK [node : Append old nameservers] *********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=10.0.2.3)
changed: [10.10.10.12] => (item=10.0.2.3)
changed: [10.10.10.10] => (item=10.0.2.3)
changed: [10.10.10.13] => (item=10.0.2.3)

TASK [node : Node configure disable firewall] ************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [node : Node disable selinux by default] ************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
[WARNING]: SELinux state change will take effect next reboot
ok: [10.10.10.10]

TASK [node : Backup existing repos] **********************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Install upstream repo] **********************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.12] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.13] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.13] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.13] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.13] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.13] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.10] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.10] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.11] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.10] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.11] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.12] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.10] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.11] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.12] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.10] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.11] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.12] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.10] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.11] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.12] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.13] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.10] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.11] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.12] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.10] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.11] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.12] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.11] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})
skipping: [10.10.10.12] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.12] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})

TASK [node : Install local repo] *************************************************************************************************************************************************************************
changed: [10.10.10.13] => (item=http://yum.pigsty/pigsty.repo)
changed: [10.10.10.12] => (item=http://yum.pigsty/pigsty.repo)
changed: [10.10.10.11] => (item=http://yum.pigsty/pigsty.repo)
changed: [10.10.10.10] => (item=http://yum.pigsty/pigsty.repo)

TASK [node : Install node basic packages] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=[])
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])

TASK [node : Install node extra packages] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=[])
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])

TASK [node : Install meta specific packages] *************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=[])
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])

TASK [node : Install node basic packages] ****************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])
changed: [10.10.10.13] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])
changed: [10.10.10.11] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])
changed: [10.10.10.12] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])

TASK [node : Install node extra packages] ****************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])
changed: [10.10.10.12] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])
changed: [10.10.10.13] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])
changed: [10.10.10.10] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])

TASK [node : Install meta specific packages] *************************************************************************************************************************************************************
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])
changed: [10.10.10.10] => (item=['grafana,prometheus2,alertmanager,nginx_exporter,blackbox_exporter,pushgateway', 'dnsmasq,nginx,ansible,pgbadger,polysh'])

TASK [node : Node configure disable numa] ****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Node configure disable swap] ****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Node configure unmount swap] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=swap)
skipping: [10.10.10.10] => (item=none)
skipping: [10.10.10.11] => (item=swap)
skipping: [10.10.10.11] => (item=none)
skipping: [10.10.10.12] => (item=swap)
skipping: [10.10.10.12] => (item=none)
skipping: [10.10.10.13] => (item=swap)
skipping: [10.10.10.13] => (item=none)

TASK [node : Node setup static network] ******************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Node configure disable firewall] ************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [node : Node configure disk prefetch] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Enable linux kernel modules] ****************************************************************************************************************************************************************
changed: [10.10.10.13] => (item=softdog)
changed: [10.10.10.12] => (item=softdog)
changed: [10.10.10.11] => (item=softdog)
changed: [10.10.10.10] => (item=softdog)
changed: [10.10.10.13] => (item=br_netfilter)
changed: [10.10.10.12] => (item=br_netfilter)
changed: [10.10.10.11] => (item=br_netfilter)
changed: [10.10.10.10] => (item=br_netfilter)
changed: [10.10.10.12] => (item=ip_vs)
changed: [10.10.10.13] => (item=ip_vs)
changed: [10.10.10.11] => (item=ip_vs)
changed: [10.10.10.10] => (item=ip_vs)
changed: [10.10.10.13] => (item=ip_vs_rr)
changed: [10.10.10.12] => (item=ip_vs_rr)
changed: [10.10.10.11] => (item=ip_vs_rr)
changed: [10.10.10.10] => (item=ip_vs_rr)
ok: [10.10.10.13] => (item=ip_vs_rr)
ok: [10.10.10.12] => (item=ip_vs_rr)
ok: [10.10.10.11] => (item=ip_vs_rr)
ok: [10.10.10.10] => (item=ip_vs_rr)
changed: [10.10.10.13] => (item=ip_vs_wrr)
changed: [10.10.10.12] => (item=ip_vs_wrr)
changed: [10.10.10.11] => (item=ip_vs_wrr)
changed: [10.10.10.10] => (item=ip_vs_wrr)
changed: [10.10.10.13] => (item=ip_vs_sh)
changed: [10.10.10.12] => (item=ip_vs_sh)
changed: [10.10.10.11] => (item=ip_vs_sh)
changed: [10.10.10.10] => (item=ip_vs_sh)
changed: [10.10.10.13] => (item=nf_conntrack_ipv4)
changed: [10.10.10.12] => (item=nf_conntrack_ipv4)
changed: [10.10.10.11] => (item=nf_conntrack_ipv4)
changed: [10.10.10.10] => (item=nf_conntrack_ipv4)

TASK [node : Enable kernel module on reboot] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.10]

TASK [node : Get config parameter page count] ************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Get config parameter page size] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Tune shmmax and shmall via mem] *************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Create tuned profiles] **********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=oltp)
changed: [10.10.10.12] => (item=oltp)
changed: [10.10.10.10] => (item=oltp)
changed: [10.10.10.13] => (item=oltp)
changed: [10.10.10.11] => (item=olap)
changed: [10.10.10.12] => (item=olap)
changed: [10.10.10.13] => (item=olap)
changed: [10.10.10.10] => (item=olap)
changed: [10.10.10.11] => (item=crit)
changed: [10.10.10.12] => (item=crit)
changed: [10.10.10.13] => (item=crit)
changed: [10.10.10.10] => (item=crit)
changed: [10.10.10.11] => (item=tiny)
changed: [10.10.10.12] => (item=tiny)
changed: [10.10.10.13] => (item=tiny)
changed: [10.10.10.10] => (item=tiny)

TASK [node : Render tuned profiles] **********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=oltp)
changed: [10.10.10.12] => (item=oltp)
changed: [10.10.10.13] => (item=oltp)
changed: [10.10.10.10] => (item=oltp)
changed: [10.10.10.12] => (item=olap)
changed: [10.10.10.11] => (item=olap)
changed: [10.10.10.13] => (item=olap)
changed: [10.10.10.10] => (item=olap)
changed: [10.10.10.12] => (item=crit)
changed: [10.10.10.11] => (item=crit)
changed: [10.10.10.13] => (item=crit)
changed: [10.10.10.10] => (item=crit)
changed: [10.10.10.11] => (item=tiny)
changed: [10.10.10.12] => (item=tiny)
changed: [10.10.10.13] => (item=tiny)
changed: [10.10.10.10] => (item=tiny)

TASK [node : Active tuned profile] ***********************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [node : Change additional sysctl params] ************************************************************************************************************************************************************
changed: [10.10.10.13] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})
changed: [10.10.10.12] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})
changed: [10.10.10.11] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})
changed: [10.10.10.10] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})

TASK [node : Copy default user bash profile] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Setup node default pam ulimits] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Create os user group admin] *****************************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Create os user admin] ***********************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]

TASK [node : Grant admin group nopass sudo] **************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Add no host checking to ssh config] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Add admin ssh no host checking] *************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [node : Fetch all admin public keys] ****************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [node : Exchange all admin ssh keys] ****************************************************************************************************************************************************************
changed: [10.10.10.10 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.10'])
changed: [10.10.10.13 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.10'])
changed: [10.10.10.12 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.10'])
changed: [10.10.10.11 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.10'])
changed: [10.10.10.10 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.11'])
changed: [10.10.10.12 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.11'])
changed: [10.10.10.13 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.11'])
changed: [10.10.10.11 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.11'])
changed: [10.10.10.10 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.12'])
changed: [10.10.10.13 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.12'])
changed: [10.10.10.12 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.12'])
changed: [10.10.10.11 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.12'])
changed: [10.10.10.10 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.13'])
changed: [10.10.10.13 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.13'])
changed: [10.10.10.11 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.13'])
changed: [10.10.10.12 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.13'])

TASK [node : Install public keys] ************************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)
changed: [10.10.10.10] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)
changed: [10.10.10.12] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)
changed: [10.10.10.13] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)

TASK [node : Install ntp package] ************************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.10]
ok: [10.10.10.13]

TASK [node : Install chrony package] *********************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]
ok: [10.10.10.10]

TASK [node : Setup default node timezone] ****************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]

TASK [node : Copy the ntp.conf file] *********************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Copy the chrony.conf template] **************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Launch ntpd service] ************************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Launch chronyd service] *********************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

PLAY [Init meta service] *********************************************************************************************************************************************************************************

TASK [ca : Create local ca directory] ********************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [ca : Copy ca cert from local files] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=ca.key)
skipping: [10.10.10.10] => (item=ca.crt)

TASK [ca : Check ca key cert exists] *********************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [ca : Create self-signed CA key-cert] ***************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Make sure dnsmasq package installed] **************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nameserver : Copy dnsmasq /etc/dnsmasq.d/config] ***************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Add dynamic dns records to meta] ******************************************************************************************************************************************************
changed: [10.10.10.10] => (item=10.10.10.2  pg-meta)
changed: [10.10.10.10] => (item=10.10.10.3  pg-test)
changed: [10.10.10.10] => (item=10.10.10.10 meta-1)
changed: [10.10.10.10] => (item=10.10.10.11 node-1)
changed: [10.10.10.10] => (item=10.10.10.12 node-2)
changed: [10.10.10.10] => (item=10.10.10.13 node-3)
changed: [10.10.10.10] => (item=10.10.10.10 pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 y.pigsty yum.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 c.pigsty consul.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 g.pigsty grafana.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 p.pigsty prometheus.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 a.pigsty alertmanager.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 n.pigsty ntp.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 h.pigsty haproxy.pigsty)

TASK [nameserver : Launch meta dnsmasq service] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Wait for meta dnsmasq online] *********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nameserver : Register consul dnsmasq service] ******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Reload consul] ************************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Make sure nginx installed] *****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Create local html directory] ***************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Create nginx config directory] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Update default nginx index page] ***********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Copy nginx default config] *****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Copy nginx upstream conf] ******************************************************************************************************************************************************************
changed: [10.10.10.10] => (item={'name': 'home', 'host': 'pigsty', 'url': '127.0.0.1:3000'})
changed: [10.10.10.10] => (item={'name': 'consul', 'host': 'c.pigsty', 'url': '127.0.0.1:8500'})
changed: [10.10.10.10] => (item={'name': 'grafana', 'host': 'g.pigsty', 'url': '127.0.0.1:3000'})
changed: [10.10.10.10] => (item={'name': 'prometheus', 'host': 'p.pigsty', 'url': '127.0.0.1:9090'})
changed: [10.10.10.10] => (item={'name': 'alertmanager', 'host': 'a.pigsty', 'url': '127.0.0.1:9093'})
changed: [10.10.10.10] => (item={'name': 'haproxy', 'host': 'h.pigsty', 'url': '127.0.0.1:9091'})

TASK [nginx : Templating /etc/nginx/haproxy.conf] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Render haproxy upstream in cluster mode] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [nginx : Render haproxy location in cluster mode] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [nginx : Templating haproxy cluster index] **********************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [nginx : Templating haproxy cluster index] **********************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
ok: [10.10.10.10] => (item=pg-test)

TASK [nginx : Restart meta nginx service] ****************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Wait for nginx service online] *************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Make sure nginx exporter installed] ********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Config nginx_exporter options] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Restart nginx_exporter service] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Wait for nginx exporter online] ************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Register consul nginx service] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Register consul nginx-exporter service] ****************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Reload consul] *****************************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Install prometheus and alertmanager] **************************************************************************************************************************************************
ok: [10.10.10.10] => (item=prometheus2)
ok: [10.10.10.10] => (item=alertmanager)

TASK [prometheus : Wipe out prometheus config dir] *******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Wipe out existing prometheus data] ****************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [prometheus : Create postgres directory structure] **************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/prometheus)
changed: [10.10.10.10] => (item=/etc/prometheus/bin)
changed: [10.10.10.10] => (item=/etc/prometheus/rules)
changed: [10.10.10.10] => (item=/etc/prometheus/targets)
changed: [10.10.10.10] => (item=/export/prometheus/data)

TASK [prometheus : Copy prometheus bin scripts] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy prometheus rules scripts] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy alertmanager config] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Render prometheus config] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Config /etc/prometheus opts] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Launch prometheus service] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Launch alertmanager service] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Wait for prometheus online] ***********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [prometheus : Wait for alertmanager online] *********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [prometheus : Render prometheus targets in cluster mode] ********************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [prometheus : Reload prometheus service] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy prometheus service definition] ***************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy alertmanager service definition] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Reload consul to register prometheus] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Make sure grafana is installed] **********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [grafana : Check grafana plugin cache exists] *******************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [grafana : Provision grafana plugins via cache] *****************************************************************************************************************************************************
[WARNING]: Consider using the file module with state=absent rather than running 'rm'.  If you need to use command because file is insufficient you can add 'warn: false' to this command task or set
'command_warnings=False' in ansible.cfg to get rid of this message.
changed: [10.10.10.10]

TASK [grafana : Download grafana plugins from web] *******************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=redis-datasource)
skipping: [10.10.10.10] => (item=simpod-json-datasource)
skipping: [10.10.10.10] => (item=fifemon-graphql-datasource)
skipping: [10.10.10.10] => (item=sbueringer-consul-datasource)
skipping: [10.10.10.10] => (item=camptocamp-prometheus-alertmanager-datasource)
skipping: [10.10.10.10] => (item=ryantxu-ajax-panel)
skipping: [10.10.10.10] => (item=marcusolsson-hourly-heatmap-panel)
skipping: [10.10.10.10] => (item=michaeldmoore-multistat-panel)
skipping: [10.10.10.10] => (item=marcusolsson-treemap-panel)
skipping: [10.10.10.10] => (item=pr0ps-trackmap-panel)
skipping: [10.10.10.10] => (item=dalvany-image-panel)
skipping: [10.10.10.10] => (item=magnesium-wordcloud-panel)
skipping: [10.10.10.10] => (item=cloudspout-button-panel)
skipping: [10.10.10.10] => (item=speakyourcode-button-panel)
skipping: [10.10.10.10] => (item=jdbranham-diagram-panel)
skipping: [10.10.10.10] => (item=grafana-piechart-panel)
skipping: [10.10.10.10] => (item=snuids-radar-panel)
skipping: [10.10.10.10] => (item=digrich-bubblechart-panel)

TASK [grafana : Download grafana plugins from web] *******************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=https://github.com/Vonng/grafana-echarts)

TASK [grafana : Create grafana plugins cache] ************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [grafana : Copy /etc/grafana/grafana.ini] ***********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Remove grafana provision dir] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Copy provisioning content] ***************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Copy pigsty dashboards] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Copy pigsty icon image] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Replace grafana icon with pigsty] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Launch grafana service] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Wait for grafana online] *****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [grafana : Update grafana default preferences] ******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Register consul grafana service] *********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Reload consul] ***************************************************************************************************************************************************************************
changed: [10.10.10.10]

PLAY [Init dcs] ******************************************************************************************************************************************************************************************

TASK [consul : Check for existing consul] ****************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Consul exists flag fact set] **************************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [consul : Abort due to consul exists] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Clean existing consul instance] ***********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Stop any running consul instance] *********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Remove existing consul dir] ***************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/consul.d)
changed: [10.10.10.11] => (item=/etc/consul.d)
changed: [10.10.10.12] => (item=/etc/consul.d)
changed: [10.10.10.13] => (item=/etc/consul.d)
changed: [10.10.10.10] => (item=/var/lib/consul)
changed: [10.10.10.11] => (item=/var/lib/consul)
changed: [10.10.10.12] => (item=/var/lib/consul)
changed: [10.10.10.13] => (item=/var/lib/consul)

TASK [consul : Recreate consul dir] **********************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/consul.d)
changed: [10.10.10.11] => (item=/etc/consul.d)
changed: [10.10.10.12] => (item=/etc/consul.d)
changed: [10.10.10.13] => (item=/etc/consul.d)
changed: [10.10.10.10] => (item=/var/lib/consul)
changed: [10.10.10.11] => (item=/var/lib/consul)
changed: [10.10.10.13] => (item=/var/lib/consul)
changed: [10.10.10.12] => (item=/var/lib/consul)

TASK [consul : Make sure consul is installed] ************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [consul : Make sure consul dir exists] **************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Get dcs server node names] ****************************************************************************************************************************************************************
ok: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Get dcs node name from var] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Get dcs node name from var] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [consul : Fetch hostname as dcs node name] **********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Get dcs name from hostname] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Copy /etc/consul.d/consul.json] ***********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Copy consul agent service] ****************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [consul : Get dcs bootstrap expect quorum] ******************************************************************************************************************************************************************
ok: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Copy consul server service unit] **********************************************************************************************************************************************************
changed: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Launch consul server service] *************************************************************************************************************************************************************
changed: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Wait for consul server online] ************************************************************************************************************************************************************
ok: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Launch consul agent service] **************************************************************************************************************************************************************
skipping: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Wait for consul agent online] *************************************************************************************************************************************************************
skipping: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

PLAY [Init database cluster] *****************************************************************************************************************************************************************************

TASK [postgres : Create os group postgres] ***************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Make sure dcs group exists] *************************************************************************************************************************************************************
ok: [10.10.10.10] => (item=consul)
ok: [10.10.10.11] => (item=consul)
ok: [10.10.10.12] => (item=consul)
ok: [10.10.10.13] => (item=consul)
ok: [10.10.10.11] => (item=etcd)
ok: [10.10.10.10] => (item=etcd)
ok: [10.10.10.12] => (item=etcd)
ok: [10.10.10.13] => (item=etcd)

TASK [postgres : Create dbsu postgres] *******************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Grant dbsu nopass sudo] *****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Grant dbsu all sudo] ********************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Grant dbsu limited sudo] ****************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Config patroni watchdog support] ********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Add dbsu ssh no host checking] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Fetch dbsu public keys] *****************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Exchange dbsu ssh keys] *****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.10'])
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.11'])
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.12'])
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.13'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.10'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.11'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.12'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.13'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.10'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.11'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.12'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.13'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.10'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.11'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.12'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.13'])

TASK [postgres : Install official pgdg yum repo] *****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=postgresql${pg_version}*)
skipping: [10.10.10.10] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.10] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.11] => (item=postgresql${pg_version}*)
skipping: [10.10.10.10] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.11] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.10] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.11] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.12] => (item=postgresql${pg_version}*)
skipping: [10.10.10.10] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.11] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.12] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.11] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.12] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.13] => (item=postgresql${pg_version}*)
skipping: [10.10.10.11] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.12] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.13] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.12] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.13] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.12] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.13] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.13] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.13] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)

TASK [postgres : Install pg packages] ********************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])
changed: [10.10.10.11] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])
changed: [10.10.10.13] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])
changed: [10.10.10.12] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])

TASK [postgres : Install pg extensions] ******************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])
changed: [10.10.10.10] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])
changed: [10.10.10.13] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])
changed: [10.10.10.12] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])

TASK [postgres : Link /usr/pgsql to current version] *****************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Add pg bin dir to profile path] *********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Fix directory ownership] ****************************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Remove default postgres service] ********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Check necessary variables exists] *******************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.11] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.12] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.13] => {
    "changed": false,
    "msg": "All assertions passed"
}

TASK [postgres : Fetch variables via pg_cluster] *********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Set cluster basic facts for hosts] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Assert cluster primary singleton] *******************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.11] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.12] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.13] => {
    "changed": false,
    "msg": "All assertions passed"
}

TASK [postgres : Setup cluster primary ip address] *******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Setup repl upstream for primary] ********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Setup repl upstream for replicas] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Debug print instance summary] ***********************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "msg": "cluster=pg-meta service=pg-meta-primary instance=pg-meta-1 replication=[primary:itself]->10.10.10.10"
}
ok: [10.10.10.11] => {
    "msg": "cluster=pg-test service=pg-test-primary instance=pg-test-1 replication=[primary:itself]->10.10.10.11"
}
ok: [10.10.10.12] => {
    "msg": "cluster=pg-test service=pg-test-replica instance=pg-test-2 replication=[primary:itself]->10.10.10.12"
}
ok: [10.10.10.13] => {
    "msg": "cluster=pg-test service=pg-test-offline instance=pg-test-3 replication=[primary:itself]->10.10.10.13"
}

TASK [postgres : Check for existing postgres instance] ***************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Set fact whether pg port is open] *******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Abort due to existing postgres instance] ************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Clean existing postgres instance] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Shutdown existing postgres service] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Remove registered consul service] ***************************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.10]

TASK [postgres : Remove postgres metadata in consul] *****************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.10]

TASK [postgres : Remove existing postgres data] **********************************************************************************************************************************************************
ok: [10.10.10.10] => (item=/pg)
ok: [10.10.10.11] => (item=/pg)
ok: [10.10.10.12] => (item=/pg)
ok: [10.10.10.13] => (item=/pg)
ok: [10.10.10.10] => (item=/export/postgres)
ok: [10.10.10.11] => (item=/export/postgres)
ok: [10.10.10.12] => (item=/export/postgres)
ok: [10.10.10.13] => (item=/export/postgres)
ok: [10.10.10.10] => (item=/var/backups/postgres)
ok: [10.10.10.11] => (item=/var/backups/postgres)
ok: [10.10.10.12] => (item=/var/backups/postgres)
ok: [10.10.10.13] => (item=/var/backups/postgres)
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)

TASK [postgres : Make sure main and backup dir exists] ***************************************************************************************************************************************************
changed: [10.10.10.11] => (item=/export)
changed: [10.10.10.12] => (item=/export)
changed: [10.10.10.13] => (item=/export)
changed: [10.10.10.10] => (item=/export)
changed: [10.10.10.11] => (item=/var/backups)
changed: [10.10.10.12] => (item=/var/backups)
changed: [10.10.10.13] => (item=/var/backups)
changed: [10.10.10.10] => (item=/var/backups)

TASK [postgres : Create postgres directory structure] ****************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/export/postgres)
changed: [10.10.10.11] => (item=/export/postgres)
changed: [10.10.10.12] => (item=/export/postgres)
changed: [10.10.10.13] => (item=/export/postgres)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/bin)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/bin)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/bin)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/bin)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/log)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/log)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/log)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/log)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/tmp)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/tmp)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/tmp)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/tmp)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/conf)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/conf)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/conf)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/conf)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/data)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/data)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/data)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/data)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/meta)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/meta)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/meta)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/meta)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/stat)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/stat)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/stat)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/stat)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/change)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/change)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/change)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/change)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/postgres)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/postgres)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/postgres)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/postgres)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/arcwal)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/arcwal)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/arcwal)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/arcwal)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/backup)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/backup)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/backup)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/backup)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/remote)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/remote)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/remote)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/remote)

TASK [postgres : Create pgbouncer directory structure] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)

TASK [postgres : Create links from pgbkup to pgroot] *****************************************************************************************************************************************************
changed: [10.10.10.10] => (item=arcwal)
changed: [10.10.10.11] => (item=arcwal)
changed: [10.10.10.12] => (item=arcwal)
changed: [10.10.10.13] => (item=arcwal)
changed: [10.10.10.10] => (item=backup)
changed: [10.10.10.11] => (item=backup)
changed: [10.10.10.12] => (item=backup)
changed: [10.10.10.13] => (item=backup)
changed: [10.10.10.10] => (item=remote)
changed: [10.10.10.11] => (item=remote)
changed: [10.10.10.12] => (item=remote)
changed: [10.10.10.13] => (item=remote)

TASK [postgres : Create links from current cluster] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Copy pg_cluster to /pg/meta/cluster] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_version to /pg/meta/version] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_instance to /pg/meta/instance] **************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_seq to /pg/meta/sequence] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_role to /pg/meta/role] **********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy postgres scripts to /pg/bin/] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Copy alias profile to /etc/profile.d] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Copy psqlrc to postgres home] ***********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Setup hostname to pg instance name] *****************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Copy consul node-meta definition] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Restart consul to load new node-meta] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Config patroni watchdog support] ********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Get config parameter page count] ********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Get config parameter page size] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Tune shared buffer and work mem] ********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Hanlde small size mem occasion] *********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Calculate postgres mem params] **********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : create patroni config dir] **************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : use predefined patroni template] ********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Render default /pg/conf/patroni.yml] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Link /pg/conf/patroni to /pg/bin/] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Link /pg/bin/patroni.yml to /etc/patroni/] **********************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Config patroni watchdog support] ********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Copy patroni systemd service file] ******************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : create patroni systemd drop-in dir] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Copy postgres systemd service file] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Drop-In consul dependency for patroni] **************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Render default initdb scripts] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Launch patroni on primary instance] *****************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Wait for patroni primary online] ********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
ok: [10.10.10.10]
ok: [10.10.10.11]

TASK [postgres : Wait for postgres primary online] *******************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
ok: [10.10.10.10]
ok: [10.10.10.11]

TASK [postgres : Check primary postgres service ready] ***************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
[WARNING]: Module remote_tmp /var/lib/pgsql/.ansible/tmp did not exist and was created with a mode of 0700, this may cause issues when running as another user. To avoid this, create the remote_tmp dir
with the correct permissions manually
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Check replication connectivity to primary] **********************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Render init roles sql] ******************************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Render init template sql] ***************************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Render default pg-init scripts] *********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.10]

TASK [postgres : Execute initialization scripts] *********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Check primary instance ready] ***********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Add dbsu password to pgpass if exists] **************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Add system user to pgpass] **************************************************************************************************************************************************************
changed: [10.10.10.10] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.11] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.12] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.13] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.11] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.10] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.13] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.12] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.13] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})
changed: [10.10.10.12] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})
changed: [10.10.10.10] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})
changed: [10.10.10.11] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})

TASK [postgres : Check replication connectivity to primary] **********************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Launch patroni on replica instances] ****************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Wait for patroni replica online] ********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Wait for postgres replica online] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Check replica postgres service ready] ***************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Render hba rules] ***********************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [postgres : Reload hba rules] ***********************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Pause patroni] **************************************************************************************************************************************************************************
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Stop patroni on replica instance] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Stop patroni on primary instance] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Launch raw postgres on primary] *********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Launch raw postgres on primary] *********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Wait for postgres online] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Check pgbouncer is installed] ***********************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [postgres : Stop existing pgbouncer service] ********************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Remove existing pgbouncer dirs] *********************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)

TASK [postgres : Recreate dirs with owner postgres] ******************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)

TASK [postgres : Copy /etc/pgbouncer/pgbouncer.ini] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Copy /etc/pgbouncer/pgb_hba.conf] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Touch userlist and database list] *******************************************************************************************************************************************************
changed: [10.10.10.10] => (item=database.txt)
changed: [10.10.10.11] => (item=database.txt)
changed: [10.10.10.12] => (item=database.txt)
changed: [10.10.10.13] => (item=database.txt)
changed: [10.10.10.10] => (item=userlist.txt)
changed: [10.10.10.11] => (item=userlist.txt)
changed: [10.10.10.12] => (item=userlist.txt)
changed: [10.10.10.13] => (item=userlist.txt)

TASK [postgres : Add default users to pgbouncer] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Copy pgbouncer systemd service] *********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Launch pgbouncer pool service] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Wait for pgbouncer service online] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Check pgbouncer service is ready] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : include_tasks] **************************************************************************************************************************************************************************
included: /private/tmp/pigsty/roles/postgres/tasks/createuser.yml for 10.10.10.10 => (item={'name': 'dbuser_meta', 'password': 'DBUser.Meta', 'login': True, 'superuser': False, 'createdb': False, 'createrole': False, 'inherit': True, 'replication': False, 'bypassrls': False, 'connlimit': -1, 'expire_at': '2030-12-31', 'expire_in': 365, 'roles': ['dbrole_readwrite'], 'pgbouncer': True, 'parameters': {'search_path': 'public'}, 'comment': 'test user'})
included: /private/tmp/pigsty/roles/postgres/tasks/createuser.yml for 10.10.10.10 => (item={'name': 'dbuser_vonng2', 'password': 'DBUser.Vonng', 'roles': ['dbrole_offline'], 'expire_in': 365, 'pgbouncer': False, 'comment': 'example personal user for interactive queries'})
included: /private/tmp/pigsty/roles/postgres/tasks/createuser.yml for 10.10.10.11, 10.10.10.12, 10.10.10.13 => (item={'name': 'test', 'password': 'test', 'roles': ['dbrole_readwrite'], 'pgbouncer': True, 'comment': 'default test user for production usage'})

TASK [postgres : Render user dbuser_meta creation sql] ***************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute user dbuser_meta creation sql on primary] ***************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Add user to pgbouncer] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Render user dbuser_vonng2 creation sql] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute user dbuser_vonng2 creation sql on primary] *************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Add user to pgbouncer] ******************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [postgres : Render user test creation sql] **********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Execute user test creation sql on primary] **********************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Add user to pgbouncer] ******************************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : include_tasks] **************************************************************************************************************************************************************************
included: /private/tmp/pigsty/roles/postgres/tasks/createdb.yml for 10.10.10.10 => (item={'name': 'meta', 'allowconn': True, 'revokeconn': False, 'connlimit': -1, 'extensions': [{'name': 'postgis', 'schema': 'public'}], 'parameters': {'enable_partitionwise_join': True}, 'pgbouncer': True, 'comment': 'pigsty meta database'})
included: /private/tmp/pigsty/roles/postgres/tasks/createdb.yml for 10.10.10.11, 10.10.10.12, 10.10.10.13 => (item={'name': 'test'})

TASK [postgres : debug] **********************************************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "msg": {
        "allowconn": true,
        "comment": "pigsty meta database",
        "connlimit": -1,
        "extensions": [
            {
                "name": "postgis",
                "schema": "public"
            }
        ],
        "name": "meta",
        "parameters": {
            "enable_partitionwise_join": true
        },
        "pgbouncer": true,
        "revokeconn": false
    }
}

TASK [postgres : Render database meta creation sql] ******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Render database meta baseline sql] ******************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [postgres : Execute database meta creation command] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute database meta creation sql] *****************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute database meta creation sql] *****************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [postgres : Add pgbouncer busniess database] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : debug] **********************************************************************************************************************************************************************************
ok: [10.10.10.11] => {
    "msg": {
        "name": "test"
    }
}
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Render database test creation sql] ******************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Render database test baseline sql] ******************************************************************************************************************************************************
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Execute database test creation command] *************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Execute database test creation sql] *****************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Execute database test creation sql] *****************************************************************************************************************************************************
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Add pgbouncer busniess database] ********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Reload pgbouncer to add db and users] ***************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Copy pg service definition to consul] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=postgres)
changed: [10.10.10.11] => (item=postgres)
changed: [10.10.10.12] => (item=postgres)
changed: [10.10.10.13] => (item=postgres)
changed: [10.10.10.10] => (item=pgbouncer)
changed: [10.10.10.11] => (item=pgbouncer)
changed: [10.10.10.12] => (item=pgbouncer)
changed: [10.10.10.13] => (item=pgbouncer)
changed: [10.10.10.10] => (item=patroni)
changed: [10.10.10.11] => (item=patroni)
changed: [10.10.10.12] => (item=patroni)
changed: [10.10.10.13] => (item=patroni)

TASK [postgres : Reload postgres consul service] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Render grafana datasource definition] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Register datasource to grafana] *********************************************************************************************************************************************************
[WARNING]: Consider using the get_url or uri module rather than running 'curl'.  If you need to use command because get_url or uri is insufficient you can add 'warn: false' to this command task or set
'command_warnings=False' in ansible.cfg to get rid of this message.
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [monitor : Install exporter yum repo] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [monitor : Install node_exporter and pg_exporter] ***************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=node_exporter)
skipping: [10.10.10.10] => (item=pg_exporter)
skipping: [10.10.10.11] => (item=node_exporter)
skipping: [10.10.10.11] => (item=pg_exporter)
skipping: [10.10.10.12] => (item=node_exporter)
skipping: [10.10.10.12] => (item=pg_exporter)
skipping: [10.10.10.13] => (item=node_exporter)
skipping: [10.10.10.13] => (item=pg_exporter)

TASK [monitor : Copy node_exporter binary] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [monitor : Copy pg_exporter binary] *****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [monitor : Create /etc/pg_exporter conf dir] ********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Copy default pg_exporter.yaml] ***********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [monitor : Config /etc/default/pg_exporter] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Config pg_exporter service unit] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [monitor : Launch pg_exporter systemd service] ******************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [monitor : Wait for pg_exporter service online] *****************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.11]
ok: [10.10.10.13]

TASK [monitor : Register pg-exporter consul service] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Reload pg-exporter consul service] *******************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [monitor : Config pgbouncer_exporter opts] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Config pgbouncer_exporter service] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Launch pgbouncer_exporter service] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]

TASK [monitor : Wait for pgbouncer_exporter online] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [monitor : Register pgb-exporter consul service] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]

TASK [monitor : Reload pgb-exporter consul service] ******************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [monitor : Copy node_exporter systemd service] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [monitor : Config default node_exporter options] ****************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [monitor : Launch node_exporter service unit] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [monitor : Wait for node_exporter online] ***********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [monitor : Register node-exporter service to consul] ************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [monitor : Reload node-exporter consul service] *****************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [service : Make sure haproxy is installed] **********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [service : Create haproxy directory] ****************************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]
ok: [10.10.10.11]

TASK [service : Copy haproxy systemd service file] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [service : Fetch postgres cluster memberships] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [service : Templating /etc/haproxy/haproxy.cfg] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]

TASK [service : Launch haproxy load balancer service] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [service : Wait for haproxy load balancer online] ***************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.11]
ok: [10.10.10.13]

TASK [service : Reload haproxy load balancer service] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [service : Copy haproxy exporter definition] ********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [service : Copy haproxy service definition] *********************************************************************************************************************************************************
changed: [10.10.10.12] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.10] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.11] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.13] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.10] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.12] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.13] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.11] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.10] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.12] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.11] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.13] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.10] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
changed: [10.10.10.12] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
changed: [10.10.10.11] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
changed: [10.10.10.13] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})

TASK [service : Reload haproxy consul service] ***********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [service : Make sure vip-manager is installed] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.13]
ok: [10.10.10.11]
ok: [10.10.10.12]

TASK [service : Copy vip-manager systemd service file] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [service : create vip-manager systemd drop-in dir] **************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [service : create vip-manager systemd drop-in file] *************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [service : Templating /etc/default/vip-manager.yml] *************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [service : Launch vip-manager] **********************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [service : Fetch postgres cluster memberships] ******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [service : Render L4 VIP configs] *******************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.10] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.10] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.11] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.10] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
skipping: [10.10.10.11] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.11] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.12] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.11] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
skipping: [10.10.10.12] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.13] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.12] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.13] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.12] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
skipping: [10.10.10.13] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.13] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})

TASK [service : include_tasks] ***************************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

PLAY RECAP ***********************************************************************************************************************************************************************************************
10.10.10.10                : ok=264  changed=205  unreachable=0    failed=0    skipped=62   rescued=0    ignored=0
10.10.10.11                : ok=182  changed=146  unreachable=0    failed=0    skipped=55   rescued=0    ignored=0
10.10.10.12                : ok=171  changed=135  unreachable=0    failed=0    skipped=66   rescued=0    ignored=0
10.10.10.13                : ok=171  changed=135  unreachable=0    failed=0    skipped=66   rescued=0    ignored=0

强烈建议在第一次完成初始化后执行 make cache 命令,该命令会将下载好的软件打为离线缓存包,并放置于files/pkg.tgz中。这样当下一次创建新的pigsty环境时,只要宿主机内操作系统一致,就可以直接复用该离线包,省去大量下载时间。

mon-view

初始化完毕后,您可以通过浏览器访问 http://pigsty 前往监控系统主页。默认的用户名与密码均为admin

如果没有配置DNS,或者没有使用默认的IP地址,也可以直接访问 http://meta_ip_address:3000前往监控系统首页。

$ make mon-view
open -n 'http://g.pigsty/'

7 - PG Exporter

PG Exporter参考

Exporter

https://github.com/Vonng/pg_exporter

完全自研的 pg_exporter, 用于收集postgres与pgbouncer的指标:

支持PostgreSQL 9.4 ~ 13版本,Pgbouncer 1.8+版本

几乎所有指标都通过配置文件以SQL的形式获取,完全定制化,提供热重载功能

指标收集器可以根据类似Kubernetes的方式调度执行 (例如只在从库上执行,只在带有tag启动标签的节点执行,只在安装特定扩展的实例上执行等)

带有灵活的指标缓存策略,自动超时取消,最小化监控系统对数据库的性能影响。

提供健康检查,就绪探针,主从角色检查等功能,可用于流量分发

PG Exporter

Prometheus exporter for PostgreSQL metrics. Gives you complete insight on your favourite elephant!

Latest binaries & rpms can be found on release page. Supported pg version: PostgreSQL 9.4+ & Pgbouncer 1.8+. Default collectors definition is compatible with PostgreSQL 10,11,12,13.

Latest pg_exporter version: 0.3.1

Features

  • Support both Postgres & Pgbouncer
  • Flexible: Almost all metrics are defined in customizable configuration files in SQL style.
  • Fine-grained execution control (Tags Filter, Facts Filter, Version Filter, Timeout, Cache, etc…)
  • Dynamic Planning: Users can provide multiple branches of a metric query. Only queries that match the server version, facts, and tags are actually installed.
  • Configurable caching policy & query timeout
  • Rich metrics about pg_exporter itself.
  • Auto discovery multi database in the same cluster (multiple database scrape TBD)
  • Tested and verified in real world production environment for years (200+ Nodes)
  • Metrics overwhelming! Gives you complete insight on your favourite elephant!
  • (Pgbouncer mode is enabled when target dbname is pgbouncer)

性能表现

对于极端场景(几十万张表与几万种查询),一次抓取最多可能耗费秒级的时长。

好在所有指标收集器都是可选关闭的,且pg_exporter 允许为收集器配置主动超时取消(默认100ms)

自监控

Exporter展示了监控系统组件本身的监控指标,包括:

  • Exporter是否存活,Uptime,Exporter每分钟被抓取的次数
  • 每个监控查询的耗时,产生的指标数量与错误数量。

Prometheus的配置

Prometheus的抓取频率建议采用10~15秒,并配置适当的超时。

演示或特殊情况也可以配置得更精细(例如2秒,5秒等)

单Prometheus节点可以支持几百个实例的监控,约几百万个时间序列 (Dell R740 64 Core / 400GB Mem/ 3TB PCI-E SSD)

更大规模的集群可以通过Prometheus级联、联邦或分片实现伸缩。例如为每一个数据库集群部署一个Prometheus,并使用上级Prometheus统筹抓取并计算衍生指标

8 - Prometheus服务发现

Prometheus是如何通过静态文件进行服务发现的

当使用 prometheus_sd_method == ‘static’ 的静态文件服务发现模式时,Prometheus会使用静态文件进行服务发现,目标配置文件地址默认为 /etc/prometheus/targets/ 目录中的所有yml文件。

集中式配置

当 prometheus_sd_target 配置为 batch 模式时,Pigsty会采用集中式配置管理Prometheus监控目标。

所有监控对象都定义于单一配置文件:/etc/prometheus/targets/all.yml 中。

#==============================================================#
# File      :   targets/all.yml
# Ctime     :   2021-02-18
# Mtime     :   2021-02-18
# Atime     :   2021-03-01 16:46
# Note      :   Managed by Ansible
# Desc      :   Prometheus Static Monitoring Targets Definition
# Path      :   /etc/prometheus/targets/all.yml
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# static monitor targets, batch version

#======> pg-meta-1 [primary]
- labels: {cls: pg-meta, ins: pg-meta-1, ip: 10.10.10.10, role: primary, svc: pg-meta-primary}
  targets: [10.10.10.10:9630, 10.10.10.10:9100, 10.10.10.10:9631, 10.10.10.10:9101]

#======> pg-test-1 [primary]
- labels: {cls: pg-test, ins: pg-test-1, ip: 10.10.10.11, role: primary, svc: pg-test-primary}
  targets: [10.10.10.11:9630, 10.10.10.11:9100, 10.10.10.11:9631, 10.10.10.11:9101]

#======> pg-test-2 [replica]
- labels: {cls: pg-test, ins: pg-test-2, ip: 10.10.10.12, role: replica, svc: pg-test-replica}
  targets: [10.10.10.12:9630, 10.10.10.12:9100, 10.10.10.12:9631, 10.10.10.12:9101]

#======> pg-test-3 [replica]
- labels: {cls: pg-test, ins: pg-test-3, ip: 10.10.10.13, role: replica, svc: pg-test-replica}
  targets: [10.10.10.13:9630, 10.10.10.13:9100, 10.10.10.13:9631, 10.10.10.13:9101]

分立式配置

当 prometheus_sd_target 配置为 single 模式时,Pigsty会采用分立式配置管理Prometheus监控目标。

每个监控实例,都拥有自己独占的单一配置文件:/etc/prometheus/targets/{{ pg_instance }}.yml。

以 pg-meta-1 实例为例,其配置文件位置为:/etc/prometheus/targets/pg-meta-1.yml,内容为:

# pg-meta-1 [primary]
- labels: {cls: pg-meta, ins: pg-meta-1, ip: 10.10.10.10, role: primary, svc: pg-meta-primary}
  targets: [10.10.10.10:9630, 10.10.10.10:9100, 10.10.10.10:9631, 10.10.10.10:9101]

9 - Tuned模板

几种预制的Tuned模板

9.1 - OLTP

Tuned OLTP模板

Tuned OLTP模板主要针对延迟进行优化,此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。您可以根据自己的实际机型进行调整。

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to oltp mode
# Path      :   /etc/tuned/oltp/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLTP System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in pages: $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# max shmem segment size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms , depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

# vm.dirty_background_bytes=67108864 # 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=10                 # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

9.2 - TINY

Tuned TINY模板

Tuned TINY模板主要针对极低配置的虚拟机进行优化,

此模板针对的典型机型是1核/1GB的虚拟机节点。您可以根据自己的实际机型进行调整。

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to tiny mode
# Path      :   /etc/tuned/tiny/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL TINY System
# include=virtual-guest

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up.  Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40

# Filesystem I/O is usually much more efficient than swapping, so try to keep
# swapping low.  It's usually safe to go even lower than this on systems with
# server-grade storage.
vm.swappiness = 30

#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe peroid = 1 min
net.ipv4.tcp_keepalive_time=60

9.3 - OLAP

Tuned OLAP模板,针对高并行,长查询,高吞吐实例优化

Tuned OLAP模板主要针对吞吐量与计算并行度进行优化

此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。您可以根据自己的实际机型进行调整。

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-09-18
# Desc      :   Tune operating system to olap mode
# Path      :   /etc/tuned/olap/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# profile metadata; inherits from the network-throughput profile
[main]
summary=Optimize for PostgreSQL OLAP System
include=network-throughput

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# kernel.shmall: system-wide shared memory limit, counted in PAGES
# e.g. half of RAM: $(expr $(getconf _PHYS_PAGES) / 2)
# NOTE(review): confirm param_shmall is computed in pages (not bytes)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# kernel.shmmax: max size of a single shared memory segment, in BYTES
# e.g. half of RAM: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
# NOTE(review): confirm param_shmmax is computed in bytes (not pages)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms , depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap (override left disabled in this profile)
# vm.swappiness=10

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# strict overcommit (mode 2): commit limit = swap + 80% of RAM
vm.overcommit_memory=2
vm.overcommit_ratio=80

vm.dirty_background_ratio = 10    # throughput-performance default
vm.dirty_ratio=80                 # throughput-performance default 40 -> 80

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
# NOTE(review): net.ipv4.tcp_tw_recycle was removed in Linux 4.12; on newer
# kernels this key does not exist -- confirm the target kernel version
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffers: "min default max" in bytes
# udp_mem: "min pressure max", counted in pages
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# idle time before the first keepalive probe = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

9.4 - CRIT

Tuned CRIT模板,针对金融场景、不允许数据丢失错漏的场景进行优化。

Tuned CRIT模板主要针对RPO进行优化,尽可能减少内存中脏数据的量。

此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。您可以根据自己的实际机型进行调整。

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to crit mode
# Path      :   /etc/tuned/crit/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# profile metadata; inherits from the network-latency profile
[main]
summary=Optimize for PostgreSQL CRIT System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# kernel.shmall: system-wide shared memory limit, counted in PAGES
# e.g. half of RAM: $(expr $(getconf _PHYS_PAGES) / 2)
# NOTE(review): confirm param_shmall is computed in pages (not bytes)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# kernel.shmmax: max size of a single shared memory segment, in BYTES
# e.g. half of RAM: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
# NOTE(review): confirm param_shmmax is computed in bytes (not pages)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms , depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# strict overcommit (mode 2): commit limit = swap + 100% of RAM
vm.overcommit_memory=2
vm.overcommit_ratio=100

# 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_bytes=67108864
# vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=6                    # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
# NOTE(review): net.ipv4.tcp_tw_recycle was removed in Linux 4.12; on newer
# kernels this key does not exist -- confirm the target kernel version
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffers: "min default max" in bytes
# udp_mem: "min pressure max", counted in pages
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# idle time before the first keepalive probe = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

10 - Patroni模板

Pigsty预置的四种Patroni模板

Pigsty使用Patroni管理与初始化Postgres数据库集群。

Pigsty使用Patroni完成供给的主体工作,即使用户选择了无Patroni模式,拉起数据库集群也会由Patroni负责,并在创建完成后移除Patroni组件。

用户可以通过Patroni配置文件,完成大部分的PostgreSQL集群定制工作,Patroni配置文件格式详情请参考 Patroni官方文档

预定义模板

Pigsty提供了四种预定义的初始化模板,初始化模板是用于初始化数据库集群的定义文件,默认位于roles/postgres/templates/。包括:

  • oltp.yml OLTP模板,默认配置,针对生产机型优化延迟与性能。
  • olap.yml OLAP模板,提高并行度,针对吞吐量,长查询进行优化。
  • crit.yml 核心业务模板,基于OLTP模板针对RPO、安全性、数据完整性进行优化,启用同步复制与数据校验和。
  • tiny.yml 微型数据库模板,针对低资源场景进行优化,例如运行于虚拟机中的演示数据库集群。

通过 pg_conf 参数指定所需使用的模板路径,如果使用预制模板,则只需填入模板文件名称即可。

如果使用定制的 Patroni配置模板,通常也应当针对机器节点使用配套的 节点优化模板

更详细的配置信息,请参考 PG供给

10.1 - OLTP

Patroni OLTP模板

Patroni OLTP模板主要针对延迟进行优化,此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。您可以根据自己的实际机型进行调整。

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (oltp)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Transactional Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# OLTP database are optimized for performance, rt latency
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
# all three values are filled in from template variables at render time
namespace: {{ pg_namespace }}/          # namespace (note the trailing slash)
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
# patroni's own log: level, target directory and rotation size
log:
  level: INFO                           #  NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL
  dir: /pg/log/                         #  default log file: /pg/log/patroni.log
  file_size: 100000000                  #  100MB log triggers a log rotate
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
# consul is used as the DCS, reached on the loopback address (port 8500)
consul:
  host: 127.0.0.1:8500
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  # the registered service is tagged with the cluster name
  service_tags:
    - {{ pg_cluster }}

#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  listen: 0.0.0.0:{{ patroni_port }}
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  authentication:
    # NOTE(review): Patroni documents verify_client as a direct child of
    # restapi rather than of authentication -- confirm this placement
    verify_client: none                 # none|optional|required
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'

#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
# patronictl settings for talking to the REST API
# NOTE(review): Patroni documents insecure/cacert/certfile/keyfile as direct
# children of ctl; nested under `optional` they are presumably ignored -- confirm
ctl:
  optional:
    insecure: true
    # cacert: '/path/to/ca/cert'
    # certfile: '/path/to/cert/file'
    # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
# per-member tags; replicatefrom is only emitted when pg_upstream is defined
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
# the mode comes from the patroni_watchdog_mode template variable
watchdog:
  mode: {{ patroni_watchdog_mode }}
  device: /dev/watchdog
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
# settings for initializing a new cluster: initdb options, initial users,
# bootstrap hba rules, and the dynamic configuration written to the DCS
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  initdb:
    - locale: C
    - encoding: UTF8
    # - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'
    {{ pg_admin_username }}:
      password: '{{ pg_admin_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # template
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be create manually on upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >= loop_wait + retry_timeout * 2

    # the number of seconds the loop will sleep. Default value: 10
    # this is patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # config this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 10

    # important: candidate will not be promoted if replication lag is higher than this
    # maximum RPO: 1MB
    maximum_lag_on_failover: 1048576

    # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
    master_stop_timeout: 30

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: false

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true


      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 400                    # 100 -> 400
        superuser_reserved_connections: 10      # reserve 10 connection for su
        max_locks_per_transaction: 128          # 64 -> 128
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enabled xact timestamp
        max_worker_processes: 8                 # default 8, set to cpu core
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 16                     # 10 -> 16
        max_replication_slots: 16               # 10 -> 16
        wal_keep_size: 100GB                    # keep at least 100GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 32MB                          # 4MB -> 32MB
        huge_pages: try                         # try huge pages
        temp_file_limit: 100GB                  # 0 -> 100GB
        vacuum_cost_delay: 2ms                  # wait 2ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 800              # 100 -> 800
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 1MB             # max allowed data loss
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # 0 -> 20 (unit is microseconds), group commit delay
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 60min               # checkpoint 5min -> 1h
        checkpoint_completion_target: 0.95      # 0.5 -> 0.95
        archive_mode: on
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 50000         # 0->50000 last 50000 xact changes will not be vacuumed
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # time that receiver waits for
        max_logical_replication_workers: 8      # 4 -> 8
        max_sync_workers_per_subscription: 8    # 2 -> 8

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        # enable_partitionwise_join: on
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 320GB             # max mem - shared buffer
        default_statistics_target: 1000         # stat bucket 100 -> 1000

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 100         # log slow query (>100ms)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 8192         # max query length in pg_stat_activity

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity take more than 1s
        autovacuum_max_workers: 3               # default autovacuum worker 3
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        # NOTE(review): the value below is 100 million (PostgreSQL default is 200 million);
        # if 1 billion was intended the value needs one more zero -- confirm
        autovacuum_freeze_max_age: 100000000    # age > 100 million triggers force vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # 50ms for deadlock
        idle_in_transaction_session_timeout: 10min  # 10min timeout for idle in transaction

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true

        # pg_stat_statements
        pg_stat_statements.max: 10000           # 5000 -> 10000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: off   # do not track query other than CRUD
        pg_stat_statements.track_planning: off  # do not track planning metrics

#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
# local (per-instance) postgres settings: paths and addresses, credentials,
# event callbacks, rewind policy and replica creation method
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # credentials used to connect to postgres
  #----------------------------------------------------------------------------
  # the rewind user reuses the replication user's credentials
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # event callback script log: /pg/log/callback.log
  # every event is routed to the same callback script
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: pg_rewind needs data checksums or wal_log_hints enabled
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  # entries below are passed to pg_basebackup as long options
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    # option name spelled in full; pg_basebackup takes the interval as an
    # integer number of seconds, so no unit suffix is used
    - status-interval: 1
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (overwrite with default)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5

...

10.2 - TINY

Patroni TINY模板

Patroni TINY模板主要针对极低配置的虚拟机进行优化。

此模板针对的典型机型是1核/1GB的虚拟机节点。您可以根据自己的实际机型进行调整。

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (tiny)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Tiny Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# TINY database are optimized for low-resource situation (e.g 1 Core 1G)
# typical spec: 1 Core | 1-4 GB RAM | Normal SSD  10x GB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/          # namespace
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  # log level, one of: NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL
  level: 'INFO'
  # log directory; default log file: /pg/log/patroni.log
  dir: '/pg/log/'
  # a log file of 100MB triggers a log rotate
  file_size: 100000000
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
  host: 127.0.0.1:8500         # consul endpoint on the loopback address
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  service_tags:
    - {{ pg_cluster }}         # single tag: the rendered cluster name


#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  # listen on all IPv4 addresses
  listen: 0.0.0.0:{{ patroni_port }}
  # address advertised for this instance
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  # client-certificate verification is a restapi-level option in Patroni;
  # `authentication` only carries the basic-auth username/password
  verify_client: none                   # none|optional|required
  # basic-auth credential: same as the monitor user
  authentication:
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'


#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
  # patronictl options live directly under `ctl`; the whole section is
  # optional, but there is no nested `optional:` key
  insecure: true                        # do not verify REST API SSL certs
  # cacert: '/path/to/ca/cert'
  # certfile: '/path/to/cert/file'
  # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
  # the replicatefrom tag below is rendered only when pg_upstream is defined
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
  mode: {{ patroni_watchdog_mode }}     # rendered from patroni_watchdog_mode: off|automatic|required
  device: /dev/watchdog                 # watchdog device path
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  # name of the bootstrap method; refers to the `initdb` section below
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  # entries are either `option: value` pairs or bare flags
  initdb:
    - locale: C
    - encoding: UTF8
    - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    # replication user (name and password come from template variables)
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    # monitor user; the same credential is used for restapi authentication
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  # first three rules cover regular connections, last three cover replication
  # NOTE(review): 0.0.0.0/0 matches every IPv4 client, which is wider than
  # "intranet" -- confirm the later overwrite narrows it
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # customization
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    # rendered only for an instance whose pg_role is 'primary' and which has
    # pg_upstream defined; the upstream is addressed with this cluster's pg_port
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be created manually on upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >= loop_wait + retry_timeout * 2
    # (with the values below: 30 >= 10 + 10 * 2)

    # the number of seconds the loop will sleep. Default value: 10
    # this is the patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # configure this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 10

    # important: candidate will not be promoted if replication lag (in bytes) is higher than this
    # maximum RPO: 1MB (1048576 bytes)
    maximum_lag_on_failover: 1048576

    # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
    master_stop_timeout: 30

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: false

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    # postgres settings that are part of the bootstrap dcs config
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true


      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 50                     # default 100 -> 50
        superuser_reserved_connections: 10      # reserve 10 of the 50 connections for su
        max_locks_per_transaction: 64           # default 64
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enabled xact timestamp
        max_worker_processes: 1                 # default 8 -> 1 (set to cpu core)
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 10                     # default 10
        max_replication_slots: 10               # default 10
        wal_keep_size: 1GB                      # keep at least 1GB WAL
        password_encryption: md5                # use traditional md5 auth (matches the md5 rules in pg_hba)

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}                 # rendered from pg_shared_buffers
        maintenance_work_mem: {{ pg_maintenance_work_mem }}     # rendered from pg_maintenance_work_mem
        work_mem: 4MB                           # default 4MB
        huge_pages: try                         # try huge pages
        # NOTE(review): 40GB may exceed the "10x GB" disk of the typical spec -- adjust to your disk
        temp_file_limit: 40GB                   # 0 -> 40GB (according to your disk)
        vacuum_cost_delay: 5ms                  # wait 5ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 800              # 100 -> 800
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 1MB             # max allowed data loss
        # WAL sizes are scaled to the tiny spec (10x GB disk): WAL below
        # min_wal_size is always recycled instead of removed, so the former
        # 100GB/400GB values could fill the whole disk. Adjust to your disk.
        min_wal_size: 1GB                       # at least 1GB WAL
        max_wal_size: 4GB                       # at most 4GB WAL between checkpoints
        commit_delay: 20                        # 0 -> 20 (microseconds), allows group commit
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 15min               # checkpoint 5min -> 15min
        checkpoint_completion_target: 0.80      # 0.5 -> 0.8
        archive_mode: on
        # archive to /pg/arcwal/<YYYYMMDD>/ compressed with lz4; when a segment
        # is archived at 12:00, yesterday's archive directory is removed first
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 50000         # 0->50000 last 50000 xact changes will not be vacuumed
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # time that receiver waits for
        # NOTE(review): the two values below are 8 although the original notes said
        # "-> 2", and max_worker_processes is 1 in this template -- confirm intent
        max_logical_replication_workers: 8      # set according to your cpu core
        max_sync_workers_per_subscription: 8    # set according to your cpu core

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        # enable_partitionwise_join: on
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        # NOTE(review): 2GB exceeds the RAM of the 1 Core / 1GB example node -- adjust to your memory
        effective_cache_size: 2GB               # max mem - shared buffer
        default_statistics_target: 200          # stat bucket 100 -> 200

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # alternative naming scheme (disabled): one file per weekday
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 100         # log slow query (>100ms)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 8192         # max query length (bytes) kept in pg_stat_activity

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity take more than 1s
        autovacuum_max_workers: 1               # default autovacuum worker 3 -> 1
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        # value now matches the documented 1 billion; it was 100000000
        # (100 million), which is below PostgreSQL's 200 million default
        autovacuum_freeze_max_age: 1000000000   # age > 1 billion triggers force vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # wait 50ms on a lock before running deadlock detection
        idle_in_transaction_session_timeout: 10min  # 10min timeout for idle in transaction

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        # uses pg_shared_libraries when set, otherwise pg_stat_statements + auto_explain
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true  # also explain nested statements

        # pg_stat_statements
        pg_stat_statements.max: 3000            # 5000 -> 3000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: off   # do not track query other than CRUD
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}                                 # postgres binaries directory
  data_dir: {{ pg_data }}                                   # data directory
  config_dir: {{ pg_data }}                                 # config files live in the data directory
  pgpass: {{ pg_dbsu_home }}/.pgpass                        # .pgpass under the dbsu home directory
  listen: {{ pg_listen }}:{{ pg_port }}                     # listen address and port
  connect_address: {{ inventory_hostname }}:{{ pg_port }}   # address advertised for this instance
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # who to connect to postgres
  #----------------------------------------------------------------------------
  authentication:
    # superuser: only a username is given, no password
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    # rewind reuses the replication user's credential
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # event callback script log: /pg/log/callback.log
  # every event is dispatched to the same callback script
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksums (see initdb) or wal_log_hints (see parameters)
  # should be enabled before using rewind
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  # replicas are created with the single method listed here
  create_replica_methods:
    - basebackup
  # options for the basebackup method: `option: value` pairs or bare flags
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    - status-interval: 1    # seconds between status packets (integer)
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (overwrite with default)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5

...

10.3 - OLAP

Patroni OLAP模板,针对高并行,长查询,高吞吐实例优化

Patroni OLAP模板主要针对吞吐量与计算并行度进行优化

此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。您可以根据自己的实际机型进行调整。

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (olap)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Analysis Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# OLAP databases are optimized for throughput
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
# all three values are filled in when the template is rendered
namespace: {{ pg_namespace }}/          # namespace (a trailing slash is appended to the variable)
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  # log level, one of: NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL
  level: 'INFO'
  # log directory; default log file: /pg/log/patroni.log
  dir: '/pg/log/'
  # a log file of 100MB triggers a log rotate
  file_size: 100000000
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
  host: 127.0.0.1:8500         # consul endpoint on the loopback address
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  service_tags:
    - {{ pg_cluster }}         # single tag: the rendered cluster name

#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  # listen on all IPv4 addresses
  listen: 0.0.0.0:{{ patroni_port }}
  # address advertised for this instance
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  # client-certificate verification is a restapi-level option in Patroni;
  # `authentication` only carries the basic-auth username/password
  verify_client: none                   # none|optional|required
  # basic-auth credential: same as the monitor user
  authentication:
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'

#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
  # patronictl options live directly under `ctl`; the whole section is
  # optional, but there is no nested `optional:` key
  insecure: true                        # do not verify REST API SSL certs
  # cacert: '/path/to/ca/cert'
  # certfile: '/path/to/cert/file'
  # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
  # the replicatefrom tag below is rendered only when pg_upstream is defined
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
  mode: {{ patroni_watchdog_mode }}     # rendered from patroni_watchdog_mode: off|automatic|required
  device: /dev/watchdog                 # watchdog device path
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  initdb:
    - locale: C
    - encoding: UTF8
    # - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'
    {{ pg_admin_username }}:
      password: '{{ pg_admin_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # template
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be create manually on upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >= loop_wait + retry_timeout * 2
    # (with the values below: 30 >= 10 + 10 * 2)

    # the number of seconds the loop will sleep. Default value: 10
    # this is the patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # configure this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 10

    # important: candidate will not be promoted if replication lag (in bytes) is higher than this
    # maximum RPO: 16MB (analysis tolerate more data loss)
    maximum_lag_on_failover: 16777216

    # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
    master_stop_timeout: 30

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: false

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true

      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 400                    # 100 -> 400
        superuser_reserved_connections: 10      # reserve 10 connection for su
        max_locks_per_transaction: 256          # 64 -> 256 (analysis)
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enabled xact timestamp
        max_worker_processes: 64                # default 8 -> 64, SET THIS ACCORDING TO YOUR CPU CORES
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 16                     # 10 -> 16
        max_replication_slots: 16               # 10 -> 16
        wal_keep_size: 100GB                    # keep at least 100GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 128MB                         # 4MB -> 128MB (analysis)
        huge_pages: try                         # try huge pages
        temp_file_limit: 500GB                  # 0 -> 500GB (analysis)
        vacuum_cost_delay: 2ms                  # wait 2ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 1600             # 100 -> 1600 (analysis)
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer
        max_parallel_workers: 64                # SET THIS ACCORDING TO YOUR CPU CORES
        max_parallel_workers_per_gather: 64     # SET THIS ACCORDING TO YOUR CPU CORES
        max_parallel_maintenance_workers: 4     # 2 -> 4

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 16MB            # max allowed data loss (analysis)
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # 0 -> 20 (microseconds), allows group commit
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 60min               # checkpoint 5min -> 1h
        checkpoint_completion_target: 0.95      # 0.5 -> 0.95
        archive_mode: on
        # archive to /pg/arcwal/<YYYYMMDD>/ compressed with lz4; when a segment
        # is archived at 12:00, yesterday's archive directory is removed first
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 0             # 0 (default)
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # time that receiver waits for
        max_logical_replication_workers: 8      # 4 -> 8
        max_sync_workers_per_subscription: 8    # 2 -> 8 (PostgreSQL default is 2)

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        enable_partitionwise_join: on           # enable on analysis
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 320GB             # max mem - shared buffer
        default_statistics_target: 1000         # stat bucket 100 -> 1000
        jit: on                                 # default on
        jit_above_cost: 100000                  # default jit threshold

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 1000         # log slow query (>1s)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 8192         # max query length in pg_stat_activity

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity take more than 1s
        autovacuum_max_workers: 3               # default autovacuum worker 3
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        autovacuum_freeze_max_age: 100000000    # age > 100 million (1e8) triggers forced vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # 50ms for deadlock
        idle_in_transaction_session_timeout: 0  # Disable idle in xact timeout in analysis database

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true

        # pg_stat_statements
        pg_stat_statements.max: 10000           # 5000 -> 10000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: off   # do not track query other than CRUD
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # credentials used to connect to postgres
  #----------------------------------------------------------------------------
  # superuser has no password here; replication and rewind share the
  # replication user's credentials
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # every event invokes the same callback script
  # event callback script log: /pg/log/callback.log
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksum should be enabled before using rewind
  # the data directory is removed when rewind fails, but kept when timelines diverge
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  # entries below are pg_basebackup long options (key: value or bare flag)
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    # was misspelled "status-interva: 1s"; the option is --status-interval
    # and takes a plain number of seconds
    - status-interval: 1
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (override the defaults)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5

...

10.4 - CRIT

Patroni CRIT模板,针对金融场景、不允许数据丢失错漏的场景进行优化。

Patroni CRIT模板主要针对RPO进行优化,采用同步复制,发生故障时确保不会有数据丢失。

此模板针对的机型是Dell R740 64核/400GB内存,使用PCI-E SSD的节点。用户可以根据自己的实际机型进行调整。

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (crit)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Critical Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# CRIT database are optimized for security, integrity, RPO
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/          # namespace (DCS key prefix), note the trailing slash
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name of this member

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  level: INFO                           #  NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL
  dir: /pg/log/                         #  default log file: /pg/log/patroni.log
  file_size: 100000000                  #  rotate the log once it reaches 100MB
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
# consul is the DCS used by this template
consul:
  host: 127.0.0.1:8500         # consul endpoint on the loopback address
  consistency: default         # default|consistent|stale
  register_service: true       # register this instance as a consul service
  service_check_interval: 15s
  service_tags:                # the cluster name is attached as a service tag
    - {{ pg_cluster }}

#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose the patroni REST API
# listen on all ipv4 addresses, advertise the inventory hostname,
# use same credential as dbuser_monitor
restapi:
  listen: 0.0.0.0:{{ patroni_port }}
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  authentication:
    # NOTE(review): Patroni documents verify_client as a direct child of
    # restapi rather than of authentication - confirm it is read from here
    verify_client: none                 # none|optional|required
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'

#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
# patronictl settings
# NOTE(review): Patroni's sample config places insecure/cacert/certfile/keyfile
# directly under ctl; confirm the extra "optional" level is actually read
ctl:
  optional:
    insecure: true
    # cacert: '/path/to/ca/cert'
    # certfile: '/path/to/cert/file'
    # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
# per-member behavior flags; with these values the member takes part in
# failover, cloning, load balancing and synchronous replication
tags:
  nofailover: false                   # false: may be promoted
  clonefrom: true                     # true: preferred source for new replicas
  noloadbalance: false                # false: may serve load-balanced reads
  nosync: false                       # false: may become the synchronous standby
{% if pg_upstream is defined %}
  # only rendered when pg_upstream is defined for this instance
  replicatefrom: {{ pg_upstream }}    # replicate from this upstream member rather than the primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
# the mode is taken from the patroni_watchdog_mode variable at render time
watchdog:
  mode: {{ patroni_watchdog_mode }}
  device: /dev/watchdog
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  # new clusters are created with C locale and UTF8 encoding
  initdb:
    - locale: C
    - encoding: UTF8
    # data checksums are left disabled; wal_log_hints is turned on in the
    # parameters below to support rewind instead
    # - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users to create after the new cluster is initialized
  # the replication user and the monitor user are required;
  # the admin user is created here as well
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'
    {{ pg_admin_username }}:
      password: '{{ pg_admin_password }}'

  # bootstrap hba: ident for the local postgres user, md5 password access for
  # everyone else, both locally and from ANY ipv4 address (0.0.0.0/0),
  # for normal and replication connections alike
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # template
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #----------------------------------------------------------------------------
    # rendered only on a primary that has pg_upstream defined;
    # the upstream is contacted on this cluster's own pg_port
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be created manually on upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >= loop_wait + retry_timeout * 2
    # (satisfied with equality by the values below: 30 = 10 + 10 * 2)

    # the number of seconds the loop will sleep. Default value: 10
    # this is the patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # configure this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 120   # more patient on critical database

    # important: candidate will not be promoted if replication lag is higher than this
    # target RPO is 0 for critical database, so only 1 (byte, per Patroni docs) of lag is tolerated
    maximum_lag_on_failover: 1

    # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
    # NOTE(review): the inline remark looks copied from master_start_timeout;
    # a positive value here bounds the wait rather than extending it - confirm
    master_stop_timeout: 10   # more patient on critical database

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: true  # use sync replication on critical database

    # when true: prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    # it is false here, so synchronous replication may be disabled when no synchronous replica is available
    # NOTE(review): confirm false is intended for a template whose target RPO is 0
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    # cluster-wide postgres settings
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true


      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 400                    # 100 -> 400
        superuser_reserved_connections: 10      # reserve 10 connections for superuser
        max_locks_per_transaction: 128          # 64 -> 128
        max_prepared_transactions: 0            # 0 disables 2PC
        track_commit_timestamp: on              # record xact commit timestamps
        max_worker_processes: 8                 # default 8, raise to cpu core count if needed
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 16                     # 10 -> 16
        max_replication_slots: 16               # 10 -> 16
        wal_keep_size: 100GB                    # keep at least 100GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem are filled in from the
        # pg_shared_buffers / pg_maintenance_work_mem variables at render time
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 32MB                          # 4MB -> 32MB
        huge_pages: try                         # try huge pages
        temp_file_limit: 100GB                  # -1 (unlimited) -> 100GB
        vacuum_cost_delay: 2ms                  # sleep 2ms each time the cost limit is reached
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 800              # 100 -> 800
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 1MB             # max allowed data loss
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # 0 -> 20 (unit: microseconds), group commit delay
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 60min               # checkpoint 5min -> 1h
        checkpoint_completion_target: 0.95      # 0.5 -> 0.95
        archive_mode: on
        # archive each WAL segment, lz4-compressed, into /pg/arcwal/<YYYYMMDD>/<segment>.lz4;
        # when a segment is archived during minute 12:00, yesterday's directory
        # is removed first, so archives are retained for roughly one to two days
        # NOTE(review): "[[ ... ]]" is a bash construct - confirm the shell that
        # runs archive_command on the target hosts supports it
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # left unset here; presumably maintained by Patroni since synchronous_mode is true - confirm
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 50000         # 0->50000 last 50000 xact changes will not be vacuumed
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # time that receiver waits for communication from the primary
        max_logical_replication_workers: 8      # 4 -> 8
        max_sync_workers_per_subscription: 8    # 2 -> 8

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        # partitionwise join is left at its default (not enabled) in this template
        # enable_partitionwise_join: on
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 320GB             # max mem - shared buffer (sized for the 400GB typical spec)
        default_statistics_target: 1000         # stat bucket 100 -> 1000

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # alternative: one file per weekday, overwritten every week
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        # one file per day, never recycled automatically: old logs must be cleaned up externally
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 100         # log slow query (>100ms)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 32768        # max query length in pg_stat_activity: 32KB to show full query on critical database

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity that takes more than 1s
        autovacuum_max_workers: 3               # default autovacuum worker 3
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        # NOTE(review): the value is 1e8 (100 million), below PostgreSQL's default
        # of 200 million; if 1 billion was intended it needs one more zero - confirm
        autovacuum_freeze_max_age: 100000000    # age > 100 million triggers forced vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # wait 50ms on a lock before checking for deadlock
        idle_in_transaction_session_timeout: 1min  # 1min timeout for idle in transaction in critical database

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        # uses pg_shared_libraries when defined, otherwise falls back to
        # pg_stat_statements and auto_explain
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true  # also explain statements nested inside functions

        # pg_stat_statements
        pg_stat_statements.max: 10000           # 5000 -> 10000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: on    # also track utility commands on critical database
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # credentials used to connect to postgres
  #----------------------------------------------------------------------------
  # superuser has no password here; replication and rewind share the
  # replication user's credentials
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # every event invokes the same callback script
  # event callback script log: /pg/log/callback.log
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksum should be enabled before using rewind
  # (this template turns on wal_log_hints in the bootstrap parameters instead)
  # the data directory is removed when rewind fails, but kept when timelines diverge
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  # entries below are pg_basebackup long options (key: value or bare flag)
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    # was misspelled "status-interva: 1s"; the option is --status-interval
    # and takes a plain number of seconds
    - status-interval: 1
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (override the defaults)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5
...