Reference

Low-level reference for Pigsty

1 - Example Config

Example config file for the Pigsty sandbox

The following is the default configuration file used for the sandbox environment: pigsty.yml

---
######################################################################
# File      :   pigsty.yml
# Path      :   pigsty.yml
# Desc      :   Pigsty Configuration file
# Note      :   follow ansible inventory file format
# Ctime     :   2020-05-22
# Mtime     :   2021-03-16
# Copyright (C) 2018-2021 Ruohang Feng
######################################################################


######################################################################
#               Development Environment Inventory                    #
######################################################################
all: # top-level namespace, match all hosts


  #==================================================================#
  #                           Clusters                               #
  #==================================================================#
  # postgres database clusters are defined as kv pairs in `all.children`
  # where the key is the cluster name and the value is an object consisting
  # of cluster members (hosts) and ad-hoc variables (vars)
  # meta nodes are defined in the special group "meta" with `meta_node=true`

  children:

    #-----------------------------
    # meta controller
    #-----------------------------
    meta:       # special group 'meta' defines the main controller machine
      vars:
        meta_node: true                     # mark node as meta controller
        ansible_group_priority: 99          # meta group is top priority

      # nodes in meta group
      hosts: {10.10.10.10: {ansible_host: meta}}

    #-----------------------------
    # cluster: pg-meta
    #-----------------------------
    pg-meta:
      # - cluster members - #
      hosts:
        10.10.10.10: {pg_seq: 1, pg_role: primary, ansible_host: meta}

      # - cluster configs - #
      vars:
        pg_cluster: pg-meta                 # define actual cluster name
        pg_version: 13                      # define installed pgsql version
        node_tune: tiny                     # tune node into oltp|olap|crit|tiny mode
        pg_conf: tiny.yml                   # tune pgsql into oltp/olap/crit/tiny mode
        patroni_mode: pause                 # enter maintenance mode, {default|pause|remove}
        patroni_watchdog_mode: off          # disable watchdog (require|automatic|off)
        pg_lc_ctype: en_US.UTF8             # enable i18n character support (needed by pg_trgm)

        pg_users:
          # complete example of user/role definition for production user
          - name: dbuser_meta               # example production user has read-write access
            password: DBUser.Meta           # example user's password, can be encrypted
            login: true                     # can login, true by default (should be false for role)
            superuser: false                # is superuser? false by default
            createdb: false                 # can create database? false by default
            createrole: false               # can create role? false by default
            inherit: true                   # can this role use inherited privileges?
            replication: false              # can this role do replication? false by default
            bypassrls: false                # can this role bypass row level security? false by default
            connlimit: -1                   # connection limit, -1 disable limit
            expire_at: '2030-12-31'         # 'timestamp' when this role is expired
            expire_in: 365                  # now + n days when this role is expired (OVERWRITE expire_at)
            roles: [dbrole_readwrite]       # dbrole_admin|dbrole_readwrite|dbrole_readonly
            pgbouncer: true                 # add this user to pgbouncer? false by default (true for production user)
            parameters:                     # user's default search path
              search_path: public
            comment: test user

          # simple example for personal user definition
          - name: dbuser_vonng2             # personal user example which only has limited access to offline instances
            password: DBUser.Vonng          # or instances with the explicit mark `pg_offline_query = true`
            roles: [dbrole_offline]         # personal/stats/ETL users should be granted dbrole_offline
            expire_in: 365                  # expire in 365 days since creation
            pgbouncer: false                # personal user should NOT be allowed to login with pgbouncer
            comment: example personal user for interactive queries

        pg_databases:
          - name: meta                      # name is the only required field for a database
            # owner: postgres                 # optional, database owner
            # template: template1             # optional, template1 by default
            # encoding: UTF8                # optional, UTF8 by default, must be the same as the template database, leave blank to set to db default
            # locale: C                     # optional, C by default, must be the same as the template database, leave blank to set to db default
            # lc_collate: C                 # optional, C by default, must be the same as the template database, leave blank to set to db default
            # lc_ctype: C                   # optional, C by default, must be the same as the template database, leave blank to set to db default
            allowconn: true                 # optional, true by default, false disable connect at all
            revokeconn: false               # optional, false by default, true revoke connect from public # (only default user and owner have connect privilege on database)
            # tablespace: pg_default          # optional, 'pg_default' is the default tablespace
            connlimit: -1                   # optional, connection limit, -1 or none disable limit (default)
            extensions:                     # optional, extension name and where to create
              - {name: postgis, schema: public}
            parameters:                     # optional, extra parameters with ALTER DATABASE
              enable_partitionwise_join: true
            pgbouncer: true                 # optional, add this database to pgbouncer list? true by default
            comment: pigsty meta database   # optional, comment string for database

        pg_default_database: meta           # default database will be used as primary monitor target

        # proxy settings
        vip_mode: l2                      # enable/disable vip (require members in same LAN)
        vip_address: 10.10.10.2             # virtual ip address
        vip_cidrmask: 8                     # cidr network mask length
        vip_interface: eth1                 # interface to add virtual ip


    #-----------------------------
    # cluster: pg-test
    #-----------------------------
    pg-test: # define cluster named 'pg-test'
      # - cluster members - #
      hosts:
        10.10.10.11: {pg_seq: 1, pg_role: primary, ansible_host: node-1}
        10.10.10.12: {pg_seq: 2, pg_role: replica, ansible_host: node-2}
        10.10.10.13: {pg_seq: 3, pg_role: offline, ansible_host: node-3}

      # - cluster configs - #
      vars:
        # basic settings
        pg_cluster: pg-test                 # define actual cluster name
        pg_version: 13                      # define installed pgsql version
        node_tune: tiny                     # tune node into oltp|olap|crit|tiny mode
        pg_conf: tiny.yml                   # tune pgsql into oltp/olap/crit/tiny mode

        # business users, adjust on your own needs
        pg_users:
          - name: test                      # example production user has read-write access
            password: test                  # example user's password
            roles: [dbrole_readwrite]       # dbrole_admin|dbrole_readwrite|dbrole_readonly|dbrole_offline
            pgbouncer: true                 # production user that access via pgbouncer
            comment: default test user for production usage

        pg_databases:                       # create a business database 'test'
          - name: test                      # use the simplest form

        pg_default_database: test           # default database will be used as primary monitor target

        # proxy settings
        vip_mode: l2                        # enable/disable vip (require members in same LAN)
        vip_address: 10.10.10.3             # virtual ip address
        vip_cidrmask: 8                     # cidr network mask length
        vip_interface: eth1                 # interface to add virtual ip


  #==================================================================#
  #                           Globals                                #
  #==================================================================#
  vars:

    #------------------------------------------------------------------------------
    # CONNECTION PARAMETERS
    #------------------------------------------------------------------------------
    # this section defines connection parameters

    # ansible_user: vagrant                       # admin user with ssh access and sudo privilege

    proxy_env: # global proxy env when downloading packages
      no_proxy: "localhost,127.0.0.1,10.0.0.0/8,192.168.0.0/16,*.pigsty,*.aliyun.com,mirrors.aliyuncs.com,mirrors.tuna.tsinghua.edu.cn,mirrors.zju.edu.cn"
      # http_proxy: ''
      # https_proxy: ''
      # all_proxy: ''


    #------------------------------------------------------------------------------
    # REPO PROVISION
    #------------------------------------------------------------------------------
    # this section defines how to build a local repo

    # - repo basic - #
    repo_enabled: true                            # build local yum repo on meta nodes?
    repo_name: pigsty                             # local repo name
    repo_address: yum.pigsty                      # repo external address (ip:port or url)
    repo_port: 80                                 # listen port, must be consistent with the port in repo_address
    repo_home: /www                               # default repo dir location
    repo_rebuild: false                           # force re-download packages
    repo_remove: true                             # remove existing repos

    # - where to download - #
    repo_upstreams:
      - name: base
        description: CentOS-$releasever - Base - Aliyun Mirror
        baseurl:
          - http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
          - http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
          - http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/
        gpgcheck: no
        failovermethod: priority

      - name: updates
        description: CentOS-$releasever - Updates - Aliyun Mirror
        baseurl:
          - http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
          - http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
          - http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/
        gpgcheck: no
        failovermethod: priority

      - name: extras
        description: CentOS-$releasever - Extras - Aliyun Mirror
        baseurl:
          - http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
          - http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
          - http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/
        gpgcheck: no
        failovermethod: priority

      - name: epel
        description: CentOS $releasever - EPEL - Aliyun Mirror
        baseurl: http://mirrors.aliyun.com/epel/$releasever/$basearch
        gpgcheck: no
        failovermethod: priority

      - name: grafana
        description: Grafana - TsingHua Mirror
        gpgcheck: no
        baseurl: https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm

      - name: prometheus
        description: Prometheus and exporters
        gpgcheck: no
        baseurl: https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch

      # consider using ZJU PostgreSQL mirror in mainland china
      - name: pgdg-common
        description: PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch
        gpgcheck: no
        # baseurl: https://download.postgresql.org/pub/repos/yum/common/redhat/rhel-$releasever-$basearch
        baseurl: http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch

      - name: pgdg13
        description: PostgreSQL 13 for RHEL/CentOS $releasever - $basearch
        gpgcheck: no
        # baseurl: https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-$releasever-$basearch
        baseurl: http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch

      - name: centos-sclo
        description: CentOS-$releasever - SCLo
        gpgcheck: no
        mirrorlist: http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo

      - name: centos-sclo-rh
        description: CentOS-$releasever - SCLo rh
        gpgcheck: no
        mirrorlist: http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh

      - name: nginx
        description: Nginx Official Yum Repo
        skip_if_unavailable: true
        gpgcheck: no
        baseurl: http://nginx.org/packages/centos/$releasever/$basearch/

      - name: haproxy
        description: Copr repo for haproxy
        skip_if_unavailable: true
        gpgcheck: no
        baseurl: https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/

      # for latest consul & kubernetes
      - name: harbottle
        description: Copr repo for main owned by harbottle
        skip_if_unavailable: true
        gpgcheck: no
        baseurl: https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/

    # - what to download - #
    repo_packages:
      # repo bootstrap packages
      - epel-release nginx wget yum-utils yum createrepo                                      # bootstrap packages

      # node basic packages
      - ntp chrony uuid lz4 nc pv jq vim-enhanced make patch bash lsof wget unzip git tuned   # basic system util
      - readline zlib openssl libyaml libxml2 libxslt perl-ExtUtils-Embed ca-certificates     # basic pg dependency
      - numactl grubby sysstat dstat iotop bind-utils net-tools tcpdump socat ipvsadm telnet  # system utils

      # dcs & monitor packages
      - grafana prometheus2 pushgateway alertmanager                                          # monitor and ui
      - node_exporter postgres_exporter nginx_exporter blackbox_exporter                      # exporter
      - consul consul_exporter consul-template etcd                                           # dcs

      # python3 dependencies
      - ansible python python-pip python-psycopg2 audit                                       # ansible & python
      - python3 python3-psycopg2 python36-requests python3-etcd python3-consul                # python3
      - python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography               # python3 patroni extra deps

      # proxy and load balancer
      - haproxy keepalived dnsmasq                                                            # proxy and dns

      # postgres common Packages
      - patroni patroni-consul patroni-etcd pgbouncer pg_cli pgbadger pg_activity               # major components
      - pgcenter boxinfo check_postgres emaj pgbconsole pg_bloat_check pgquarrel                # other common utils
      - barman barman-cli pgloader pgFormatter pitrery pspg pgxnclient PyGreSQL pgadmin4 tail_n_mail

      # postgres 13 packages
      - postgresql13* postgis31* citus_13 timescaledb_13 # pgrouting_13                         # postgres 13 and postgis 31
      - pg_repack13 pg_squeeze13                                                                # maintenance extensions
      - pg_qualstats13 pg_stat_kcache13 system_stats_13 bgw_replstatus13                        # stats extensions
      - plr13 plsh13 plpgsql_check_13 plproxy13 pldebugger13                                  # PL extensions
      - hdfs_fdw_13 mongo_fdw13 mysql_fdw_13 ogr_fdw13 redis_fdw_13 pgbouncer_fdw13             # FDW extensions
      - wal2json13 count_distinct13 ddlx_13 geoip13 orafce13                                    # MISC extensions
      - rum_13 hypopg_13 ip4r13 jsquery_13 logerrors_13 periods_13 pg_auto_failover_13 pg_catcheck13
      - pg_fkpart13 pg_jobmon13 pg_partman13 pg_prioritize_13 pg_track_settings13 pgaudit15_13
      - pgcryptokey13 pgexportdoc13 pgimportdoc13 pgmemcache-13 pgmp13 pgq-13
      - pguint13 pguri13 prefix13  safeupdate_13 semver13  table_version13 tdigest13

    repo_url_packages:
      - https://github.com/Vonng/pg_exporter/releases/download/v0.3.2/pg_exporter-0.3.2-1.el7.x86_64.rpm
      - https://github.com/cybertec-postgresql/vip-manager/releases/download/v0.6/vip-manager_0.6-1_amd64.rpm
      - http://guichaz.free.fr/polysh/files/polysh-0.4-1.noarch.rpm


    #------------------------------------------------------------------------------
    # NODE PROVISION
    #------------------------------------------------------------------------------
    # this section defines how to provision nodes
    # nodename:                                   # if defined, node's hostname will be overwritten

    # - node dns - #
    node_dns_hosts: # static dns records in /etc/hosts
      - 10.10.10.10 yum.pigsty
    node_dns_server: add                          # add (default) | none (skip) | overwrite (remove old settings)
    node_dns_servers:                             # dynamic nameserver in /etc/resolv.conf
      - 10.10.10.10
    node_dns_options:                             # dns resolv options
      - options single-request-reopen timeout:1 rotate
      - domain service.consul

    # - node repo - #
    node_repo_method: local                       # none|local|public (use local repo for production env)
    node_repo_remove: true                        # whether remove existing repo
    node_local_repo_url:                          # local repo url (if method=local, make sure firewall is configured or disabled)
      - http://yum.pigsty/pigsty.repo

    # - node packages - #
    node_packages:                                # common packages for all nodes
      - wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl
      - numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq
      - python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul
      - python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography
      - node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager
    node_extra_packages:                          # extra packages for all nodes
      - patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity
    node_meta_packages:                           # packages for meta nodes only
      - grafana,prometheus2,alertmanager,nginx_exporter,blackbox_exporter,pushgateway
      - dnsmasq,nginx,ansible,pgbadger,polysh

    # - node features - #
    node_disable_numa: false                      # disable numa, important for production database, reboot required
    node_disable_swap: false                      # disable swap, important for production database
    node_disable_firewall: true                   # disable firewall (required if using kubernetes)
    node_disable_selinux: true                    # disable selinux  (required if using kubernetes)
    node_static_network: true                     # keep dns resolver settings after reboot
    node_disk_prefetch: false                     # setup disk prefetch on HDD to increase performance

    # - node kernel modules - #
    node_kernel_modules:
      - softdog
      - br_netfilter
      - ip_vs
      - ip_vs_rr
      - ip_vs_wrr
      - ip_vs_sh
      - nf_conntrack_ipv4

    # - node tuned - #
    node_tune: tiny                               # install and activate tuned profile: none|oltp|olap|crit|tiny
    node_sysctl_params:                           # set additional sysctl parameters, k:v format
      net.bridge.bridge-nf-call-iptables: 1       # for kubernetes

    # - node user - #
    node_admin_setup: true                        # setup a default admin user?
    node_admin_uid: 88                            # uid and gid for admin user
    node_admin_username: admin                    # default admin user
    node_admin_ssh_exchange: true                 # exchange ssh key among cluster ?
    node_admin_pks:                               # public key list that will be installed
      - 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com'

    # - node ntp - #
    node_ntp_service: ntp                         # ntp or chrony
    node_ntp_config: true                         # overwrite existing ntp config?
    node_timezone: Asia/Shanghai                  # default node timezone
    node_ntp_servers:                             # default NTP servers
      - pool cn.pool.ntp.org iburst
      - pool pool.ntp.org iburst
      - pool time.pool.aliyun.com iburst
      - server 10.10.10.10 iburst


    #------------------------------------------------------------------------------
    # META PROVISION
    #------------------------------------------------------------------------------
    # - ca - #
    ca_method: create                             # create|copy|recreate
    ca_subject: "/CN=root-ca"                     # self-signed CA subject
    ca_homedir: /ca                               # ca cert directory
    ca_cert: ca.crt                               # ca public key/cert
    ca_key: ca.key                                # ca private key

    # - nginx - #
    nginx_upstream:
      - { name: home,          host: pigsty,   url: "127.0.0.1:3000"}
      - { name: consul,        host: c.pigsty, url: "127.0.0.1:8500" }
      - { name: grafana,       host: g.pigsty, url: "127.0.0.1:3000" }
      - { name: prometheus,    host: p.pigsty, url: "127.0.0.1:9090" }
      - { name: alertmanager,  host: a.pigsty, url: "127.0.0.1:9093" }
      - { name: haproxy,       host: h.pigsty, url: "127.0.0.1:9091" }

    # - nameserver - #
    dns_records: # dynamic dns record resolved by dnsmasq
      - 10.10.10.2  pg-meta                       # sandbox vip for pg-meta
      - 10.10.10.3  pg-test                       # sandbox vip for pg-test
      - 10.10.10.10 meta-1                        # sandbox node meta-1 (node-0)
      - 10.10.10.11 node-1                        # sandbox node node-1
      - 10.10.10.12 node-2                        # sandbox node node-2
      - 10.10.10.13 node-3                        # sandbox node node-3
      - 10.10.10.10 pigsty
      - 10.10.10.10 y.pigsty yum.pigsty
      - 10.10.10.10 c.pigsty consul.pigsty
      - 10.10.10.10 g.pigsty grafana.pigsty
      - 10.10.10.10 p.pigsty prometheus.pigsty
      - 10.10.10.10 a.pigsty alertmanager.pigsty
      - 10.10.10.10 n.pigsty ntp.pigsty
      - 10.10.10.10 h.pigsty haproxy.pigsty

    # - prometheus - #
    prometheus_data_dir: /export/prometheus/data  # prometheus data dir
    prometheus_options: '--storage.tsdb.retention=30d'
    prometheus_reload: false                      # reload prometheus instead of recreating it
    prometheus_sd_method: consul                  # service discovery method: static|consul|etcd
    prometheus_scrape_interval: 2s                # global scrape & evaluation interval
    prometheus_scrape_timeout: 1s                 # scrape timeout
    prometheus_sd_interval: 2s                    # service discovery refresh interval

    # - grafana - #
    grafana_url: http://admin:admin@10.10.10.10:3000 # grafana url
    grafana_admin_password: admin                  # default grafana admin user password
    grafana_plugin: install                        # none|install|reinstall
    grafana_cache: /www/pigsty/grafana/plugins.tar.gz # path to grafana plugins tarball
    grafana_customize: true                        # customize grafana resources
    grafana_plugins: # default grafana plugins list
      - redis-datasource
      - simpod-json-datasource
      - fifemon-graphql-datasource
      - sbueringer-consul-datasource
      - camptocamp-prometheus-alertmanager-datasource
      - ryantxu-ajax-panel
      - marcusolsson-hourly-heatmap-panel
      - michaeldmoore-multistat-panel
      - marcusolsson-treemap-panel
      - pr0ps-trackmap-panel
      - dalvany-image-panel
      - magnesium-wordcloud-panel
      - cloudspout-button-panel
      - speakyourcode-button-panel
      - jdbranham-diagram-panel
      - grafana-piechart-panel
      - snuids-radar-panel
      - digrich-bubblechart-panel
    grafana_git_plugins:
      - https://github.com/Vonng/grafana-echarts



    #------------------------------------------------------------------------------
    # DCS PROVISION
    #------------------------------------------------------------------------------
    service_registry: consul                      # where to register services: none | consul | etcd | both
    dcs_type: consul                              # consul | etcd | both
    dcs_name: pigsty                              # consul dc name | etcd initial cluster token
    dcs_servers:                                  # dcs server dict in name:ip format
      meta-1: 10.10.10.10                         # you could use existing dcs cluster
      # meta-2: 10.10.10.11                       # host which have their IP listed here will be init as server
      # meta-3: 10.10.10.12                       # 3 or 5 dcs nodes are recommend for production environment
    dcs_exists_action: clean                      # abort|skip|clean if dcs server already exists
    dcs_disable_purge: false                      # set to true to disable purge functionality for good (force dcs_exists_action = abort)
    consul_data_dir: /var/lib/consul              # consul data dir (/var/lib/consul by default)
    etcd_data_dir: /var/lib/etcd                  # etcd data dir (/var/lib/etcd by default)


    #------------------------------------------------------------------------------
    # POSTGRES INSTALLATION
    #------------------------------------------------------------------------------
    # - dbsu - #
    pg_dbsu: postgres                             # os user for database, postgres by default (changing it is not recommended!)
    pg_dbsu_uid: 26                               # os dbsu uid and gid, 26 for default postgres users and groups
    pg_dbsu_sudo: limit                           # none|limit|all|nopass (Privilege for dbsu, limit is recommended)
    pg_dbsu_home: /var/lib/pgsql                  # home directory of the os dbsu
    pg_dbsu_ssh_exchange: false                   # exchange ssh key among same cluster

    # - postgres packages - #
    pg_version: 13                                # default postgresql version
    pgdg_repo: false                              # use official pgdg yum repo (disable if you have local mirror)
    pg_add_repo: false                            # add postgres related repo before install (useful if you want a simple install)
    pg_bin_dir: /usr/pgsql/bin                    # postgres binary dir
    pg_packages:
      - postgresql${pg_version}*
      - postgis31_${pg_version}*
      - pgbouncer patroni pg_exporter pgbadger
      - patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity
      - python3 python3-psycopg2 python36-requests python3-etcd python3-consul
      - python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography

    pg_extensions:
      - pg_repack${pg_version} pg_qualstats${pg_version} pg_stat_kcache${pg_version} wal2json${pg_version}
      # - ogr_fdw${pg_version} mysql_fdw_${pg_version} redis_fdw_${pg_version} mongo_fdw${pg_version} hdfs_fdw_${pg_version}
      # - count_distinct${version}  ddlx_${version}  geoip${version}  orafce${version}                                   # popular features
      # - hypopg_${version}  ip4r${version}  jsquery_${version}  logerrors_${version}  periods_${version}  pg_auto_failover_${version}  pg_catcheck${version}
      # - pg_fkpart${version}  pg_jobmon${version}  pg_partman${version}  pg_prioritize_${version}  pg_track_settings${version}  pgaudit15_${version}
      # - pgcryptokey${version}  pgexportdoc${version}  pgimportdoc${version}  pgmemcache-${version}  pgmp${version}  pgq-${version}  pgquarrel pgrouting_${version}
      # - pguint${version}  pguri${version}  prefix${version}   safeupdate_${version}  semver${version}   table_version${version}  tdigest${version}



    #------------------------------------------------------------------------------
    # POSTGRES PROVISION
    #------------------------------------------------------------------------------
    # - identity - #
    # pg_cluster:                                 # [REQUIRED] cluster name (validated during pg_preflight)
    # pg_seq: 0                                   # [REQUIRED] instance seq (validated during pg_preflight)
    # pg_role: replica                            # [REQUIRED] service role (validated during pg_preflight)
    pg_hostname: false                            # overwrite node hostname with pg instance name
    pg_nodename: true                             # overwrite consul nodename with pg instance name

    # - retention - #
    # pg_exists_action, available options: abort|clean|skip
    #  - abort: abort entire play's execution (default)
    #  - clean: remove existing cluster (dangerous)
    #  - skip: end current play for this host
    # pg_exists: false                            # auxiliary flag variable (DO NOT SET THIS)
    pg_exists_action: clean
    pg_disable_purge: false                       # set to true to disable pg purge functionality for good (force pg_exists_action = abort)

    # - storage - #
    pg_data: /pg/data                             # postgres data directory
    pg_fs_main: /export                           # data disk mount point     /pg -> {{ pg_fs_main }}/postgres/{{ pg_instance }}
    pg_fs_bkup: /var/backups                      # backup disk mount point   /pg/* -> {{ pg_fs_bkup }}/postgres/{{ pg_instance }}/*

    # - connection - #
    pg_listen: '0.0.0.0'                          # postgres listen address, '0.0.0.0' by default (all ipv4 addr)
    pg_port: 5432                                 # postgres port (5432 by default)
    pg_localhost: /var/run/postgresql             # localhost unix socket dir for connection

    # - patroni - #
    # patroni_mode, available options: default|pause|remove
    #   - default: default ha mode
    #   - pause:   into maintenance mode
    #   - remove:  remove patroni after bootstrap
    patroni_mode: default                         # pause|default|remove
    pg_namespace: /pg                             # top level key namespace in dcs
    patroni_port: 8008                            # default patroni port
    patroni_watchdog_mode: automatic              # watchdog mode: off|automatic|required
    pg_conf: tiny.yml                             # user provided patroni config template path

    # - localization - #
    pg_encoding: UTF8                             # default to UTF8
    pg_locale: C                                  # default to C
    pg_lc_collate: C                              # default to C
    pg_lc_ctype: en_US.UTF8                       # default to en_US.UTF8

    # - pgbouncer - #
    pgbouncer_port: 6432                          # pgbouncer port (6432 by default)
    pgbouncer_poolmode: transaction               # pooling mode: (transaction pooling by default)
    pgbouncer_max_db_conn: 100                    # important! do not set this larger than postgres max conn or conn limit


    #------------------------------------------------------------------------------
    # POSTGRES TEMPLATE
    #------------------------------------------------------------------------------
    # - template - #
    pg_init: pg-init                              # init script for cluster template

    # - system roles - #
    pg_replication_username: replicator           # system replication user
    pg_replication_password: DBUser.Replicator    # system replication password
    pg_monitor_username: dbuser_monitor           # system monitor user
    pg_monitor_password: DBUser.Monitor           # system monitor password
    pg_admin_username: dbuser_admin               # system admin user
    pg_admin_password: DBUser.Admin               # system admin password

    # - default roles - #
    # check http://pigsty.cc/zh/docs/concepts/provision/acl/ for more details
    pg_default_roles:

      # common production readonly user
      - name: dbrole_readonly                 # production read-only roles
        login: false
        comment: role for global readonly access

      # common production read-write user
      - name: dbrole_readwrite                # production read-write roles
        login: false
        roles: [dbrole_readonly]             # read-write includes read-only access
        comment: role for global read-write access

      # offline has the same privileges as readonly, but with hba access limited to offline instances only
      # for the purpose of running slow queries, interactive queries and ETL tasks
      - name: dbrole_offline
        login: false
        comment: role for restricted read-only access (offline instance)

      # admin have the privileges to issue DDL changes
      - name: dbrole_admin
        login: false
        bypassrls: true
        comment: role for object creation
        roles: [dbrole_readwrite,pg_monitor,pg_signal_backend]

      # dbsu, name is designated by `pg_dbsu`. It is not recommended to set a password for dbsu
      - name: postgres
        superuser: true
        comment: system superuser

      # default replication user, name is designated by `pg_replication_username`, and password is set by `pg_replication_password`
      - name: replicator
        replication: true                          # for replication user
        bypassrls: true                            # logical replication require bypassrls
        roles: [pg_monitor, dbrole_readonly]       # logical replication require select privileges
        comment: system replicator

      # default monitor user, name is designated by `pg_monitor_username`, and password is set by `pg_monitor_password`
      - name: dbuser_monitor
        connlimit: 16
        comment: system monitor user
        roles: [pg_monitor, dbrole_readonly]

      # default admin user, name is designated by `pg_admin_username`, and password is set by `pg_admin_password`
      - name: dbuser_admin
        bypassrls: true
        superuser: true
        comment: system admin user
        roles: [dbrole_admin]

      # default stats user, for ETL and slow queries
      - name: dbuser_stats
        password: DBUser.Stats
        comment: business offline user for offline queries and ETL
        roles: [dbrole_offline]


    # - privileges - #
    # objects created by dbsu and admin will have their privileges properly set
    pg_default_privileges:
      - GRANT USAGE                         ON SCHEMAS   TO dbrole_readonly
      - GRANT SELECT                        ON TABLES    TO dbrole_readonly
      - GRANT SELECT                        ON SEQUENCES TO dbrole_readonly
      - GRANT EXECUTE                       ON FUNCTIONS TO dbrole_readonly
      - GRANT USAGE                         ON SCHEMAS   TO dbrole_offline
      - GRANT SELECT                        ON TABLES    TO dbrole_offline
      - GRANT SELECT                        ON SEQUENCES TO dbrole_offline
      - GRANT EXECUTE                       ON FUNCTIONS TO dbrole_offline
      - GRANT INSERT, UPDATE, DELETE        ON TABLES    TO dbrole_readwrite
      - GRANT USAGE,  UPDATE                ON SEQUENCES TO dbrole_readwrite
      - GRANT TRUNCATE, REFERENCES, TRIGGER ON TABLES    TO dbrole_admin
      - GRANT CREATE                        ON SCHEMAS   TO dbrole_admin

    # - schemas - #
    pg_default_schemas: [monitor]                 # default schemas to be created

    # - extension - #
    pg_default_extensions:                        # default extensions to be created
      - { name: 'pg_stat_statements',  schema: 'monitor' }
      - { name: 'pgstattuple',         schema: 'monitor' }
      - { name: 'pg_qualstats',        schema: 'monitor' }
      - { name: 'pg_buffercache',      schema: 'monitor' }
      - { name: 'pageinspect',         schema: 'monitor' }
      - { name: 'pg_prewarm',          schema: 'monitor' }
      - { name: 'pg_visibility',       schema: 'monitor' }
      - { name: 'pg_freespacemap',     schema: 'monitor' }
      - { name: 'pg_repack',           schema: 'monitor' }
      - name: postgres_fdw
      - name: file_fdw
      - name: btree_gist
      - name: btree_gin
      - name: pg_trgm
      - name: intagg
      - name: intarray

    # - hba - #
    pg_offline_query: false                       # set to true to enable offline query on instance
    pg_reload: true                               # reload postgres after hba changes
    pg_hba_rules:                                 # postgres host-based authentication rules
      - title: allow meta node password access
        role: common
        rules:
          - host    all     all                         10.10.10.10/32      md5

      - title: allow intranet admin password access
        role: common
        rules:
          - host    all     +dbrole_admin               10.0.0.0/8          md5
          - host    all     +dbrole_admin               172.16.0.0/12       md5
          - host    all     +dbrole_admin               192.168.0.0/16      md5

      - title: allow intranet password access
        role: common
        rules:
          - host    all             all                 10.0.0.0/8          md5
          - host    all             all                 172.16.0.0/12       md5
          - host    all             all                 192.168.0.0/16      md5

      - title: allow local read/write (local production user via pgbouncer)
        role: common
        rules:
          - local   all     +dbrole_readonly                                md5
          - host    all     +dbrole_readonly           127.0.0.1/32         md5

      - title: allow offline query (ETL,SAGA,Interactive) on offline instance
        role: offline
        rules:
          - host    all     +dbrole_offline               10.0.0.0/8        md5
          - host    all     +dbrole_offline               172.16.0.0/12     md5
          - host    all     +dbrole_offline               192.168.0.0/16    md5

    pg_hba_rules_extra: []                        # extra hba rules (for cluster/instance overwrite)

    pgbouncer_hba_rules:                          # pgbouncer host-based authentication rules
      - title: local password access
        role: common
        rules:
          - local  all          all                                     md5
          - host   all          all                     127.0.0.1/32    md5

      - title: intranet password access
        role: common
        rules:
          - host   all          all                     10.0.0.0/8      md5
          - host   all          all                     172.16.0.0/12   md5
          - host   all          all                     192.168.0.0/16  md5

    pgbouncer_hba_rules_extra: []                 # extra pgbouncer hba rules (for cluster/instance overwrite)


    #------------------------------------------------------------------------------
    # MONITOR PROVISION
    #------------------------------------------------------------------------------
    # - install - #
    exporter_install: none                        # none|yum|binary, none by default
    exporter_repo_url: ''                         # if set, repo will be added to /etc/yum.repos.d/ before yum installation

    # - collect - #
    exporter_metrics_path: /metrics               # default metric path for pg related exporter

    # - node exporter - #
    node_exporter_enabled: true                   # setup node_exporter on instance
    node_exporter_port: 9100                      # default port for node exporter
    node_exporter_options: '--no-collector.softnet --collector.systemd --collector.ntp --collector.tcpstat --collector.processes'

    # - pg exporter - #
    pg_exporter_config: pg_exporter-demo.yaml     # default config files for pg_exporter
    pg_exporter_enabled: true                     # setup pg_exporter on instance
    pg_exporter_port: 9630                        # default port for pg exporter
    pg_exporter_url: ''                           # optional, if not set, generate from reference parameters

    # - pgbouncer exporter - #
    pgbouncer_exporter_enabled: true              # setup pgbouncer_exporter on instance (if you don't have pgbouncer, disable it)
    pgbouncer_exporter_port: 9631                 # default port for pgbouncer exporter
    pgbouncer_exporter_url: ''                    # optional, if not set, generate from reference parameters


    #------------------------------------------------------------------------------
    # SERVICE PROVISION
    #------------------------------------------------------------------------------
    pg_weight: 100              # default load balance weight (instance level)

    # - service - #
    pg_services:                                  # how to expose postgres service in cluster?
      # primary service will route {ip|name}:5433 to primary pgbouncer (5433->6432 rw)
      - name: primary           # service name {{ pg_cluster }}_primary
        src_ip: "*"
        src_port: 5433
        dst_port: pgbouncer     # 5433 route to pgbouncer
        check_url: /primary     # primary health check, success when instance is primary
        selector: "[]"          # select all instances as primary service candidates

      # replica service will route {ip|name}:5434 to replica pgbouncer (5434->6432 ro)
      - name: replica           # service name {{ pg_cluster }}_replica
        src_ip: "*"
        src_port: 5434
        dst_port: pgbouncer
        check_url: /read-only   # read-only health check. (including primary)
        selector: "[]"          # select all instances as replica service candidates
        selector_backup: "[? pg_role == `primary`]"   # the primary is used as a backup server for the replica service

      # default service will route {ip|name}:5436 to primary postgres (5436->5432 primary)
      - name: default           # service's actual name is {{ pg_cluster }}-{{ service.name }}
        src_ip: "*"             # service bind ip address, * for all, vip for cluster virtual ip address
        src_port: 5436          # bind port, mandatory
        dst_port: postgres      # target port: postgres|pgbouncer|port_number , pgbouncer(6432) by default
        check_method: http      # health check method: only http is available for now
        check_port: patroni     # health check port:  patroni|pg_exporter|port_number , patroni by default
        check_url: /primary     # health check url path, / as default
        check_code: 200         # health check http code, 200 as default
        selector: "[]"          # instance selector
        haproxy:                # haproxy specific fields
          maxconn: 3000         # default front-end connection
          balance: roundrobin   # load balance algorithm (roundrobin by default)
          default_server_options: 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'

      # offline service will route {ip|name}:5438 to offline postgres (5438->5432 offline)
      - name: offline           # service name {{ pg_cluster }}_offline
        src_ip: "*"
        src_port: 5438
        dst_port: postgres
        check_url: /replica     # offline MUST be a replica
        selector: "[? pg_role == `offline` || pg_offline_query ]"         # instances with pg_role == 'offline' or instances marked with 'pg_offline_query == true'
        selector_backup: "[? pg_role == `replica` && !pg_offline_query]"  # replicas are used as backup servers for the offline service

    pg_services_extra: []        # extra services to be added

    # - haproxy - #
    haproxy_enabled: true                         # enable haproxy on every cluster member
    haproxy_reload: true                          # reload haproxy after config
    haproxy_policy: roundrobin                    # roundrobin, leastconn
    haproxy_admin_auth_enabled: false             # enable authentication for haproxy admin?
    haproxy_admin_username: admin                 # default haproxy admin username
    haproxy_admin_password: admin                 # default haproxy admin password
    haproxy_exporter_port: 9101                   # default admin/exporter port
    haproxy_client_timeout: 3h                    # client side connection timeout
    haproxy_server_timeout: 3h                    # server side connection timeout

    # - vip - #
    vip_mode: none                                # none | l2 | l4
    vip_reload: true                              # whether reload service after config
    # vip_address: 127.0.0.1                      # virtual ip address ip (l2 or l4)
    # vip_cidrmask: 24                            # virtual ip address cidr mask (l2 only)
    # vip_interface: eth0                         # virtual ip network interface (l2 only)

...
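Since pigsty.yml follows the Ansible inventory format, it can be sanity-checked with stock Ansible tooling before any playbooks are run. A minimal sketch, assuming Ansible is installed on the admin/meta node (the host IPs are the sandbox addresses defined above):

# render the inventory and confirm that clusters, hosts and vars parse as expected
ansible-inventory -i pigsty.yml --graph
ansible-inventory -i pigsty.yml --host 10.10.10.11    # show merged variables for a single node

# verify ssh and sudo connectivity to every host defined above
ansible all -i pigsty.yml -m ping -b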

2 - Kernel Optimize

Pigsty parameter tuning for the OS kernel

Pigsty uses tuned to adjust operating system configuration. tuned is the tuning tool that ships with CentOS 7.

Pigsty Tuned Configuration

By default, Pigsty installs four tuned profiles for the operating system:

  • OLTP: for regular business databases, optimized for latency
  • OLAP: for analytical databases, optimized for throughput
  • CRIT: for critical business databases, optimized for RPO
  • TINY: for micro instances and virtual machines

tuned-adm profile oltp    # activate the OLTP profile
tuned-adm profile olap    # activate the OLAP profile
tuned-adm profile crit    # activate the CRIT profile
tuned-adm profile tiny    # activate the TINY profile
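Which profile a node receives is driven by the node_tune variable in the inventory shown earlier; the pg_conf template is usually chosen to match. A minimal sketch of switching a cluster to the OLAP profile, assuming an olap.yml parameter template exists alongside tiny.yml as the oltp/olap/crit/tiny naming suggests:

pg-test:
  vars:
    pg_cluster: pg-test
    node_tune: olap          # install and activate the olap tuned profile on cluster nodes
    pg_conf: olap.yml        # matching postgres parameter template (assumed to exist)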

Basic Tuned Operations

# To start tuned, run the following command as root:
systemctl start tuned

# To enable tuned on every boot:
systemctl enable tuned

# For other tuned controls, such as profile selection, use:
tuned-adm

# To list the available installed profiles (the tuned service must be running):
tuned-adm list

# To show the currently active profile:
tuned-adm active

# To select or activate a profile:
tuned-adm profile profile
# for example
tuned-adm profile powersave

# To let tuned recommend the most suitable profile for your system, without changing any existing profile or using the logic applied during installation:
tuned-adm recommend

# To disable all tuning:
tuned-adm off

To list all available profiles and identify the currently active one, run:
tuned-adm list
To show only the currently active profile, run:
tuned-adm active
To switch to an available profile, run:
tuned-adm profile profile_name
For example:
tuned-adm profile server-powersave
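After switching profiles it is worth verifying that the settings actually took effect. A minimal spot-check, assuming one of the Pigsty profiles below is active; the exact values depend on the profile:

tuned-adm active                                      # confirm which profile is active
cat /sys/kernel/mm/transparent_hugepage/enabled       # the profiles below set this to [never]
sysctl kernel.sched_migration_cost_ns vm.swappiness   # spot-check sysctl values applied by the profile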

OLTP Configuration

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to oltp mode
# Path      :   /etc/tuned/oltp/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLTP System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g. < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

# vm.dirty_background_bytes=67108864 # 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=10                 # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

OLAP Configuration

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-09-18
# Desc      :   Tune operating system to olap mode
# Path      :   /etc/tuned/olap/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLAP System
include=network-throughput

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g. < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
# vm.swappiness=10

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

vm.dirty_background_ratio = 10    # throughput-performance default
vm.dirty_ratio=80                 # throughput-performance default 40 -> 80

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

CRIT Configuration

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to crit mode
# Path      :   /etc/tuned/crit/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL CRIT System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), raise to 5ms, depending on your typical query duration (e.g. < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not to use swap
vm.swappiness=0

# disable when most memory is used for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 100%
vm.overcommit_memory=2
vm.overcommit_ratio=100

# 64MB of dirty memory (2x RAID cache) wakes the bgwriter
vm.dirty_background_bytes=67108864
# vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=6                    # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async I/O requests: 65536 -> 1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connections in listen queue (a full queue triggers retransmission)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

TINY Configuration

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to tiny mode
# Path      :   /etc/tuned/tiny/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL TINY System
# include=virtual-guest

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up.  Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40

# Filesystem I/O is usually much more efficient than swapping, so try to keep
# swapping low.  It's usually safe to go even lower than this on systems with
# server-grade storage.
vm.swappiness = 30

#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

Database Kernel Tuning Reference

# Database kernel optimisation
fs.aio-max-nr = 1048576 # limit on concurrent unfinished async I/O requests, should be no less than 1M
fs.file-max = 16777216  # max 16M open files

# kernel
kernel.shmmax = 485058      # max number of shared memory pages: $(expr $(getconf _PHYS_PAGES) / 2)
kernel.shmall = 1986797568  # total shared memory size: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
kernel.shmmni = 16384       # system-wide max number of shared memory segments: 4096 -> 16384
kernel.msgmni = 32768       # number of system message queues, limits how many agent processes can start; set to mem size in MB
kernel.msgmnb = 65536       # max size of a message queue
kernel.msgmax = 65536       # max size of a single message
kernel.numa_balancing = 0   # disable NUMA balancing
kernel.sched_migration_cost_ns = 5000000 # within 5ms the scheduler still considers a migrated process cache-hot
kernel.sem = 2048 134217728 2048 65536   # max 2048 semaphores per set, 134217728 semaphores system-wide, max 2048 ops per semop call, 65536 semaphore sets in total

# vm
vm.dirty_ratio = 80                       # hard limit: beyond 80% dirty memory, writes block and flush synchronously
vm.dirty_background_bytes = 268435456     # 256MB of dirty data wakes the flusher process
vm.dirty_expire_centisecs = 6000          # data dirty for more than 1 minute is considered due for flushing
vm.dirty_writeback_centisecs = 500        # flusher process runs every 5 seconds
vm.mmap_min_addr = 65536                  # deny access to memory below 0x10000
vm.zone_reclaim_mode = 0                  # disable NUMA zone reclaim

# vm swap
vm.swappiness = 0                         # avoid swap, though it may still happen under high memory pressure
vm.overcommit_memory = 2                  # allow a limited degree of overcommit
vm.overcommit_ratio = 50                  # allowed overcommit ratio: $((($mem - $swap) * 100 / $mem))

# tcp memory
net.ipv4.tcp_rmem = 8192 65536 16777216     # tcp read buffer (min/default/max): 8K / 64K / 16M
net.ipv4.tcp_wmem = 8192 65536 16777216     # tcp write buffer (min/default/max): 8K / 64K / 16M
net.ipv4.tcp_mem = 131072 262144 16777216   # tcp memory thresholds in pages (low/pressure/high): 512MB / 1GB / 64GB
net.core.rmem_default = 262144              # receive buffer default size: 256K
net.core.rmem_max = 4194304                 # receive buffer max size: 4M
net.core.wmem_default = 262144              # send buffer default size: 256K
net.core.wmem_max = 4194304                 # send buffer max size: 4M
# tcp keepalive
net.ipv4.tcp_keepalive_intvl = 20   # interval between unacknowledged keepalive probes: default 75s -> 20s
net.ipv4.tcp_keepalive_probes = 3   # 3 * 20s = 1 min before the connection is dropped
net.ipv4.tcp_keepalive_time = 60    # keepalive probe period: 1 min
# tcp port reuse
net.ipv4.tcp_tw_reuse = 1           # allow reusing TIME_WAIT sockets for new TCP connections (default 0)
net.ipv4.tcp_tw_recycle = 0         # fast recycling, deprecated
net.ipv4.tcp_fin_timeout = 5        # seconds to stay in FIN-WAIT-2 state
net.ipv4.tcp_timestamps = 1
# tcp anti-flood
net.ipv4.tcp_syncookies = 1         # send SYN cookies when the SYN_RECV queue overflows, to defend against SYN floods
net.ipv4.tcp_synack_retries = 1     # SYN+ACK retries for half-open connections: default 5 -> 1
net.ipv4.tcp_syn_retries = 1        # number of SYN packets sent before the kernel gives up establishing a connection
# tcp load-balancer
net.ipv4.ip_forward = 1                     # enable IP forwarding
net.ipv4.ip_nonlocal_bind = 1               # allow binding to non-local addresses
net.netfilter.nf_conntrack_max = 1048576    # max tracked connections
net.ipv4.ip_local_port_range = 10000 65535  # local port range
net.ipv4.tcp_max_tw_buckets = 262144        # 256K TIME_WAIT buckets
net.core.somaxconn = 65535                  # max length of the LISTEN backlog queue; a full queue triggers retransmission
net.ipv4.tcp_max_syn_backlog = 8192         # SYN queue size: 1024 -> 8192
net.core.netdev_max_backlog = 8192          # queue length allowed when the NIC receives packets faster than the kernel drains them

3 - Metrics List

Available metrics list in Pigsty

Below is the list of monitoring metrics currently available in Pigsty.

For the rules that define derived metrics, please refer to Derived Metrics.
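
Any metric in this list can be queried directly from Prometheus or referenced in Grafana panels. As a minimal, hypothetical illustration (the label values pg-test and test are placeholders, not part of the shipped dashboards), a raw counter from this list and its derived cluster-level rate could be queried like this:

# raw counter from the list below, filtered to one database in one cluster
pg_db_xact_commit{cls="pg-test", datname="test"}

# derived 1-minute commit rate for the whole cluster (see Derived Metrics)
pg:cls:commits{cls="pg-test"}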

Monitoring Metrics List

name
go_gc_duration_seconds
go_gc_duration_seconds_count
go_gc_duration_seconds_sum
go_goroutines
go_info
go_memstats_alloc_bytes
go_memstats_alloc_bytes_total
go_memstats_buck_hash_sys_bytes
go_memstats_frees_total
go_memstats_gc_cpu_fraction
go_memstats_gc_sys_bytes
go_memstats_heap_alloc_bytes
go_memstats_heap_idle_bytes
go_memstats_heap_inuse_bytes
go_memstats_heap_objects
go_memstats_heap_released_bytes
go_memstats_heap_sys_bytes
go_memstats_last_gc_time_seconds
go_memstats_lookups_total
go_memstats_mallocs_total
go_memstats_mcache_inuse_bytes
go_memstats_mcache_sys_bytes
go_memstats_mspan_inuse_bytes
go_memstats_mspan_sys_bytes
go_memstats_next_gc_bytes
go_memstats_other_sys_bytes
go_memstats_stack_inuse_bytes
go_memstats_stack_sys_bytes
go_memstats_sys_bytes
go_threads
haproxy_backend_active_servers
haproxy_backend_backup_servers
haproxy_backend_bytes_in_total
haproxy_backend_bytes_out_total
haproxy_backend_check_last_change_seconds
haproxy_backend_check_up_down_total
haproxy_backend_client_aborts_total
haproxy_backend_connect_time_average_seconds
haproxy_backend_connection_attempts_total
haproxy_backend_connection_errors_total
haproxy_backend_connection_reuses_total
haproxy_backend_current_queue
haproxy_backend_current_sessions
haproxy_backend_downtime_seconds_total
haproxy_backend_failed_header_rewriting_total
haproxy_backend_http_cache_hits_total
haproxy_backend_http_cache_lookups_total
haproxy_backend_http_comp_bytes_bypassed_total
haproxy_backend_http_comp_bytes_in_total
haproxy_backend_http_comp_bytes_out_total
haproxy_backend_http_comp_responses_total
haproxy_backend_http_requests_total
haproxy_backend_http_responses_total
haproxy_backend_internal_errors_total
haproxy_backend_last_session_seconds
haproxy_backend_limit_sessions
haproxy_backend_loadbalanced_total
haproxy_backend_max_connect_time_seconds
haproxy_backend_max_queue
haproxy_backend_max_queue_time_seconds
haproxy_backend_max_response_time_seconds
haproxy_backend_max_session_rate
haproxy_backend_max_sessions
haproxy_backend_max_total_time_seconds
haproxy_backend_queue_time_average_seconds
haproxy_backend_redispatch_warnings_total
haproxy_backend_requests_denied_total
haproxy_backend_response_errors_total
haproxy_backend_response_time_average_seconds
haproxy_backend_responses_denied_total
haproxy_backend_retry_warnings_total
haproxy_backend_server_aborts_total
haproxy_backend_sessions_total
haproxy_backend_status
haproxy_backend_total_time_average_seconds
haproxy_backend_weight
haproxy_frontend_bytes_in_total
haproxy_frontend_bytes_out_total
haproxy_frontend_connections_rate_max
haproxy_frontend_connections_total
haproxy_frontend_current_sessions
haproxy_frontend_denied_connections_total
haproxy_frontend_denied_sessions_total
haproxy_frontend_failed_header_rewriting_total
haproxy_frontend_http_cache_hits_total
haproxy_frontend_http_cache_lookups_total
haproxy_frontend_http_comp_bytes_bypassed_total
haproxy_frontend_http_comp_bytes_in_total
haproxy_frontend_http_comp_bytes_out_total
haproxy_frontend_http_comp_responses_total
haproxy_frontend_http_requests_rate_max
haproxy_frontend_http_requests_total
haproxy_frontend_http_responses_total
haproxy_frontend_intercepted_requests_total
haproxy_frontend_internal_errors_total
haproxy_frontend_limit_session_rate
haproxy_frontend_limit_sessions
haproxy_frontend_max_session_rate
haproxy_frontend_max_sessions
haproxy_frontend_request_errors_total
haproxy_frontend_requests_denied_total
haproxy_frontend_responses_denied_total
haproxy_frontend_sessions_total
haproxy_frontend_status
haproxy_process_active_peers
haproxy_process_busy_polling_enabled
haproxy_process_connected_peers
haproxy_process_connections_total
haproxy_process_current_backend_ssl_key_rate
haproxy_process_current_connection_rate
haproxy_process_current_connections
haproxy_process_current_frontend_ssl_key_rate
haproxy_process_current_run_queue
haproxy_process_current_session_rate
haproxy_process_current_ssl_connections
haproxy_process_current_ssl_rate
haproxy_process_current_tasks
haproxy_process_current_zlib_memory
haproxy_process_dropped_logs_total
haproxy_process_frontent_ssl_reuse
haproxy_process_hard_max_connections
haproxy_process_http_comp_bytes_in_total
haproxy_process_http_comp_bytes_out_total
haproxy_process_idle_time_percent
haproxy_process_jobs
haproxy_process_limit_connection_rate
haproxy_process_limit_http_comp
haproxy_process_limit_session_rate
haproxy_process_limit_ssl_rate
haproxy_process_listeners
haproxy_process_max_backend_ssl_key_rate
haproxy_process_max_connection_rate
haproxy_process_max_connections
haproxy_process_max_fds
haproxy_process_max_frontend_ssl_key_rate
haproxy_process_max_memory_bytes
haproxy_process_max_pipes
haproxy_process_max_session_rate
haproxy_process_max_sockets
haproxy_process_max_ssl_connections
haproxy_process_max_ssl_rate
haproxy_process_max_zlib_memory
haproxy_process_nbproc
haproxy_process_nbthread
haproxy_process_pipes_free_total
haproxy_process_pipes_used_total
haproxy_process_pool_allocated_bytes
haproxy_process_pool_failures_total
haproxy_process_pool_used_bytes
haproxy_process_relative_process_id
haproxy_process_requests_total
haproxy_process_ssl_cache_lookups_total
haproxy_process_ssl_cache_misses_total
haproxy_process_ssl_connections_total
haproxy_process_start_time_seconds
haproxy_process_stopping
haproxy_process_unstoppable_jobs
haproxy_server_bytes_in_total
haproxy_server_bytes_out_total
haproxy_server_check_code
haproxy_server_check_duration_seconds
haproxy_server_check_failures_total
haproxy_server_check_last_change_seconds
haproxy_server_check_status
haproxy_server_check_up_down_total
haproxy_server_client_aborts_total
haproxy_server_connect_time_average_seconds
haproxy_server_connection_attempts_total
haproxy_server_connection_errors_total
haproxy_server_connection_reuses_total
haproxy_server_current_queue
haproxy_server_current_sessions
haproxy_server_current_throttle
haproxy_server_downtime_seconds_total
haproxy_server_failed_header_rewriting_total
haproxy_server_internal_errors_total
haproxy_server_last_session_seconds
haproxy_server_limit_sessions
haproxy_server_loadbalanced_total
haproxy_server_max_connect_time_seconds
haproxy_server_max_queue
haproxy_server_max_queue_time_seconds
haproxy_server_max_response_time_seconds
haproxy_server_max_session_rate
haproxy_server_max_sessions
haproxy_server_max_total_time_seconds
haproxy_server_queue_limit
haproxy_server_queue_time_average_seconds
haproxy_server_redispatch_warnings_total
haproxy_server_response_errors_total
haproxy_server_response_time_average_seconds
haproxy_server_responses_denied_total
haproxy_server_retry_warnings_total
haproxy_server_server_aborts_total
haproxy_server_server_idle_connections_current
haproxy_server_server_idle_connections_limit
haproxy_server_sessions_total
haproxy_server_status
haproxy_server_total_time_average_seconds
haproxy_server_weight
node:cls:cpu_count
node:cls:cpu_mode
node:cls:cpu_usage
node:cls:cpu_usage_avg5m
node:cls:disk_io_rate
node:cls:disk_iops
node:cls:disk_read_iops
node:cls:disk_read_rate
node:cls:disk_write_iops
node:cls:disk_write_rate
node:cls:mem_usage
node:cls:network_io
node:cls:network_rx
node:cls:network_tx
node:cls:ntp_offset_range
node:cls:sched_timeslicesa
node:cpu:cpu_mode
node:cpu:cpu_usage
node:cpu:cpu_usage_avg5m
node:cpu:sched_timeslices
node:dev:disk_io_rate
node:dev:disk_iops
node:dev:disk_read_iops
node:dev:disk_read_rate
node:dev:disk_read_rt
node:dev:disk_read_time
node:dev:disk_write_iops
node:dev:disk_write_rate
node:dev:disk_write_rt
node:dev:disk_write_time
node:dev:network_io_rate
node:dev:network_rx
node:dev:network_tx
node:fs:avail_bytes
node:fs:free_bytes
node:fs:free_inode
node:fs:inode_usage
node:fs:size_bytes
node:fs:space_deriv_1h
node:fs:space_exhaust
node:fs:space_usage
node:fs:total_inode
node:ins:cpu_count
node:ins:cpu_mode
node:ins:cpu_usage
node:ins:cpu_usage_avg5m
node:ins:ctx_switch
node:ins:disk_io_rate
node:ins:disk_iops
node:ins:disk_read_iops
node:ins:disk_read_rate
node:ins:disk_write_iops
node:ins:disk_write_rate
node:ins:fd_usage
node:ins:forks
node:ins:intrrupt
node:ins:mem_app
node:ins:mem_free
node:ins:mem_usage
node:ins:network_io
node:ins:network_rx
node:ins:network_tx
node:ins:pagefault
node:ins:pagein
node:ins:pageout
node:ins:sched_timeslices
node:ins:stdload1
node:ins:stdload15
node:ins:stdload5
node:ins:swap_usage
node:ins:swapin
node:ins:swapout
node:ins:tcp_active_opens
node:ins:tcp_dropped
node:ins:tcp_insegs
node:ins:tcp_outsegs
node:ins:tcp_overflow
node:ins:tcp_overflow_rate
node:ins:tcp_passive_opens
node:ins:tcp_retrans_rate
node:ins:tcp_retranssegs
node:ins:tcp_segs
node:uptime
node_arp_entries
node_boot_time_seconds
node_context_switches_total
node_cooling_device_cur_state
node_cooling_device_max_state
node_cpu_guest_seconds_total
node_cpu_seconds_total
node_disk_io_now
node_disk_io_time_seconds_total
node_disk_io_time_weighted_seconds_total
node_disk_read_bytes_total
node_disk_read_time_seconds_total
node_disk_reads_completed_total
node_disk_reads_merged_total
node_disk_write_time_seconds_total
node_disk_writes_completed_total
node_disk_writes_merged_total
node_disk_written_bytes_total
node_entropy_available_bits
node_exporter_build_info
node_filefd_allocated
node_filefd_maximum
node_filesystem_avail_bytes
node_filesystem_device_error
node_filesystem_files
node_filesystem_files_free
node_filesystem_free_bytes
node_filesystem_readonly
node_filesystem_size_bytes
node_forks_total
node_intr_total
node_ipvs_connections_total
node_ipvs_incoming_bytes_total
node_ipvs_incoming_packets_total
node_ipvs_outgoing_bytes_total
node_ipvs_outgoing_packets_total
node_load1
node_load15
node_load5
node_memory_Active_anon_bytes
node_memory_Active_bytes
node_memory_Active_file_bytes
node_memory_AnonHugePages_bytes
node_memory_AnonPages_bytes
node_memory_Bounce_bytes
node_memory_Buffers_bytes
node_memory_Cached_bytes
node_memory_CmaFree_bytes
node_memory_CmaTotal_bytes
node_memory_CommitLimit_bytes
node_memory_Committed_AS_bytes
node_memory_DirectMap2M_bytes
node_memory_DirectMap4k_bytes
node_memory_Dirty_bytes
node_memory_HardwareCorrupted_bytes
node_memory_HugePages_Free
node_memory_HugePages_Rsvd
node_memory_HugePages_Surp
node_memory_HugePages_Total
node_memory_Hugepagesize_bytes
node_memory_Inactive_anon_bytes
node_memory_Inactive_bytes
node_memory_Inactive_file_bytes
node_memory_KernelStack_bytes
node_memory_Mapped_bytes
node_memory_MemAvailable_bytes
node_memory_MemFree_bytes
node_memory_MemTotal_bytes
node_memory_Mlocked_bytes
node_memory_NFS_Unstable_bytes
node_memory_PageTables_bytes
node_memory_Percpu_bytes
node_memory_SReclaimable_bytes
node_memory_SUnreclaim_bytes
node_memory_Shmem_bytes
node_memory_Slab_bytes
node_memory_SwapCached_bytes
node_memory_SwapFree_bytes
node_memory_SwapTotal_bytes
node_memory_Unevictable_bytes
node_memory_VmallocChunk_bytes
node_memory_VmallocTotal_bytes
node_memory_VmallocUsed_bytes
node_memory_WritebackTmp_bytes
node_memory_Writeback_bytes
node_netstat_Icmp6_InErrors
node_netstat_Icmp6_InMsgs
node_netstat_Icmp6_OutMsgs
node_netstat_Icmp_InErrors
node_netstat_Icmp_InMsgs
node_netstat_Icmp_OutMsgs
node_netstat_Ip6_InOctets
node_netstat_Ip6_OutOctets
node_netstat_IpExt_InOctets
node_netstat_IpExt_OutOctets
node_netstat_Ip_Forwarding
node_netstat_TcpExt_ListenDrops
node_netstat_TcpExt_ListenOverflows
node_netstat_TcpExt_SyncookiesFailed
node_netstat_TcpExt_SyncookiesRecv
node_netstat_TcpExt_SyncookiesSent
node_netstat_TcpExt_TCPSynRetrans
node_netstat_Tcp_ActiveOpens
node_netstat_Tcp_CurrEstab
node_netstat_Tcp_InErrs
node_netstat_Tcp_InSegs
node_netstat_Tcp_OutSegs
node_netstat_Tcp_PassiveOpens
node_netstat_Tcp_RetransSegs
node_netstat_Udp6_InDatagrams
node_netstat_Udp6_InErrors
node_netstat_Udp6_NoPorts
node_netstat_Udp6_OutDatagrams
node_netstat_Udp6_RcvbufErrors
node_netstat_Udp6_SndbufErrors
node_netstat_UdpLite6_InErrors
node_netstat_UdpLite_InErrors
node_netstat_Udp_InDatagrams
node_netstat_Udp_InErrors
node_netstat_Udp_NoPorts
node_netstat_Udp_OutDatagrams
node_netstat_Udp_RcvbufErrors
node_netstat_Udp_SndbufErrors
node_network_address_assign_type
node_network_carrier
node_network_carrier_changes_total
node_network_device_id
node_network_dormant
node_network_flags
node_network_iface_id
node_network_iface_link
node_network_iface_link_mode
node_network_info
node_network_mtu_bytes
node_network_net_dev_group
node_network_protocol_type
node_network_receive_bytes_total
node_network_receive_compressed_total
node_network_receive_drop_total
node_network_receive_errs_total
node_network_receive_fifo_total
node_network_receive_frame_total
node_network_receive_multicast_total
node_network_receive_packets_total
node_network_transmit_bytes_total
node_network_transmit_carrier_total
node_network_transmit_colls_total
node_network_transmit_compressed_total
node_network_transmit_drop_total
node_network_transmit_errs_total
node_network_transmit_fifo_total
node_network_transmit_packets_total
node_network_transmit_queue_length
node_network_up
node_nf_conntrack_entries
node_nf_conntrack_entries_limit
node_ntp_leap
node_ntp_offset_seconds
node_ntp_reference_timestamp_seconds
node_ntp_root_delay_seconds
node_ntp_root_dispersion_seconds
node_ntp_rtt_seconds
node_ntp_sanity
node_ntp_stratum
node_power_supply_capacity
node_power_supply_cyclecount
node_power_supply_energy_full
node_power_supply_energy_full_design
node_power_supply_energy_watthour
node_power_supply_info
node_power_supply_online
node_power_supply_power_watt
node_power_supply_present
node_power_supply_voltage_min_design
node_power_supply_voltage_volt
node_processes_max_processes
node_processes_max_threads
node_processes_pids
node_processes_state
node_processes_threads
node_procs_blocked
node_procs_running
node_schedstat_running_seconds_total
node_schedstat_timeslices_total
node_schedstat_waiting_seconds_total
node_scrape_collector_duration_seconds
node_scrape_collector_success
node_sockstat_FRAG6_inuse
node_sockstat_FRAG6_memory
node_sockstat_FRAG_inuse
node_sockstat_FRAG_memory
node_sockstat_RAW6_inuse
node_sockstat_RAW_inuse
node_sockstat_TCP6_inuse
node_sockstat_TCP_alloc
node_sockstat_TCP_inuse
node_sockstat_TCP_mem
node_sockstat_TCP_mem_bytes
node_sockstat_TCP_orphan
node_sockstat_TCP_tw
node_sockstat_UDP6_inuse
node_sockstat_UDPLITE6_inuse
node_sockstat_UDPLITE_inuse
node_sockstat_UDP_inuse
node_sockstat_UDP_mem
node_sockstat_UDP_mem_bytes
node_sockstat_sockets_used
node_systemd_socket_accepted_connections_total
node_systemd_socket_current_connections
node_systemd_system_running
node_systemd_timer_last_trigger_seconds
node_systemd_unit_state
node_systemd_units
node_systemd_version
node_tcp_connection_states
node_textfile_scrape_error
node_time_seconds
node_timex_estimated_error_seconds
node_timex_frequency_adjustment_ratio
node_timex_loop_time_constant
node_timex_maxerror_seconds
node_timex_offset_seconds
node_timex_pps_calibration_total
node_timex_pps_error_total
node_timex_pps_frequency_hertz
node_timex_pps_jitter_seconds
node_timex_pps_jitter_total
node_timex_pps_shift_seconds
node_timex_pps_stability_exceeded_total
node_timex_pps_stability_hertz
node_timex_status
node_timex_sync_status
node_timex_tai_offset_seconds
node_timex_tick_seconds
node_udp_queues
node_uname_info
node_vmstat_pgfault
node_vmstat_pgmajfault
node_vmstat_pgpgin
node_vmstat_pgpgout
node_vmstat_pswpin
node_vmstat_pswpout
node_xfs_allocation_btree_compares_total
node_xfs_allocation_btree_lookups_total
node_xfs_allocation_btree_records_deleted_total
node_xfs_allocation_btree_records_inserted_total
node_xfs_block_map_btree_compares_total
node_xfs_block_map_btree_lookups_total
node_xfs_block_map_btree_records_deleted_total
node_xfs_block_map_btree_records_inserted_total
node_xfs_block_mapping_extent_list_compares_total
node_xfs_block_mapping_extent_list_deletions_total
node_xfs_block_mapping_extent_list_insertions_total
node_xfs_block_mapping_extent_list_lookups_total
node_xfs_block_mapping_reads_total
node_xfs_block_mapping_unmaps_total
node_xfs_block_mapping_writes_total
node_xfs_directory_operation_create_total
node_xfs_directory_operation_getdents_total
node_xfs_directory_operation_lookup_total
node_xfs_directory_operation_remove_total
node_xfs_extent_allocation_blocks_allocated_total
node_xfs_extent_allocation_blocks_freed_total
node_xfs_extent_allocation_extents_allocated_total
node_xfs_extent_allocation_extents_freed_total
node_xfs_read_calls_total
node_xfs_vnode_active_total
node_xfs_vnode_allocate_total
node_xfs_vnode_get_total
node_xfs_vnode_hold_total
node_xfs_vnode_reclaim_total
node_xfs_vnode_release_total
node_xfs_vnode_remove_total
node_xfs_write_calls_total
pg:all:active_backends
pg:all:age
pg:all:backends
pg:all:buf_alloc
pg:all:buf_flush
pg:all:commits
pg:all:commits_realtime
pg:all:ixact_backends
pg:all:lag_bytes
pg:all:lag_seconds
pg:all:qps_realtime
pg:all:rollbacks
pg:all:rollbacks_realtime
pg:all:sessions
pg:all:tps_realtime
pg:all:tup_deleted
pg:all:tup_inserted
pg:all:tup_modified
pg:all:tup_selected
pg:all:tup_touched
pg:all:tup_updated
pg:all:wal_rate
pg:all:xacts
pg:all:xacts_avg30m
pg:all:xacts_mu
pg:all:xacts_realtime
pg:all:xacts_sigma
pg:cls:active_backends
pg:cls:age
pg:cls:backends
pg:cls:buf_alloc
pg:cls:buf_flush
pg:cls:ckpt_1h
pg:cls:commits
pg:cls:commits_realtime
pg:cls:ixact_backends
pg:cls:lag_bytes
pg:cls:lag_seconds
pg:cls:leader
pg:cls:load0
pg:cls:load1
pg:cls:load15
pg:cls:load5
pg:cls:lock_count
pg:cls:locks
pg:cls:primarys
pg:cls:qps_realtime
pg:cls:replicas
pg:cls:rlock
pg:cls:rollbacks
pg:cls:rollbacks_realtime
pg:cls:saturation0
pg:cls:saturation1
pg:cls:saturation15
pg:cls:saturation5
pg:cls:sessions
pg:cls:size
pg:cls:synchronous
pg:cls:temp_bytes
pg:cls:temp_files
pg:cls:timeline
pg:cls:tps_realtime
pg:cls:tup_deleted
pg:cls:tup_inserted
pg:cls:tup_modified
pg:cls:tup_selected
pg:cls:tup_touched
pg:cls:tup_updated
pg:cls:wal_rate
pg:cls:wlock
pg:cls:xacts
pg:cls:xacts_avg30m
pg:cls:xacts_mu
pg:cls:xacts_realtime
pg:cls:xacts_sigma
pg:cls:xlock
pg:db:age_deriv_1h
pg:db:age_exhaust
pg:db:backends
pg:db:blks_access_1m
pg:db:blks_hit_1m
pg:db:blks_read_1m
pg:db:buffer_hit_rate
pg:db:commits
pg:db:commits_realtime
pg:db:io_time_usage
pg:db:lock_count
pg:db:locks
pg:db:pool_current_conn
pg:db:pool_disabled
pg:db:pool_max_conn
pg:db:pool_paused
pg:db:pool_reserve_size
pg:db:pool_size
pg:db:qps_realtime
pg:db:read_time_usage
pg:db:rlock
pg:db:rollbacks
pg:db:rollbacks_realtime
pg:db:sessions
pg:db:temp_bytes
pg:db:temp_files
pg:db:tps_realtime
pg:db:tup_deleted
pg:db:tup_inserted
pg:db:tup_modified
pg:db:tup_selected
pg:db:tup_touched
pg:db:tup_updated
pg:db:wlock
pg:db:write_time_usage
pg:db:xacts
pg:db:xacts_avg30m
pg:db:xacts_mu
pg:db:xacts_realtime
pg:db:xacts_sigma
pg:db:xlock
pg:ins:active_backends
pg:ins:age
pg:ins:backends
pg:ins:buf_alloc
pg:ins:buf_flush
pg:ins:buf_flush_backend
pg:ins:buf_flush_checkpoint
pg:ins:checkpoint_lsn
pg:ins:ckpt_req
pg:ins:ckpt_timed
pg:ins:commits
pg:ins:commits_realtime
pg:ins:free_clients
pg:ins:free_servers
pg:ins:hit_rate
pg:ins:ixact_backends
pg:ins:lag_bytes
pg:ins:lag_seconds
pg:ins:last_ckpt
pg:ins:load0
pg:ins:load1
pg:ins:load15
pg:ins:load5
pg:ins:lock_count
pg:ins:locks
pg:ins:login_clients
pg:ins:pool_databases
pg:ins:pool_users
pg:ins:pools
pg:ins:qps_realtime
pg:ins:query_rt
pg:ins:query_rt_avg30m
pg:ins:query_rt_mu
pg:ins:query_rt_sigma
pg:ins:query_time_rate15m
pg:ins:query_time_rate1m
pg:ins:query_time_rate5m
pg:ins:recv_init_lsn
pg:ins:recv_init_tli
pg:ins:recv_last_lsn
pg:ins:recv_last_tli
pg:ins:redo_lsn
pg:ins:rlock
pg:ins:rollbacks
pg:ins:rollbacks_realtime
pg:ins:saturation0
pg:ins:saturation1
pg:ins:saturation15
pg:ins:saturation5
pg:ins:sessions
pg:ins:slot_retained_bytes
pg:ins:temp_bytes
pg:ins:temp_files
pg:ins:tps_realtime
pg:ins:tup_deleted
pg:ins:tup_inserted
pg:ins:tup_modified
pg:ins:tup_selected
pg:ins:tup_touched
pg:ins:tup_updated
pg:ins:used_clients
pg:ins:wal_rate
pg:ins:wlock
pg:ins:xact_rt
pg:ins:xact_rt_avg30m
pg:ins:xact_rt_mu
pg:ins:xact_rt_sigma
pg:ins:xact_time_rate15m
pg:ins:xact_time_rate1m
pg:ins:xact_time_rate5m
pg:ins:xacts
pg:ins:xacts_avg30m
pg:ins:xacts_mu
pg:ins:xacts_realtime
pg:ins:xacts_sigma
pg:ins:xlock
pg:query:call
pg:query:rt
pg:svc:active_backends
pg:svc:backends
pg:svc:buf_alloc
pg:svc:buf_flush
pg:svc:commits
pg:svc:commits_realtime
pg:svc:ixact_backends
pg:svc:load0
pg:svc:load1
pg:svc:load15
pg:svc:load5
pg:svc:lock_count
pg:svc:locks
pg:svc:qps_realtime
pg:svc:query_rt
pg:svc:query_rt_avg30m
pg:svc:query_rt_mu
pg:svc:query_rt_sigma
pg:svc:rlock
pg:svc:rollbacks
pg:svc:rollbacks_realtime
pg:svc:sessions
pg:svc:temp_bytes
pg:svc:temp_files
pg:svc:tps_realtime
pg:svc:tup_deleted
pg:svc:tup_inserted
pg:svc:tup_modified
pg:svc:tup_selected
pg:svc:tup_touched
pg:svc:tup_updated
pg:svc:wlock
pg:svc:xact_rt
pg:svc:xact_rt_avg30m
pg:svc:xact_rt_mu
pg:svc:xact_rt_sigma
pg:svc:xacts
pg:svc:xacts_avg30m
pg:svc:xacts_mu
pg:svc:xacts_realtime
pg:svc:xacts_sigma
pg:svc:xlock
pg_activity_count
pg_activity_max_conn_duration
pg_activity_max_duration
pg_activity_max_tx_duration
pg_backend_count
pg_backup_time
pg_bgwriter_buffers_alloc
pg_bgwriter_buffers_backend
pg_bgwriter_buffers_backend_fsync
pg_bgwriter_buffers_checkpoint
pg_bgwriter_buffers_clean
pg_bgwriter_checkpoint_sync_time
pg_bgwriter_checkpoint_write_time
pg_bgwriter_checkpoints_req
pg_bgwriter_checkpoints_timed
pg_bgwriter_maxwritten_clean
pg_bgwriter_stats_reset
pg_boot_time
pg_checkpoint_checkpoint_lsn
pg_checkpoint_elapse
pg_checkpoint_full_page_writes
pg_checkpoint_newest_commit_ts_xid
pg_checkpoint_next_multi_offset
pg_checkpoint_next_multixact_id
pg_checkpoint_next_oid
pg_checkpoint_next_xid
pg_checkpoint_next_xid_epoch
pg_checkpoint_oldest_active_xid
pg_checkpoint_oldest_commit_ts_xid
pg_checkpoint_oldest_multi_dbid
pg_checkpoint_oldest_multi_xid
pg_checkpoint_oldest_xid
pg_checkpoint_oldest_xid_dbid
pg_checkpoint_prev_tli
pg_checkpoint_redo_lsn
pg_checkpoint_time
pg_checkpoint_tli
pg_class_relage
pg_class_relpages
pg_class_relsize
pg_class_reltuples
pg_conf_reload_time
pg_database_age
pg_database_allow_conn
pg_database_conn_limit
pg_database_frozen_xid
pg_database_is_template
pg_db_blk_read_time
pg_db_blk_write_time
pg_db_blks_access
pg_db_blks_hit
pg_db_blks_read
pg_db_checksum_failures
pg_db_checksum_last_failure
pg_db_confl_bufferpin
pg_db_confl_deadlock
pg_db_confl_lock
pg_db_confl_snapshot
pg_db_confl_tablespace
pg_db_conflicts
pg_db_deadlocks
pg_db_numbackends
pg_db_stats_reset
pg_db_temp_bytes
pg_db_temp_files
pg_db_tup_deleted
pg_db_tup_fetched
pg_db_tup_inserted
pg_db_tup_modified
pg_db_tup_returned
pg_db_tup_updated
pg_db_xact_commit
pg_db_xact_rollback
pg_db_xact_total
pg_downstream_count
pg_exporter_last_scrape_time
pg_exporter_query_cache_ttl
pg_exporter_query_scrape_duration
pg_exporter_query_scrape_error_count
pg_exporter_query_scrape_hit_count
pg_exporter_query_scrape_metric_count
pg_exporter_query_scrape_total_count
pg_exporter_scrape_duration
pg_exporter_scrape_error_count
pg_exporter_scrape_total_count
pg_exporter_server_scrape_duration
pg_exporter_server_scrape_total_count
pg_exporter_server_scrape_total_seconds
pg_exporter_up
pg_exporter_uptime
pg_flush_lsn
pg_func_calls
pg_func_self_time
pg_func_total_time
pg_in_recovery
pg_index_bloat_ratio
pg_index_bloat_size
pg_index_idx_blks_hit
pg_index_idx_blks_read
pg_index_idx_scan
pg_index_idx_tup_fetch
pg_index_idx_tup_read
pg_insert_lsn
pg_is_in_backup
pg_is_in_recovery
pg_is_primary
pg_is_replica
pg_is_wal_replay_paused
pg_lag
pg_last_replay_time
pg_lock_count
pg_lsn
pg_meta_info
pg_query_blk_io_time
pg_query_calls
pg_query_max_time
pg_query_mean_time
pg_query_min_time
pg_query_rows
pg_query_stddev_time
pg_query_total_time
pg_query_wal_bytes
pg_receive_lsn
pg_replay_lsn
pg_setting_block_size
pg_setting_data_checksums
pg_setting_max_connections
pg_setting_max_locks_per_transaction
pg_setting_max_prepared_transactions
pg_setting_max_replication_slots
pg_setting_max_wal_senders
pg_setting_max_worker_processes
pg_setting_wal_log_hints
pg_shmem_allocated_size
pg_shmem_offset
pg_shmem_size
pg_size_bytes
pg_slru_blks_exists
pg_slru_blks_hit
pg_slru_blks_read
pg_slru_blks_written
pg_slru_blks_zeroed
pg_slru_flushes
pg_slru_stats_reset
pg_slru_truncates
pg_status
pg_sync_standby_disabled
pg_sync_standby_enabled
pg_table_analyze_count
pg_table_autoanalyze_count
pg_table_autovacuum_count
pg_table_bloat_ratio
pg_table_bloat_size
pg_table_heap_blks_hit
pg_table_heap_blks_read
pg_table_idx_blks_hit
pg_table_idx_blks_read
pg_table_idx_scan
pg_table_idx_tup_fetch
pg_table_last_analyze
pg_table_last_autoanalyze
pg_table_last_autovacuum
pg_table_last_vacuum
pg_table_n_dead_tup
pg_table_n_live_tup
pg_table_n_mod_since_analyze
pg_table_n_tup_del
pg_table_n_tup_hot_upd
pg_table_n_tup_ins
pg_table_n_tup_mod
pg_table_n_tup_upd
pg_table_seq_scan
pg_table_seq_tup_read
pg_table_size_bytes
pg_table_size_indexsize
pg_table_size_relsize
pg_table_size_toastsize
pg_table_tbl_scan
pg_table_tidx_blks_hit
pg_table_tidx_blks_read
pg_table_toast_blks_hit
pg_table_toast_blks_read
pg_table_tup_read
pg_table_vacuum_count
pg_timeline
pg_timestamp
pg_up
pg_uptime
pg_version
pg_write_lsn
pg_xact_xmax
pg_xact_xmin
pg_xact_xnum
pgbouncer_database_current_connections
pgbouncer_database_disabled
pgbouncer_database_max_connections
pgbouncer_database_paused
pgbouncer_database_pool_size
pgbouncer_database_reserve_pool
pgbouncer_exporter_last_scrape_time
pgbouncer_exporter_query_cache_ttl
pgbouncer_exporter_query_scrape_duration
pgbouncer_exporter_query_scrape_error_count
pgbouncer_exporter_query_scrape_hit_count
pgbouncer_exporter_query_scrape_metric_count
pgbouncer_exporter_query_scrape_total_count
pgbouncer_exporter_scrape_duration
pgbouncer_exporter_scrape_error_count
pgbouncer_exporter_scrape_total_count
pgbouncer_exporter_server_scrape_duration
pgbouncer_exporter_server_scrape_total_count
pgbouncer_exporter_server_scrape_total_seconds
pgbouncer_exporter_up
pgbouncer_exporter_uptime
pgbouncer_in_recovery
pgbouncer_list_items
pgbouncer_pool_active_clients
pgbouncer_pool_active_servers
pgbouncer_pool_idle_servers
pgbouncer_pool_login_servers
pgbouncer_pool_maxwait
pgbouncer_pool_maxwait_us
pgbouncer_pool_tested_servers
pgbouncer_pool_used_servers
pgbouncer_pool_waiting_clients
pgbouncer_stat_avg_query_count
pgbouncer_stat_avg_query_time
pgbouncer_stat_avg_recv
pgbouncer_stat_avg_sent
pgbouncer_stat_avg_wait_time
pgbouncer_stat_avg_xact_count
pgbouncer_stat_avg_xact_time
pgbouncer_stat_total_query_count
pgbouncer_stat_total_query_time
pgbouncer_stat_total_received
pgbouncer_stat_total_sent
pgbouncer_stat_total_wait_time
pgbouncer_stat_total_xact_count
pgbouncer_stat_total_xact_time
pgbouncer_up
pgbouncer_version
process_cpu_seconds_total
process_max_fds
process_open_fds
process_resident_memory_bytes
process_start_time_seconds
process_virtual_memory_bytes
process_virtual_memory_max_bytes
promhttp_metric_handler_errors_total
promhttp_metric_handler_requests_in_flight
promhttp_metric_handler_requests_total
scrape_duration_seconds
scrape_samples_post_metric_relabeling
scrape_samples_scraped
scrape_series_added
up

4 - Derived Metrics

How Pigsty's derived monitoring metrics are defined

Here are the recording rules that define all of Pigsty's derived metrics.
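
Derived metrics follow a <source>:<level>:<name> naming convention, where the level marks the aggregation granularity: per-device or per-database records (cpu, dev, fs, db, query, pool), ins for an instance, svc for a service, cls for a cluster, and all for the whole environment. Higher levels are usually plain rollups of the level below them. As a sketch of this pattern (the two records below appear verbatim in the PgSQL rules further down), a cluster-level rate is simply the sum of the instance-level rate over the cls label:

- record: pg:ins:commits
  expr: sum without (datname) (pg:db:commits)
- record: pg:cls:commits
  expr: sum by (cls) (pg:ins:commits)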

Derived Metrics for Node

---
  - name: node-rules
    rules:
      #==============================================================#
      #                         Aliveness                            #
      #==============================================================#
      # TODO: change this to your node exporter port
      - record: node_exporter_up
        expr: up{instance=~".*:9099"}
      - record: node:uptime
        expr: time() - node_boot_time_seconds{}


      #==============================================================#
      #                             CPU                              #
      #==============================================================#
      # cpu mode time ratio
      - record: node:cpu:cpu_mode
        expr: irate(node_cpu_seconds_total{}[1m])
      - record: node:ins:cpu_mode
        expr: sum without (cpu) (node:cpu:cpu_mode)
      - record: node:cls:cpu_mode
        expr: sum by (cls, mode) (node:ins:cpu_mode)

      # cpu schedule time-slices
      - record: node:cpu:sched_timeslices
        expr: irate(node_schedstat_timeslices_total{}[1m])
      - record: node:ins:sched_timeslices
        expr: sum without (cpu) (node:cpu:sched_timeslices)
      - record: node:cls:sched_timeslicesa
        expr: sum by (cls) (node:ins:sched_timeslices)

      # cpu count
      - record: node:ins:cpu_count
        expr: count without (cpu) (node:cpu:cpu_usage)
      - record: node:cls:cpu_count
        expr: sum by (cls) (node:ins:cpu_count)

      # cpu usage
      - record: node:cpu:cpu_usage
        expr: 1 - sum without (mode) (node:cpu:cpu_mode{mode="idle"})
      - record: node:ins:cpu_usage
        expr: sum without (cpu) (node:cpu:cpu_usage) / node:ins:cpu_count
      - record: node:cls:cpu_usage
        expr: sum by (cls) (node:ins:cpu_usage * node:ins:cpu_count) / sum by (cls) (node:ins:cpu_count)

      # cpu usage avg5m
      - record: node:cpu:cpu_usage_avg5m
        expr: avg_over_time(node:cpu:cpu_usage[5m])
      - record: node:ins:cpu_usage_avg5m
        expr: avg_over_time(node:ins:cpu_usage[5m])
      - record: node:cls:cpu_usage_avg5m
        expr: avg_over_time(node:cls:cpu_usage[5m])

      #==============================================================#
      #                            Memory                            #
      #==============================================================#
      # mem usage
      - record: node:ins:mem_app
        expr: node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_Slab_bytes - node_memory_PageTables_bytes - node_memory_SwapCached_bytes
      - record: node:ins:mem_free
        expr: node_memory_MemFree_bytes{} + node_memory_Cached_bytes{}
      - record: node:ins:mem_usage
        expr: node:ins:mem_app / node_memory_MemTotal_bytes
      - record: node:cls:mem_usage
        expr: sum by (cls) (node:ins:mem_app) / sum by (cls) (node_memory_MemTotal_bytes)
      - record: node:ins:swap_usage
        expr: 1 - node_memory_SwapFree_bytes{} / node_memory_SwapTotal_bytes{}


      #==============================================================#
      #                            Disk                              #
      #==============================================================#
      # disk read iops
      - record: node:dev:disk_read_iops
        expr: irate(node_disk_reads_completed_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_read_iops
        expr: sum without (device) (node:dev:disk_read_iops)
      - record: node:cls:disk_read_iops
        expr: sum by (cls) (node:ins:disk_read_iops)

      # disk write iops
      - record: node:dev:disk_write_iops
        expr: irate(node_disk_writes_completed_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_write_iops
        expr: sum without (device) (node:dev:disk_write_iops)
      - record: node:cls:disk_write_iops
        expr: sum by (cls) (node:ins:disk_write_iops)

      # disk iops
      - record: node:dev:disk_iops
        expr: node:dev:disk_read_iops + node:dev:disk_write_iops
      - record: node:ins:disk_iops
        expr: node:ins:disk_read_iops + node:ins:disk_write_iops
      - record: node:cls:disk_iops
        expr: node:cls:disk_read_iops + node:cls:disk_write_iops

      # read bandwidth (rate1m)
      - record: node:dev:disk_read_rate
        expr: rate(node_disk_read_bytes_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_read_rate
        expr: sum without (device) (node:dev:disk_read_rate)
      - record: node:cls:disk_read_rate
        expr: sum by (cls) (node:ins:disk_read_rate)

      # write bandwidth (rate1m)
      - record: node:dev:disk_write_rate
        expr: rate(node_disk_written_bytes_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:ins:disk_write_rate
        expr: sum without (device) (node:dev:disk_write_rate)
      - record: node:cls:disk_write_rate
        expr: sum by (cls) (node:ins:disk_write_rate)

      # io bandwidth (rate1m)
      - record: node:dev:disk_io_rate
        expr: node:dev:disk_read_rate + node:dev:disk_write_rate
      - record: node:ins:disk_io_rate
        expr: node:ins:disk_read_rate + node:ins:disk_write_rate
      - record: node:cls:disk_io_rate
        expr: node:cls:disk_read_rate + node:cls:disk_write_rate

      # read/write total time
      - record: node:dev:disk_read_time
        expr: rate(node_disk_read_time_seconds_total{device=~"[a-zA-Z-_]+"}[1m])
      - record: node:dev:disk_write_time
        expr: rate(node_disk_write_time_seconds_total{device=~"[a-zA-Z-_]+"}[1m])

      # read/write response time
      - record: node:dev:disk_read_rt
        expr: node:dev:disk_read_time / node:dev:disk_read_iops
      - record: node:dev:disk_write_rt
        expr: node:dev:disk_write_time / node:dev:disk_write_iops
      - record: node:dev:disk_rt
        expr: (node:dev:disk_read_time + node:dev:disk_write_time) / node:dev:disk_iops


      #==============================================================#
      #                            Network                           #
      #==============================================================#
      # transmit bandwidth (out)
      - record: node:dev:network_tx
        expr: irate(node_network_transmit_bytes_total{}[1m])
      - record: node:ins:network_tx
        expr: sum without (device) (node:dev:network_tx{device!~"lo|bond.*"})
      - record: node:cls:network_tx
        expr: sum by (cls) (node:ins:network_tx)

      # receive bandwidth (in)
      - record: node:dev:network_rx
        expr: irate(node_network_receive_bytes_total{}[1m])
      - record: node:ins:network_rx
        expr: sum without (device) (node:dev:network_rx{device!~"lo|bond.*"})
      - record: node:cls:network_rx
        expr: sum by (cls) (node:ins:network_rx)

      # io bandwidth
      - record: node:dev:network_io_rate
        expr: node:dev:network_tx + node:dev:network_rx
      - record: node:ins:network_io
        expr: node:ins:network_tx + node:ins:network_rx
      - record: node:cls:network_io
        expr: node:cls:network_tx + node:cls:network_rx


      #==============================================================#
      #                           Schedule                           #
      #==============================================================#
      # normalized load
      - record: node:ins:stdload1
        expr: node_load1 / node:ins:cpu_count
      - record: node:ins:stdload5
        expr: node_load5 / node:ins:cpu_count
      - record: node:ins:stdload15
        expr: node_load15 / node:ins:cpu_count

      # process
      - record: node:ins:forks
        expr: irate(node_forks_total[1m])
      # interrupt & context switch
      - record: node:ins:intrrupt
        expr: irate(node_intr_total[1m])
      - record: node:ins:ctx_switch
        expr: irate(node_context_switches_total{}[1m])


      #==============================================================#
      #                              VM                              #
      #==============================================================#
      - record: node:ins:pagefault
        expr: irate(node_vmstat_pgfault[1m])
      - record: node:ins:pagein
        expr: irate(node_vmstat_pgpgin[1m])
      - record: node:ins:pageout
        expr: irate(node_vmstat_pgpgout[1m])
      - record: node:ins:swapin
        expr: irate(node_vmstat_pswpin[1m])
      - record: node:ins:swapout
        expr: irate(node_vmstat_pswpout[1m])


      #==============================================================#
      #                              FS                              #
      #==============================================================#
      # filesystem space usage
      - record: node:fs:free_bytes
        expr: max without(device, fstype) (node_filesystem_free_bytes{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:avail_bytes
        expr: max without(device, fstype) (node_filesystem_avail_bytes{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:size_bytes
        expr: max without(device, fstype) (node_filesystem_size_bytes{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:space_usage
        expr: 1 - (node:fs:avail_bytes{} / node:fs:size_bytes{})
      - record: node:fs:free_inode
        expr: max without(device, fstype) (node_filesystem_files_free{fstype!~"(n|root|tmp)fs.*"})
      - record: node:fs:total_inode
        expr: max without(device, fstype) (node_filesystem_files{fstype!~"(n|root|tmp)fs.*"})

      # space delta and prediction
      - record: node:fs:space_deriv_1h
        expr: 0 - deriv(node_filesystem_avail_bytes{}[1h])
      - record: node:fs:space_exhaust
        expr: (node_filesystem_avail_bytes{} / node:fs:space_deriv_1h{}) > 0

      # fs inode usage
      - record: node:fs:inode_usage
        expr: 1 - (node:fs:free_inode / node:fs:total_inode)
      # file descriptor usage
      - record: node:ins:fd_usage
        expr: node_filefd_allocated / node_filefd_maximum


      #==============================================================#
      #                             TCP                              #
      #==============================================================#
      # tcp segments (rate1m)
      - record: node:ins:tcp_insegs
        expr: rate(node_netstat_Tcp_InSegs{}[1m])
      - record: node:ins:tcp_outsegs
        expr: rate(node_netstat_Tcp_OutSegs{}[1m])
      - record: node:ins:tcp_retranssegs
        expr: rate(node_netstat_Tcp_RetransSegs{}[1m])
      - record: node:ins:tcp_segs
        expr: node:ins:tcp_insegs + node:ins:tcp_outsegs
      # retransmit
      - record: node:ins:tcp_retrans_rate
        expr: node:ins:tcp_retranssegs / node:ins:tcp_outsegs
      # overflow
      - record: node:ins:tcp_overflow_rate
        expr: rate(node_netstat_TcpExt_ListenOverflows[1m])


      #==============================================================#
      #                           Netstat                            #
      #==============================================================#
      # tcp open (rate1m)
      - record: node:ins:tcp_passive_opens
        expr: rate(node_netstat_Tcp_PassiveOpens[1m])
      - record: node:ins:tcp_active_opens
        expr: rate(node_netstat_Tcp_ActiveOpens[1m])
      # tcp close
      - record: node:ins:tcp_attempt_fails
        expr: rate(node_netstat_Tcp_AttemptFails[1m])
      - record: node:ins:tcp_estab_resets
        expr: rate(node_netstat_Tcp_EstabResets[1m])
      # tcp drop
      - record: node:ins:tcp_overflow
        expr: rate(node_netstat_TcpExt_ListenOverflows[1m])
      - record: node:ins:tcp_dropped
        expr: rate(node_netstat_TcpExt_ListenDrops[1m])


      #==============================================================#
      #                             NTP                              #
      #==============================================================#
      - record: node:cls:ntp_offset_range
        expr: max by (cls)(node_ntp_offset_seconds) - min by (cls)(node_ntp_offset_seconds)

...

Derived Metrics about postgres and pgbouncer

---
#==============================================================#
# File      :   pgsql.yml
# Ctime     :   2020-04-22
# Mtime     :   2020-12-03
# Desc      :   Record and alert rules for postgres
# Path      :   /etc/prometheus/rules/pgsql.yml
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

groups:

  ################################################################
  #                         PgSQL Rules                          #
  ################################################################
  - name: pgsql-rules
    rules:

      #==============================================================#
      #                        Aliveness                             #
      #==============================================================#
      # TODO: change these to your pg_exporter & pgbouncer_exporter port
      - record: pg_exporter_up
        expr: up{instance=~".*:9185"}

      - record: pgbouncer_exporter_up
        expr: up{instance=~".*:9127"}


      #==============================================================#
      #                        Identity                              #
      #==============================================================#
      - record: pg_is_primary
        expr: 1 - pg_in_recovery
      - record: pg_is_replica
        expr: pg_in_recovery
      - record: pg_status
        expr: (pg_up{} * 2) +  (1 - pg_in_recovery{})
      # encoded: 0:replica[DOWN] 1:primary[DOWN] 2:replica 3:primary
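      # e.g. a hypothetical alert expression "pg_status < 2" would match any instance whose postgres is down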


      #==============================================================#
      #                            Age                               #
      #==============================================================#
      # age
      - record: pg:ins:age
        expr: max without (datname) (pg_database_age{datname!~"template[0-9]"})
      - record: pg:cls:age
        expr: max by (cls) (pg:ins:age)
      - record: pg:all:age
        expr: max(pg:cls:age)

      # age derive and prediction
      - record: pg:db:age_deriv_1h
        expr: deriv(pg_database_age{}[1h])
      - record: pg:db:age_exhaust
        expr: (2147483648 - pg_database_age{}) / pg:db:age_deriv_1h



      #==============================================================#
      #                         Sessions                             #
      #==============================================================#
      # session count (by state)
      - record: pg:db:sessions
        expr: pg_activity_count
      - record: pg:ins:sessions
        expr: sum without (datname) (pg:db:sessions)
      - record: pg:svc:sessions
        expr: sum by (cls, role, state) (pg:ins:sessions)
      - record: pg:cls:sessions
        expr: sum by (cls, state) (pg:ins:sessions)
      - record: pg:all:sessions
        expr: sum by (state) (pg:cls:sessions)

      # backends
      - record: pg:db:backends
        expr: pg_db_numbackends
      - record: pg:ins:backends
        expr: sum without (datname) (pg_db_numbackends)
      - record: pg:svc:backends
        expr: sum by (cls, role) (pg:ins:backends)
      - record: pg:cls:backends
        expr: sum by (cls) (pg:ins:backends)
      - record: pg:all:backends
        expr: sum(pg:cls:backends)

      # active backends
      - record: pg:ins:active_backends
        expr: pg:ins:sessions{state="active"}
      - record: pg:svc:active_backends
        expr: sum by (cls, role) (pg:ins:active_backends)
      - record: pg:cls:active_backends
        expr: sum by (cls) (pg:ins:active_backends)
      - record: pg:all:active_backends
        expr: sum(pg:cls:active_backends)

      # idle in xact backends (including abort)
      - record: pg:ins:ixact_backends
        expr: pg:ins:sessions{state=~"idle in.*"}
      - record: pg:svc:ixact_backends
        expr: sum by (cls, role) (pg:ins:ixact_backends)
      - record: pg:cls:ixact_backends
        expr: sum by (cls) (pg:ins:ixact_backends)
      - record: pg:all:ixact_backends
        expr: sum(pg:cls:ixact_backends)


      #==============================================================#
      #                    Servers (Pgbouncer)                       #
      #==============================================================#

      # active servers
      - record: pg:pool:active_servers
        expr: pgbouncer_pool_active_servers{datname!="pgbouncer"}
      - record: pg:db:active_servers
        expr: sum without(user) (pg:pool:active_servers)
      - record: pg:ins:active_servers
        expr: sum without(user, datname) (pg:pool:active_servers)
      - record: pg:svc:active_servers
        expr: sum by (cls, role) (pg:ins:active_servers)
      - record: pg:cls:active_servers
        expr: sum by (cls) (pg:ins:active_servers)
      - record: pg:all:active_servers
        expr: sum(pg:cls:active_servers)

      # idle servers
      - record: pg:pool:idle_servers
        expr: pgbouncer_pool_idle_servers{datname!="pgbouncer"}
      - record: pg:db:idle_servers
        expr: sum without(user) (pg:pool:idle_servers)
      - record: pg:ins:idle_servers
        expr: sum without(user, datname) (pg:pool:idle_servers)
      - record: pg:svc:idle_servers
        expr: sum by (cls, role) (pg:ins:idle_servers)
      - record: pg:cls:idle_servers
        expr: sum by (cls) (pg:ins:idle_servers)
      - record: pg:all:idle_servers
        expr: sum(pg:cls:idle_servers)

      # used servers
      - record: pg:pool:used_servers
        expr: pgbouncer_pool_used_servers{datname!="pgbouncer"}
      - record: pg:db:used_servers
        expr: sum without(user) (pg:pool:used_servers)
      - record: pg:ins:used_servers
        expr: sum without(user, datname) (pg:pool:used_servers)
      - record: pg:svc:used_servers
        expr: sum by (cls, role) (pg:ins:used_servers)
      - record: pg:cls:used_servers
        expr: sum by (cls) (pg:ins:used_servers)
      - record: pg:all:used_servers
        expr: sum(pg:cls:used_servers)

      # tested servers
      - record: pg:pool:tested_servers
        expr: pgbouncer_pool_tested_servers{datname!="pgbouncer"}
      - record: pg:db:tested_servers
        expr: sum without(user) (pg:pool:tested_servers)
      - record: pg:ins:tested_servers
        expr: sum without(user, datname) (pg:pool:tested_servers)
      - record: pg:svc:tested_servers
        expr: sum by (cls, role) (pg:ins:tested_servers)
      - record: pg:cls:tested_servers
        expr: sum by (cls) (pg:ins:tested_servers)
      - record: pg:all:tested_servers
        expr: sum(pg:cls:tested_servers)

      # login servers
      - record: pg:pool:login_servers
        expr: pgbouncer_pool_login_servers{datname!="pgbouncer"}
      - record: pg:db:login_servers
        expr: sum without(user) (pg:pool:login_servers)
      - record: pg:ins:login_servers
        expr: sum without(user, datname) (pg:pool:login_servers)
      - record: pg:svc:login_servers
        expr: sum by (cls, role) (pg:ins:login_servers)
      - record: pg:cls:login_servers
        expr: sum by (cls) (pg:ins:login_servers)
      - record: pg:all:login_servers
        expr: sum(pg:cls:login_servers)



      #==============================================================#
      #                   Clients (Pgbouncer)                        #
      #==============================================================#
      # active clients
      - record: pg:pool:active_clients
        expr: pgbouncer_pool_active_clients{datname!="pgbouncer"}
      - record: pg:db:active_clients
        expr: sum without(user) (pg:pool:active_clients)
      - record: pg:ins:active_clients
        expr: sum without(user, datname) (pg:pool:active_clients)
      - record: pg:svc:active_clients
        expr: sum by (cls, role) (pg:ins:active_clients)
      - record: pg:cls:active_clients
        expr: sum by (cls) (pg:ins:active_clients)
      - record: pg:all:active_clients
        expr: sum(pg:cls:active_clients)

      # waiting clients
      - record: pg:pool:waiting_clients
        expr: pgbouncer_pool_waiting_clients{datname!="pgbouncer"}
      - record: pg:db:waiting_clients
        expr: sum without(user) (pg:pool:waiting_clients)
      - record: pg:ins:waiting_clients
        expr: sum without(user, datname) (pg:pool:waiting_clients)
      - record: pg:svc:waiting_clients
        expr: sum by (cls, role) (pg:ins:waiting_clients)
      - record: pg:cls:waiting_clients
        expr: sum by (cls) (pg:ins:waiting_clients)
      - record: pg:all:waiting_clients
        expr: sum(pg:cls:waiting_clients)


      #==============================================================#
      #                       Transactions                           #
      #==============================================================#
      # commits (realtime)
      - record: pg:db:commits_realtime
        expr: irate(pg_db_xact_commit{}[1m])
      - record: pg:ins:commits_realtime
        expr: sum without (datname) (pg:db:commits_realtime)
      - record: pg:svc:commits_realtime
        expr: sum by (cls, role) (pg:ins:commits_realtime)
      - record: pg:cls:commits_realtime
        expr: sum by (cls) (pg:ins:commits_realtime)
      - record: pg:all:commits_realtime
        expr: sum(pg:cls:commits_realtime)

      # commits (rate1m)
      - record: pg:db:commits
        expr: rate(pg_db_xact_commit{}[1m])
      - record: pg:ins:commits
        expr: sum without (datname) (pg:db:commits)
      - record: pg:svc:commits
        expr: sum by (cls, role) (pg:ins:commits)
      - record: pg:cls:commits
        expr: sum by (cls) (pg:ins:commits)
      - record: pg:all:commits
        expr: sum(pg:cls:commits)

      # rollbacks realtime
      - record: pg:db:rollbacks_realtime
        expr: irate(pg_db_xact_rollback{}[1m])
      - record: pg:ins:rollbacks_realtime
        expr: sum without (datname) (pg:db:rollbacks_realtime)
      - record: pg:svc:rollbacks_realtime
        expr: sum by (cls, role) (pg:ins:rollbacks_realtime)
      - record: pg:cls:rollbacks_realtime
        expr: sum by (cls) (pg:ins:rollbacks_realtime)
      - record: pg:all:rollbacks_realtime
        expr: sum(pg:cls:rollbacks_realtime)
      # rollbacks
      - record: pg:db:rollbacks
        expr: rate(pg_db_xact_rollback{}[1m])
      - record: pg:ins:rollbacks
        expr: sum without (datname) (pg:db:rollbacks)
      - record: pg:svc:rollbacks
        expr: sum by (cls, role) (pg:ins:rollbacks)
      - record: pg:cls:rollbacks
        expr: sum by (cls) (pg:ins:rollbacks)
      - record: pg:all:rollbacks
        expr: sum(pg:cls:rollbacks)

      # xacts (realtime)
      - record: pg:db:xacts_realtime
        expr: irate(pg_db_xact_commit{}[1m])
      - record: pg:ins:xacts_realtime
        expr: sum without (datname) (pg:db:xacts_realtime)
      - record: pg:svc:xacts_realtime
        expr: sum by (cls, role) (pg:ins:xacts_realtime)
      - record: pg:cls:xacts_realtime
        expr: sum by (cls) (pg:ins:xacts_realtime)
      - record: pg:all:xacts_realtime
        expr: sum(pg:cls:xacts_realtime)
      # xacts (rate1m)
      - record: pg:db:xacts
        expr: rate(pg_db_xact_commit{}[1m])
      - record: pg:ins:xacts
        expr: sum without (datname) (pg:db:xacts)
      - record: pg:svc:xacts
        expr: sum by (cls, role) (pg:ins:xacts)
      - record: pg:cls:xacts
        expr: sum by (cls) (pg:ins:xacts)
      - record: pg:all:xacts
        expr: sum(pg:cls:xacts)
      # xacts avg30m
      - record: pg:db:xacts_avg30m
        expr: avg_over_time(pg:db:xacts[30m])
      - record: pg:ins:xacts_avg30m
        expr: avg_over_time(pg:ins:xacts[30m])
      - record: pg:svc:xacts_avg30m
        expr: avg_over_time(pg:svc:xacts[30m])
      - record: pg:cls:xacts_avg30m
        expr: avg_over_time(pg:cls:xacts[30m])
      - record: pg:all:xacts_avg30m
        expr: avg_over_time(pg:all:xacts[30m])
      # xacts µ
      - record: pg:db:xacts_mu
        expr: avg_over_time(pg:db:xacts_avg30m[30m])
      - record: pg:ins:xacts_mu
        expr: avg_over_time(pg:ins:xacts_avg30m[30m])
      - record: pg:svc:xacts_mu
        expr: avg_over_time(pg:svc:xacts_avg30m[30m])
      - record: pg:cls:xacts_mu
        expr: avg_over_time(pg:cls:xacts_avg30m[30m])
      - record: pg:all:xacts_mu
        expr: avg_over_time(pg:all:xacts_avg30m[30m])
      # xacts σ: sigma
      - record: pg:db:xacts_sigma
        expr: stddev_over_time(pg:db:xacts[30m])
      - record: pg:ins:xacts_sigma
        expr: stddev_over_time(pg:ins:xacts[30m])
      - record: pg:svc:xacts_sigma
        expr: stddev_over_time(pg:svc:xacts[30m])
      - record: pg:cls:xacts_sigma
        expr: stddev_over_time(pg:cls:xacts[30m])
      - record: pg:all:xacts_sigma
        expr: stddev_over_time(pg:all:xacts[30m])


      #==============================================================#
      #                      TPS (Pgbouncer)                         #
      #==============================================================#
      # TPS realtime (irate1m)
      - record: pg:db:tps_realtime
        expr: irate(pgbouncer_stat_total_xact_count{}[1m])
      - record: pg:ins:tps_realtime
        expr: sum without(datname) (pg:db:tps_realtime{})
      - record: pg:svc:tps_realtime
        expr: sum by(cls, role) (pg:ins:tps_realtime{})
      - record: pg:cls:tps_realtime
        expr: sum by(cls) (pg:ins:tps_realtime{})
      - record: pg:all:tps_realtime
        expr: sum(pg:cls:tps_realtime{})

      # TPS (rate1m)
      - record: pg:db:tps
        expr: pgbouncer_stat_avg_xact_count{datname!="pgbouncer"}
      - record: pg:ins:tps
        expr: sum without(datname) (pg:db:tps)
      - record: pg:svc:tps
        expr: sum by (cls, role) (pg:ins:tps)
      - record: pg:cls:tps
        expr: sum by(cls) (pg:ins:tps)
      - record: pg:all:tps
        expr: sum(pg:cls:tps)
      # tps : avg30m
      - record: pg:db:tps_avg30m
        expr: avg_over_time(pg:db:tps[30m])
      - record: pg:ins:tps_avg30m
        expr: avg_over_time(pg:ins:tps[30m])
      - record: pg:svc:tps_avg30m
        expr: avg_over_time(pg:svc:tps[30m])
      - record: pg:cls:tps_avg30m
        expr: avg_over_time(pg:cls:tps[30m])
      - record: pg:all:tps_avg30m
        expr: avg_over_time(pg:all:tps[30m])
      # tps µ
      - record: pg:db:tps_mu
        expr: avg_over_time(pg:db:tps_avg30m[30m])
      - record: pg:ins:tps_mu
        expr: avg_over_time(pg:ins:tps_avg30m[30m])
      - record: pg:svc:tps_mu
        expr: avg_over_time(pg:svc:tps_avg30m[30m])
      - record: pg:cls:tps_mu
        expr: avg_over_time(pg:cls:tps_avg30m[30m])
      - record: pg:all:tps_mu
        expr: avg_over_time(pg:all:tps_avg30m[30m])
      # tps σ
      - record: pg:db:tps_sigma
        expr: stddev_over_time(pg:db:tps[30m])
      - record: pg:ins:tps_sigma
        expr: stddev_over_time(pg:ins:tps[30m])
      - record: pg:svc:tps_sigma
        expr: stddev_over_time(pg:svc:tps[30m])
      - record: pg:cls:tps_sigma
        expr: stddev_over_time(pg:cls:tps[30m])
      - record: pg:all:tps_sigma
        expr: stddev_over_time(pg:all:tps[30m])

      # xact rt (rate1m)
      - record: pg:db:xact_rt
        expr: pgbouncer_stat_avg_xact_time{datname!="pgbouncer"} / 1000000
      - record: pg:ins:xact_rt
        expr: sum without(datname) (rate(pgbouncer_stat_total_xact_time[1m])) / sum without(datname) (rate(pgbouncer_stat_total_xact_count[1m])) / 1000000
      - record: pg:svc:xact_rt
        expr: sum by (cls, role) (rate(pgbouncer_stat_total_xact_time[1m])) / sum by (cls, role) (rate(pgbouncer_stat_total_xact_count[1m])) / 1000000
      # xact_rt avg30m
      - record: pg:db:xact_rt_avg30m
        expr: avg_over_time(pg:db:xact_rt[30m])
      - record: pg:ins:xact_rt_avg30m
        expr: avg_over_time(pg:ins:xact_rt[30m])
      - record: pg:svc:xact_rt_avg30m
        expr: avg_over_time(pg:svc:xact_rt[30m])
      # xact_rt µ
      - record: pg:db:xact_rt_mu
        expr: avg_over_time(pg:db:xact_rt_avg30m[30m])
      - record: pg:ins:xact_rt_mu
        expr: avg_over_time(pg:ins:xact_rt_avg30m[30m])
      - record: pg:svc:xact_rt_mu
        expr: avg_over_time(pg:svc:xact_rt_avg30m[30m])

      # xact_rt σ: stddev30m
      - record: pg:db:xact_rt_sigma
        expr: stddev_over_time(pg:db:xact_rt[30m])
      - record: pg:ins:xact_rt_sigma
        expr: stddev_over_time(pg:ins:xact_rt[30m])
      - record: pg:svc:xact_rt_sigma
        expr: stddev_over_time(pg:svc:xact_rt[30m])



      #==============================================================#
      #                     QPS (Pgbouncer)                          #
      #==============================================================#
      # QPS realtime (irate1m)
      - record: pg:db:qps_realtime
        expr: irate(pgbouncer_stat_total_query_count{}[1m])
      - record: pg:ins:qps_realtime
        expr: sum without(datname) (pg:db:qps_realtime{})
      - record: pg:svc:qps_realtime
        expr: sum by(cls, role) (pg:ins:qps_realtime{})
      - record: pg:cls:qps_realtime
        expr: sum by(cls) (pg:ins:qps_realtime{})
      - record: pg:all:qps_realtime
        expr: sum(pg:cls:qps_realtime{})
      # qps (rate1m)
      - record: pg:db:qps
        expr: pgbouncer_stat_avg_query_count{datname!="pgbouncer"}
      - record: pg:ins:qps
        expr: sum without(datname) (pg:db:qps)
      - record: pg:svc:qps
        expr: sum by (cls, role) (pg:ins:qps)
      - record: pg:cls:qps
        expr: sum by(cls) (pg:ins:qps)
      - record: pg:all:qps
        expr: sum(pg:cls:qps)

      # qps avg30m
      - record: pg:db:qps_avg30m
        expr: avg_over_time(pg:db:qps[30m])
      - record: pg:ins:qps_avg30m
        expr: avg_over_time(pg:ins:qps[30m])
      - record: pg:svc:qps_avg30m
        expr: avg_over_time(pg:svc:qps[30m])
      - record: pg:cls:qps_avg30m
        expr: avg_over_time(pg:cls:qps[30m])
      - record: pg:all:qps_avg30m
        expr: avg_over_time(pg:all:qps[30m])
      # qps µ
      - record: pg:db:qps_mu
        expr: avg_over_time(pg:db:qps_avg30m[30m])
      - record: pg:ins:qps_mu
        expr: avg_over_time(pg:ins:qps_avg30m[30m])
      - record: pg:svc:qps_mu
        expr: avg_over_time(pg:svc:qps_avg30m[30m])
      - record: pg:cls:qps_mu
        expr: avg_over_time(pg:cls:qps_avg30m[30m])
      - record: pg:all:qps_mu
        expr: avg_over_time(pg:all:qps_avg30m[30m])
      # qps σ: stddev30m qps
      - record: pg:db:qps_sigma
        expr: stddev_over_time(pg:db:qps[30m])
      - record: pg:ins:qps_sigma
        expr: stddev_over_time(pg:ins:qps[30m])
      - record: pg:svc:qps_sigma
        expr: stddev_over_time(pg:svc:qps[30m])
      - record: pg:cls:qps_sigma
        expr: stddev_over_time(pg:cls:qps[30m])
      - record: pg:all:qps_sigma
        expr: stddev_over_time(pg:all:qps[30m])
      # query rt (1m avg)
      - record: pg:db:query_rt
        expr: pgbouncer_stat_avg_query_time{datname!="pgbouncer"} / 1000000
      - record: pg:ins:query_rt
        expr: sum without(datname) (rate(pgbouncer_stat_total_query_time[1m])) / sum without(datname) (rate(pgbouncer_stat_total_query_count[1m])) / 1000000
      - record: pg:svc:query_rt
        expr: sum by (cls, role) (rate(pgbouncer_stat_total_query_time[1m])) / sum by (cls, role) (rate(pgbouncer_stat_total_query_count[1m])) / 1000000
      # query_rt avg30m
      - record: pg:db:query_rt_avg30m
        expr: avg_over_time(pg:db:query_rt[30m])
      - record: pg:ins:query_rt_avg30m
        expr: avg_over_time(pg:ins:query_rt[30m])
      - record: pg:svc:query_rt_avg30m
        expr: avg_over_time(pg:svc:query_rt[30m])
      # query_rt µ
      - record: pg:db:query_rt_mu
        expr: avg_over_time(pg:db:query_rt_avg30m[30m])
      - record: pg:ins:query_rt_mu
        expr: avg_over_time(pg:ins:query_rt_avg30m[30m])
      - record: pg:svc:query_rt_mu
        expr: avg_over_time(pg:svc:query_rt_avg30m[30m])
      # query_rt σ: stddev30m
      - record: pg:db:query_rt_sigma
        expr: stddev_over_time(pg:db:query_rt[30m])
      - record: pg:ins:query_rt_sigma
        expr: stddev_over_time(pg:ins:query_rt[30m])
      - record: pg:svc:query_rt_sigma
        expr: stddev_over_time(pg:svc:query_rt[30m])


      #==============================================================#
      #                        PG Load                               #
      #==============================================================#
      # seconds spent on transactions in the last minute
      - record: pg:ins:xact_time_rate1m
        expr: sum without (datname) (rate(pgbouncer_stat_total_xact_time{}[1m])) / 1000000
      - record: pg:ins:xact_time_rate5m
        expr: sum without (datname) (rate(pgbouncer_stat_total_xact_time{}[5m])) / 1000000
      - record: pg:ins:xact_time_rate15m
        expr: sum without (datname) (rate(pgbouncer_stat_total_xact_time{}[15m])) / 1000000

      # seconds spent on queries in the last minute
      - record: pg:ins:query_time_rate1m
        expr: sum without (datname) (rate(pgbouncer_stat_total_query_time{}[1m])) / 1000000
      - record: pg:ins:query_time_rate5m
        expr: sum without (datname) (rate(pgbouncer_stat_total_query_time{}[5m])) / 1000000
      - record: pg:ins:query_time_rate15m
        expr: sum without (datname) (rate(pgbouncer_stat_total_query_time{}[15m])) / 1000000

      # instance level load
      - record: pg:ins:load0
        expr: sum without (datname) (irate(pgbouncer_stat_total_xact_time{}[1m])) / on (ip) group_left()  node:ins:cpu_count / 1000000
      - record: pg:ins:load1
        expr: pg:ins:xact_time_rate1m  / on (ip) group_left()  node:ins:cpu_count
      - record: pg:ins:load5
        expr: pg:ins:xact_time_rate5m  / on (ip) group_left()  node:ins:cpu_count
      - record: pg:ins:load15
        expr: pg:ins:xact_time_rate15m  / on (ip) group_left()  node:ins:cpu_count

      # service level load
      - record: pg:svc:load0
        expr: sum by (svc, cls, role) (irate(pgbouncer_stat_total_xact_time{}[1m])) / on (svc) group_left() sum by (svc) (node:ins:cpu_count{}) / 1000000
      - record: pg:svc:load1
        expr: sum by (svc, cls, role) (pg:ins:xact_time_rate1m)  / on (svc) group_left() sum by (svc) (node:ins:cpu_count{}) / 1000000
      - record: pg:svc:load5
        expr: sum by (svc, cls, role) (pg:ins:xact_time_rate5m)  / on (svc) group_left() sum by (svc) (node:ins:cpu_count{}) / 1000000
      - record: pg:svc:load15
        expr: sum by (svc, cls, role) (pg:ins:xact_time_rate15m)  / on (svc) group_left() sum by (svc) (node:ins:cpu_count{}) / 1000000

      # cluster level load
      - record: pg:cls:load0
        expr: sum by (cls) (irate(pgbouncer_stat_total_xact_time{}[1m])) / on (cls) node:cls:cpu_count{} / 1000000
      - record: pg:cls:load1
        expr: sum by (cls) (pg:ins:xact_time_rate1m)  / on (cls) node:cls:cpu_count
      - record: pg:cls:load5
        expr: sum by (cls) (pg:ins:xact_time_rate5m)  / on (cls) node:cls:cpu_count
      - record: pg:cls:load15
        expr: sum by (cls) (pg:ins:xact_time_rate15m)  / on (cls) node:cls:cpu_count


      #==============================================================#
      #                     PG Saturation                            #
      #==============================================================#
      # max value of pg_load and cpu_usage

      # instance level saturation
      - record: pg:ins:saturation0
        expr: pg:ins:load0 > node:ins:cpu_usage or node:ins:cpu_usage
      - record: pg:ins:saturation1
        expr: pg:ins:load1 > node:ins:cpu_usage or node:ins:cpu_usage
      - record: pg:ins:saturation5
        expr: pg:ins:load5 > node:ins:cpu_usage or node:ins:cpu_usage
      - record: pg:ins:saturation15
        expr: pg:ins:load15 > node:ins:cpu_usage or node:ins:cpu_usage

      # cluster level saturation
      - record: pg:cls:saturation0
        expr: pg:cls:load0 > node:cls:cpu_usage or node:cls:cpu_usage
      - record: pg:cls:saturation1
        expr: pg:cls:load1 > node:cls:cpu_usage or node:cls:cpu_usage
      - record: pg:cls:saturation5
        expr: pg:cls:load5 > node:cls:cpu_usage or node:cls:cpu_usage
      - record: pg:cls:saturation15
        expr: pg:cls:load15 > node:cls:cpu_usage or node:cls:cpu_usage


      #==============================================================#
      #                          CRUD                                #
      #==============================================================#
      # rows touched
      - record: pg:db:tup_touched
        expr: irate(pg_db_tup_fetched{}[1m])
      - record: pg:ins:tup_touched
        expr: sum without(datname) (pg:db:tup_touched)
      - record: pg:svc:tup_touched
        expr: sum by (cls, role) (pg:ins:tup_touched)
      - record: pg:cls:tup_touched
        expr: sum by (cls) (pg:ins:tup_touched)
      - record: pg:all:tup_touched
        expr: sum(pg:cls:tup_touched)

      # selected
      - record: pg:db:tup_selected
        expr: irate(pg_db_tup_returned{}[1m])
      - record: pg:ins:tup_selected
        expr: sum without(datname) (pg:db:tup_selected)
      - record: pg:svc:tup_selected
        expr: sum by (cls, role) (pg:ins:tup_selected)
      - record: pg:cls:tup_selected
        expr: sum by (cls) (pg:ins:tup_selected)
      - record: pg:all:tup_selected
        expr: sum(pg:cls:tup_selected)

      # inserted
      - record: pg:db:tup_inserted
        expr: irate(pg_db_tup_inserted{}[1m])
      - record: pg:ins:tup_inserted
        expr: sum without(datname) (pg:db:tup_inserted)
      - record: pg:svc:tup_inserted
        expr: sum by (cls, role) (pg:ins:tup_inserted)
      - record: pg:cls:tup_inserted
        expr: sum by (cls) (pg:ins:tup_inserted{role="primary"})
      - record: pg:all:tup_inserted
        expr: sum(pg:cls:tup_inserted)

      # updated
      - record: pg:db:tup_updated
        expr: irate(pg_db_tup_updated{}[1m])
      - record: pg:ins:tup_updated
        expr: sum without(datname) (pg:db:tup_updated)
      - record: pg:svc:tup_updated
        expr: sum by (cls, role) (pg:ins:tup_updated)
      - record: pg:cls:tup_updated
        expr: sum by (cls) (pg:ins:tup_updated{role="primary"})
      - record: pg:all:tup_updated
        expr: sum(pg:cls:tup_updated)

      # deleted
      - record: pg:db:tup_deleted
        expr: irate(pg_db_tup_deleted{}[1m])
      - record: pg:ins:tup_deleted
        expr: sum without(datname) (pg:db:tup_deleted)
      - record: pg:svc:tup_deleted
        expr: sum by (cls, role) (pg:ins:tup_deleted)
      - record: pg:cls:tup_deleted
        expr: sum by (cls) (pg:ins:tup_deleted{role="primary"})
      - record: pg:all:tup_deleted
        expr: sum(pg:cls:tup_deleted)

      # modified
      - record: pg:db:tup_modified
        expr: irate(pg_db_tup_modified{}[1m])
      - record: pg:ins:tup_modified
        expr: sum without(datname) (pg:db:tup_modified)
      - record: pg:svc:tup_modified
        expr: sum by (cls, role) (pg:ins:tup_modified)
      - record: pg:cls:tup_modified
        expr: sum by (cls) (pg:ins:tup_modified{role="primary"})
      - record: pg:all:tup_modified
        expr: sum(pg:cls:tup_modified)


      #==============================================================#
      #                      Object Access                           #
      #==============================================================#
      # table access
      - record: pg:table:idx_scan
        expr: rate(pg_table_idx_scan{}[1m])
      - record: pg:table:seq_scan
        expr: rate(pg_table_seq_scan{}[1m])
      - record: pg:table:qps_realtime
        expr: irate(pg_table_idx_scan{}[1m])

      # index access
      - record: pg:index:idx_scan
        expr: rate(pg_index_idx_scan{}[1m])
      - record: pg:index:qps_realtime
        expr: irate(pg_index_idx_scan{}[1m])

      # func access
      - record: pg:func:call
        expr: rate(pg_func_calls{}[1m])
      - record: pg:func:rt
        expr: rate(pg_func_total_time{}[1m]) / pg:func:call

      # query access
      - record: pg:query:call
        expr: rate(pg_query_calls{}[1m])
      - record: pg:query:rt
        expr: rate(pg_query_total_time{}[1m]) / pg:query:call / 1000



      #==============================================================#
      #                        Blocks IO                             #
      #==============================================================#
      # blocks read/hit/access in 1min
      - record: pg:db:blks_read_1m
        expr: increase(pg_db_blks_read{}[1m])
      - record: pg:db:blks_hit_1m
        expr: increase(pg_db_blks_hit{}[1m])
      - record: pg:db:blks_access_1m
        expr: increase(pg_db_blks_access{}[1m])

      # buffer hit rate (1m)
      - record: pg:db:buffer_hit_rate
        expr: pg:db:blks_hit_1m / pg:db:blks_access_1m
      - record: pg:ins:hit_rate
        expr: sum without(datname) (pg:db:blks_hit_1m) / sum without(datname) (pg:db:blks_access_1m)

      # read/write time usage
      - record: pg:db:read_time_usage
        expr: rate(pg_db_blk_read_time[1m])
      - record: pg:db:write_time_usage
        expr: rate(pg_db_blk_write_time[1m])
      - record: pg:db:io_time_usage
        expr: pg:db:read_time_usage + pg:db:write_time_usage



      #==============================================================#
      #                  Traffic IO (Pgbouncer)                      #
      #==============================================================#
      # transmit bandwidth (sent, out)
      - record: pg:db:tx
        expr: irate(pgbouncer_stat_total_sent{datname!="pgbouncer"}[1m])
      - record: pg:ins:tx
        expr: sum without (user, datname) (pg:db:tx)
      - record: pg:svc:tx
        expr: sum by (cls, role) (pg:ins:tx)
      - record: pg:cls:tx
        expr: sum by (cls) (pg:ins:tx)
      - record: pg:all:tx
        expr: sum(pg:cls:tx)

      # receive bandwidth (received, in)
      - record: pg:db:rx
        expr: irate(pgbouncer_stat_total_received{datname!="pgbouncer"}[1m])
      - record: pg:ins:rx
        expr: sum without (datname) (pg:db:rx)
      - record: pg:svc:rx
        expr: sum by (cls, role) (pg:ins:rx)
      - record: pg:cls:rx
        expr: sum by (cls) (pg:ins:rx)
      - record: pg:all:rx
        expr: sum(pg:cls:rx)



      #==============================================================#
      #                          Lock                                #
      #==============================================================#
      # lock count by mode
      - record: pg:db:locks
        expr: pg_lock_count
      - record: pg:ins:locks
        expr: sum without(datname) (pg:db:locks)
      - record: pg:svc:locks
        expr: sum by (cls, role, mode) (pg:ins:locks)
      - record: pg:cls:locks
        expr: sum by (cls, mode) (pg:ins:locks)

      # total lock count
      - record: pg:db:lock_count
        expr: sum without (mode) (pg_lock_count{})
      - record: pg:ins:lock_count
        expr: sum without(datname) (pg:db:lock_count)
      - record: pg:svc:lock_count
        expr: sum by (cls, role) (pg:ins:lock_count)
      - record: pg:cls:lock_count
        expr: sum by (cls) (pg:ins:lock_count)

      # read category lock
      - record: pg:db:rlock
        expr: sum without (mode) (pg_lock_count{mode="AccessShareLock"})
      - record: pg:ins:rlock
        expr: sum without(datname) (pg:db:rlock)
      - record: pg:svc:rlock
        expr: sum by (cls, role) (pg:ins:rlock)
      - record: pg:cls:rlock
        expr: sum by (cls) (pg:ins:rlock)

      # write category lock (insert|update|delete)
      - record: pg:db:wlock
        expr: sum without (mode) (pg_lock_count{mode=~"RowShareLock|RowExclusiveLock"})
      - record: pg:ins:wlock
        expr: sum without(datname) (pg:db:wlock)
      - record: pg:svc:wlock
        expr: sum by (cls, role) (pg:ins:wlock)
      - record: pg:cls:wlock
        expr: sum by (cls) (pg:ins:wlock)

      # exclusive category lock
      - record: pg:db:xlock
        expr: sum without (mode) (pg_lock_count{mode=~"AccessExclusiveLock|ExclusiveLock|ShareRowExclusiveLock|ShareLock|ShareUpdateExclusiveLock"})
      - record: pg:ins:xlock
        expr: sum without(datname) (pg:db:xlock)
      - record: pg:svc:xlock
        expr: sum by (cls, role) (pg:ins:xlock)
      - record: pg:cls:xlock
        expr: sum by (cls) (pg:ins:xlock)


      #==============================================================#
      #                          Temp                                #
      #==============================================================#
      # temp files and bytes
      - record: pg:db:temp_bytes
        expr: rate(pg_db_temp_bytes{}[1m])
      - record: pg:ins:temp_bytes
        expr: sum without(datname) (pg:db:temp_bytes)
      - record: pg:svc:temp_bytes
        expr: sum by (cls, role) (pg:ins:temp_bytes)
      - record: pg:cls:temp_bytes
        expr: sum by (cls) (pg:ins:temp_bytes)

      # temp file count in last 1m
      - record: pg:db:temp_files
        expr: increase(pg_db_temp_files{}[1m])
      - record: pg:ins:temp_files
        expr: sum without(datname) (pg:db:temp_files)
      - record: pg:svc:temp_files
        expr: sum by (cls, role) (pg:ins:temp_files)
      - record: pg:cls:temp_files
        expr: sum by (cls) (pg:ins:temp_files)



      #==============================================================#
      #                           Size                               #
      #==============================================================#
      # database size
      - record: pg:ins:db_size
        expr: pg_size_database
      - record: pg:cls:db_size
        expr: sum by (cls) (pg:ins:db_size)
      # wal size
      - record: pg:ins:wal_size
        expr: pg_size_wal
      - record: pg:cls:wal_size
        expr: sum by (cls) (pg:ins:wal_size)
      # log size
      - record: pg:ins:log_size
        expr: pg_size_log
      - record: pg:cls:log_size
        expr: sum by (cls) (pg_size_log)



      #==============================================================#
      #                        Checkpoint                            #
      #==============================================================#
      # checkpoint stats
      - record: pg:ins:last_ckpt
        expr: pg_checkpoint_elapse
      - record: pg:ins:ckpt_timed
        expr: increase(pg_bgwriter_checkpoints_timed{}[30s])
      - record: pg:ins:ckpt_req
        expr: increase(pg_bgwriter_checkpoints_req{}[30s])
      - record: pg:cls:ckpt_1h
        expr: increase(pg:ins:ckpt_timed[1h]) + increase(pg:ins:ckpt_req[1h])

      # buffer flush & alloc
      - record: pg:ins:buf_flush_backend
        expr: irate(pg_bgwriter_buffers_backend{}[1m]) * 8192
      - record: pg:ins:buf_flush_checkpoint
        expr: irate(pg_bgwriter_buffers_checkpoint{}[1m]) * 8192

      - record: pg:ins:buf_flush
        expr: pg:ins:buf_flush_backend + pg:ins:buf_flush_checkpoint
      - record: pg:svc:buf_flush
        expr: sum by (cls, role) (pg:ins:buf_flush)
      - record: pg:cls:buf_flush
        expr: sum by (cls) (pg:ins:buf_flush)
      - record: pg:all:buf_flush
        expr: sum(pg:cls:buf_flush)

      - record: pg:ins:buf_alloc
        expr: irate(pg_bgwriter_buffers_alloc{}[1m]) * 8192
      - record: pg:svc:buf_alloc
        expr: sum by (cls, role) (pg:ins:buf_alloc)
      - record: pg:cls:buf_alloc
        expr: sum by (cls) (pg:ins:buf_alloc)
      - record: pg:all:buf_alloc
        expr: sum(pg:cls:buf_alloc)




      #==============================================================#
      #                           LSN                                #
      #==============================================================#
      # timeline & LSN
      - record: pg_timeline
        expr: pg_checkpoint_tli
      - record: pg:ins:redo_lsn
        expr: pg_checkpoint_redo_lsn
      - record: pg:ins:checkpoint_lsn
        expr: pg_checkpoint_checkpoint_lsn

      # wal rate
      - record: pg:ins:wal_rate
        expr: rate(pg_lsn[1m])
      - record: pg:cls:wal_rate
        expr: max by (cls) (pg:ins:wal_rate{role="primary"})
      - record: pg:all:wal_rate
        expr: sum(pg:cls:wal_rate)



      #==============================================================#
      #                       Replication                            #
      #==============================================================#
      # lag time from replica's view
      - record: pg:ins:lag_seconds
        expr: pg_lag
      - record: pg:cls:lag_seconds
        expr: max by (cls) (pg:ins:lag_seconds)
      - record: pg:all:lag_seconds
        expr: max(pg:cls:lag_seconds)

      # sync status
      - record: pg:ins:sync_status # application_name must be set to replica ins name
        expr: max by (ins, svc, cls) (label_replace(pg_replication_sync_status, "ins", "$1", "application_name", "(.+)"))

      # lag of self (application_name must be set to standby ins name)
      - record: pg:ins:lag_bytes
        expr: max by (ins, svc, cls, role) (label_replace(pg_replication_lsn{} - pg_replication_replay_lsn{}, "ins", "$1", "application_name", "(.+)"))
      - record: pg:cls:lag_bytes
        expr: max by (cls) (pg:ins:lag_bytes)
      - record: pg:all:lag_bytes
        expr: max(pg:cls:lag_bytes)

      # replication slot retained bytes
      - record: pg:ins:slot_retained_bytes
        expr: pg_slot_retained_bytes

      # replica walreceiver
      - record: pg:ins:recv_init_lsn
        expr: pg_walreceiver_init_lsn
      - record: pg:ins:recv_last_lsn
        expr: pg_walreceiver_last_lsn
      - record: pg:ins:recv_init_tli
        expr: pg_walreceiver_init_tli
      - record: pg:ins:recv_last_tli
        expr: pg_walreceiver_last_tli




      #==============================================================#
      # Cluster Level Metrics
      #==============================================================#
      # cluster member count
      - record: pg:cls:leader
        expr: count by (cls, ins) (max by (cls, ins) (pg_status{}) == 3)
      - record: pg:cls:size
        expr: count by (cls) (max by (cls, ins) (pg_up{}))
      - record: pg:cls:timeline
        expr: max by (cls) (pg_checkpoint_tli{})
      - record: pg:cls:primarys
        expr: count by (cls) (max by (cls, ins) (pg_in_recovery{}) == 0)
      - record: pg:cls:replicas
        expr: count by (cls) (max by (cls, ins) (pg_in_recovery{}) == 1)
      - record: pg:cls:synchronous
        expr: max by (cls) (pg_sync_standby_enabled) > bool 0
      - record: pg:cls:bridging_instances
        expr: count by (cls, role, ins, ip) (pg_replication_lsn{state="streaming", role!="primary"} > 0)
      - record: pg:cls:bridging
        expr: count by (cls) (pg:cls:bridging_instances)
      - record: pg:cls:cascading
        expr: count by (cls) (pg_replication_lsn{state="streaming", role!="primary"})





      #==============================================================#
      #                    Pgbouncer List                            #
      #==============================================================#
      # object list
      - record: pg:ins:pools
        expr: pgbouncer_list_items{list="pools"}
      - record: pg:ins:pool_databases
        expr: pgbouncer_list_items{list="databases"}
      - record: pg:ins:pool_users
        expr: pgbouncer_list_items{list="users"}
      - record: pg:ins:login_clients
        expr: pgbouncer_list_items{list="login_clients"}
      - record: pg:ins:free_clients
        expr: pgbouncer_list_items{list="free_clients"}
      - record: pg:ins:used_clients
        expr: pgbouncer_list_items{list="used_clients"}
      - record: pg:ins:free_servers
        expr: pgbouncer_list_items{list="free_servers"}



      #==============================================================#
      #                  DBConfig (Pgbouncer)                        #
      #==============================================================#
      - record: pg:db:pool_max_conn
        expr: pgbouncer_database_pool_size{datname!="pgbouncer"} + pgbouncer_database_reserve_pool{datname!="pgbouncer"}
      - record: pg:db:pool_size
        expr: pgbouncer_database_pool_size{datname!="pgbouncer"}
      - record: pg:db:pool_reserve_size
        expr: pgbouncer_database_reserve_pool{datname!="pgbouncer"}
      - record: pg:db:pool_current_conn
        expr: pgbouncer_database_current_connections{datname!="pgbouncer"}
      - record: pg:db:pool_paused
        expr: pgbouncer_database_paused{datname!="pgbouncer"}
      - record: pg:db:pool_disabled
        expr: pgbouncer_database_disabled{datname!="pgbouncer"}



      #==============================================================#
      #                  Waiting (Pgbouncer)                         #
      #==============================================================#
      # average wait time
      - record: pg:db:wait_rt
        expr: pgbouncer_stat_avg_wait_time{datname!="pgbouncer"} / 1000000

      # max wait time among all clients
      - record: pg:pool:maxwait
        expr: pgbouncer_pool_maxwait{datname!="pgbouncer"} + pgbouncer_pool_maxwait_us{datname!="pgbouncer"} / 1000000
      - record: pg:db:maxwait
        expr: max without(user) (pg:pool:maxwait)
      - record: pg:ins:maxwait
        expr: max without(user, datname) (pg:db:maxwait)
      - record: pg:svc:maxwait
        expr: max by (cls, role) (pg:ins:maxwait)
      - record: pg:cls:maxwait
        expr: max by (cls) (pg:ins:maxwait)
      - record: pg:all:maxwait
        expr: max(pg:cls:maxwait)

...
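
Once the recording rules are deployed on the meta node, a quick sanity check is to run the file through promtool and query one of the derived series. The rule file path and the Prometheus address below are assumptions about a typical layout, not fixed Pigsty paths:

$ promtool check rules /etc/prometheus/rules/pgsql.yml            # assumed rules path
$ curl -s 'http://localhost:9090/api/v1/query?query=pg:cls:tps'   # confirm a recorded series exists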

5 - Alert Rules

Alert rules definition of Pigsty

Prometheus Alert Rules

Node Alert Rules

################################################################
#                          Node Alert                          #
################################################################
- name: node-alert
  rules:

    # node exporter down for 1m triggers a P1 alert
    - alert: NODE_EXPORTER_DOWN
      expr: up{instance=~"^.*:(9100)$"} == 0
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Exporter Down: {{ $labels.ins }} {{ $value }}"
        description: |
          up[instance={{ $labels.instance }}] = {{ $value }} == 0
          http://g.pigsty/d/node?var-ip={{ $labels.instance }}&from=now-5m&to=now&refresh=10s



    #==============================================================#
    #                          CPU & Load                          #
    #==============================================================#
    # node avg CPU usage > 90% for 1m
    - alert: NODE_CPU_HIGH
      expr: node:ins:cpu_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node CPU High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:ins:cpu_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=28&fullscreen&var-ip={{ $labels.ip }}          

    # node load5 > 100%
    - alert: NODE_LOAD_HIGH
      expr: node:ins:stdload5 > 1
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node Load High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:ins:stdload5[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 100%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=37&fullscreen&var-ip={{ $labels.ip }}          



    #==============================================================#
    #                      Disk & Filesystem                       #
    #==============================================================#
    # main fs readonly triggers an immediate P0 alert
    - alert: NODE_FS_READONLY
      expr: node_filesystem_readonly{fstype!~"(n|root|tmp)fs.*"} == 1
      labels:
        severity: P0
      annotations:
        summary: "P0 Node Filesystem Readonly: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node_filesystem_readonly{ins={{ $labels.ins }}, ip={{ $labels.ip }},fstype!~"(n|root|tmp)fs.*"} == 1
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=110&fullscreen&var-ip={{ $labels.ip }}          

    # main fs usage > 90% for 1m triggers P1 alert
    - alert: NODE_FS_SPACE_FULL
      expr: node:fs:space_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Filesystem Space Full: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:fs:space_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=110&fullscreen&var-ip={{ $labels.ip }}          

    # main fs inode usage > 90% for 1m triggers P1 alert
    - alert: NODE_FS_INODE_FULL
      expr: node:fs:inode_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Filesystem iNode Full: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:fs:inode_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=110&fullscreen&var-ip={{ $labels.ip }}          

    # fd usage > 90% for 1m triggers P1 alert
    - alert: NODE_FD_FULL
      expr: node:fs:fd_usage > 0.90
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node File Descriptor Full: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:fs:fd_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 90%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=58&fullscreen&var-ip={{ $labels.ip }}          


    # ssd read latency > 32ms for 3m (except long-read)
    - alert: NODE_READ_LATENCY_HIGH
      expr: node:dev:disk_read_rt  < 10000 and node:dev:disk_read_rt  > 0.032
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node Read Latency High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:dev:disk_read_rt[ins={{ $labels.ins }}, ip={{ $labels.ip }}, device={{ $labels.device }}] = {{ $value }} > 32ms
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=29&fullscreen&var-ip={{ $labels.ip }}          

    # ssd write latency > 16ms for 3m
    - alert: NODE_WRITE_LATENCY_HIGH
      expr: node:dev:disk_write_rt  < 10000 and node:dev:disk_write_rt  > 0.016
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node Write Latency High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:dev:disk_write_rt[ins={{ $labels.ins }}, ip={{ $labels.ip }}, device={{ $labels.device }}] = {{ $value }} > 16ms
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=29&fullscreen&var-ip={{ $labels.ip }}          



    #==============================================================#
    #                           Memory                             #
    #==============================================================#
    # node memory usage > 80% for 1m triggers a P1 alert
    - alert: NODE_MEM_HIGH
      expr: node:ins:mem_usage > 0.80
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node Mem High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:ins:mem_usage[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 80%
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=40&fullscreen&var-ip={{ $labels.ip }}          



    #==============================================================#
    #                      Network & TCP                           #
    #==============================================================#
    # node tcp listen overflow > 2 for 3m
    - alert: NODE_TCP_LISTEN_OVERFLOW
      expr: node:ins:tcp_overflow_rate > 2
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node TCP Listen Overflow: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:ins:tcp_overflow_rate[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 2
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=55&fullscreen&var-ip={{ $labels.ip }}          

    # node tcp retrans > 32 per sec for 3m
    - alert: NODE_TCP_RETRANS_HIGH
      expr: node:ins:tcp_retranssegs > 32
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node TCP Retrans High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node:ins:tcp_retranssegs[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 32
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=52&fullscreen&var-ip={{ $labels.ip }}          

    # node tcp conn > 32768 for 3m
    - alert: NODE_TCP_CONN_HIGH
      expr: node_netstat_Tcp_CurrEstab > 32768
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P2 Node TCP Connection High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node_netstat_Tcp_CurrEstab[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 32768
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=54&fullscreen&var-ip={{ $labels.ip }}          



    #==============================================================#
    #                          Misc                                #
    #==============================================================#
    # node ntp offset > 1s for 1m
    - alert: NODE_NTP_OFFSET_HIGH
      expr: abs(node_ntp_offset_seconds) > 1
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 Node NTP Offset High: {{ $labels.ins }} {{ $labels.ip }}"
        description: |
          node_ntp_offset_seconds[ins={{ $labels.ins }}, ip={{ $labels.ip }}] = {{ $value }} > 1s
          http://g.pigsty/d/node?&from=now-10m&to=now&viewPanel=70&fullscreen&var-ip={{ $labels.ip }}          


Database & Connection Pool Alert Rules

---
################################################################
#                         PgSQL Alert                          #
################################################################
- name: pgsql-alert
  rules:

    #==============================================================#
    #                     Error / Aliveness                        #
    #==============================================================#
    # cluster size change triggers a P1 alert (warn: auto heal in 5min)
    - alert: PGSQL_CLUSTER_SHRINK
      expr: delta(pg:cls:size{}[5m]) < 0
      for: 15s
      labels:
        severity: P1
      annotations:
        summary: 'delta(pg:cls:size{cls={{ $labels.cls }}}[5m]) = {{ $value | printf "%.0f" }} < 0'
        description: |
                    http://g.pigsty/d/pg-cluster&from=now-10m&to=now&var-cls={{ $labels.cls }}


    # postgres down for 15s triggers a P0 alert
    - alert: PGSQL_DOWN
      expr: pg_up{} == 0
      labels:
        severity: P0
      annotations:
        summary: "[P0] PGSQL_DOWN: {{ $labels.ins }} {{ $value }}"
        description: |
          pg_up[ins={{ $labels.ins }}] = {{ $value }} == 0
          http://g.pigsty/d/pg-instance&from=now-10m&to=now&var-ins={{ $labels.ins }}          

    # pgbouncer down for 15s triggers a P0 alert
    - alert: PGBOUNCER_DOWN
      expr: pgbouncer_up{} == 0
      labels:
        severity: P0
      annotations:
        summary: "P0 Pgbouncer Down: {{ $labels.ins }} {{ $value }}"
        description: |
          pgbouncer_up[ins={{ $labels.ins }}] = {{ $value }} == 0
          http://g.pigsty/d/pg-pgbouncer&from=now-10m&to=now&var-ins={{ $labels.ins }}          

    # pg/pgbouncer exporter down for 1m triggers a P1 alert
    - alert: PGSQL_EXPORTER_DOWN
      expr: up{instance=~"^.*:(9630|9631)$"} == 0
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG/PGB Exporter Down: {{ $labels.ins }} {{ $labels.instance }} {{ $value }}"
        description: |
          up[instance={{ $labels.instance }}] = {{ $value }} == 0
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=262&fullscreen&var-ins={{ $labels.ins }}          



    #==============================================================#
    #                         Latency                              #
    #==============================================================#
    # replication break for 1m triggers a P1 alert (warn: heal in 5m)
    - alert: PGSQL_REPLICATION_BREAK
      expr: delta(pg_downstream_count{state="streaming"}[5m]) < 0
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Replication Break: {{ $labels.ins }} {{ $value }}"
        description: |
          pg_downstream_count_delta[ins={{ $labels.ins }}] = {{ $value }} < 0
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=180&fullscreen&var-ins={{ $labels.ins }}          

    # replication lag greater than 8 seconds for 3m triggers a P1 alert
    - alert: PGSQL_REPLICATION_LAG
      expr: pg_replication_replay_lag{application_name!='pg_receivewal'} > 8
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Replication Lagged: {{ $labels.ins }} {{ $value }}"
        description: |
          pg_replication_replay_lag[ins={{ $labels.ins }}] = {{ $value }} > 8s
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=384&fullscreen&var-ins={{ $labels.ins }}          

    # pg avg response time > 16ms
    - alert: PGSQL_QUERY_RT_HIGH
      expr: pg:ins:query_rt > 0.016
      for: 1m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Query Response Time High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:query_rt[ins={{ $labels.ins }}] = {{ $value }} > 16ms
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=137&fullscreen&var-ins={{ $labels.ins }}          


    #==============================================================#
    #                        Saturation                            #
    #==============================================================#
    # pg load1 higher than 70% for 3m triggers a P1 alert
    - alert: PGSQL_LOAD_HIGH
      expr: pg:ins:load1{} > 0.70
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Load High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:load1[ins={{ $labels.ins }}] = {{ $value }} > 70%
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=210&fullscreen&var-ins={{ $labels.ins }}          

    # pg active backends more than 2 times the available cpu cores for 3m triggers a P1 alert
    - alert: PGSQL_BACKEND_HIGH
      expr: pg:ins:active_backends / on(ins) node:ins:cpu_count > 2
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Backend High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:active_backends/node:ins:cpu_count[ins={{ $labels.ins }}] = {{ $value }} > 2
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=150&fullscreen&var-ins={{ $labels.ins }}          

    # more than 1 idle-in-transaction backend for 3m triggers a P2 alert
    - alert: PGSQL_IDLE_XACT_BACKEND_HIGH
      expr: pg:ins:ixact_backends > 1
      for: 3m
      labels:
        severity: P2
      annotations:
        summary: "P1 PG Idle In Transaction Backend High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:ixact_backends[ins={{ $labels.ins }}] = {{ $value }} > 1
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=161&fullscreen&var-ins={{ $labels.ins }}          


    # more than 2 waiting clients for 3m triggers a P1 alert
    - alert: PGSQL_CLIENT_QUEUING
      expr: pg:ins:waiting_clients > 2
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Client Queuing: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:waiting_clients[ins={{ $labels.ins }}] = {{ $value }} > 2
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=159&fullscreen&var-ins={{ $labels.ins }}          

    # age wrap around (near half) triggers a P1 alert
    - alert: PGSQL_AGE_HIGH
      expr: pg:ins:age > 1000000000
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 PG Age High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:age[ins={{ $labels.ins }}] = {{ $value }} > 1000000000
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=172&fullscreen&var-ins={{ $labels.ins }}          



    #==============================================================#
    #                         Traffic                              #
    #==============================================================#
    # more than 30k TPS lasting for 3m triggers a P1 alert (pgbouncer bottleneck)
    - alert: PGSQL_TPS_HIGH
      expr: pg:ins:xacts > 30000
      for: 3m
      labels:
        severity: P1
      annotations:
        summary: "P1 Postgres TPS High: {{ $labels.ins }} {{ $value }}"
        description: |
          pg:ins:xacts[ins={{ $labels.ins }}] = {{ $value }} > 30000
          http://g.pigsty/d/pg-instance?from=now-10m&to=now&viewPanel=125&fullscreen&var-ins={{ $labels.ins }}          

...
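
A hedged way to verify these alert definitions after deployment: validate the file with promtool and ask Prometheus which alerts are currently pending or firing. The file path and the Prometheus address below are assumptions:

$ promtool check rules /etc/prometheus/rules/pgsql-alert.yml      # assumed alert rules path
$ curl -s http://localhost:9090/api/v1/alerts                     # list pending / firing alerts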

6 - Sandbox Stdout

standard output of sandbox bootstrap

Makefile shortcuts and their expected outputs

Overview

# Clone the repo and enter it
cd /tmp && git clone git@github.com:Vonng/pigsty.git && cd pigsty

make up         # bring up the vagrant virtual machines
make ssh        # configure SSH access to the VMs        (once; no need to repeat on later startups)
sudo make dns   # write Pigsty static DNS records        (requires sudo password; optional; once)
make download   # download the latest offline package    (optional; greatly speeds up initialization)
make upload     # upload the offline package to the meta node
make init       # initialize Pigsty
make mon-view   # open the Pigsty monitoring home page (default credentials: admin:admin)

clone

Clone the project and enter its directory; all subsequent operations are run from the project root (here /tmp/pigsty).

cd /tmp && git clone git@github.com:Vonng/pigsty.git && cd pigsty

clean

Remove any existing sandbox traces (if present).

$ make clean
cd vagrant && vagrant destroy -f --parallel; exit 0
==> vagrant: A new version of Vagrant is available: 2.2.14 (installed version: 2.2.13)!
==> vagrant: To upgrade visit: https://www.vagrantup.com/downloads.html

==> node-3: Forcing shutdown of VM...
==> node-3: Destroying VM and associated drives...
==> node-2: Forcing shutdown of VM...
==> node-2: Destroying VM and associated drives...
==> node-1: Forcing shutdown of VM...
==> node-1: Destroying VM and associated drives...
==> meta: Forcing shutdown of VM...
==> meta: Destroying VM and associated drives...

up

Running make up invokes vagrant up, which uses VirtualBox to create four virtual machines as defined in the Vagrantfile.

Note that the first time vagrant up runs, it automatically downloads the CentOS/7 box image from the official site. If your network connection is poor (for example, no proxy available), this may take quite a while. You can also create the virtual machines yourself and deploy Pigsty following the Deployment chapter (not recommended).
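
If the box download is too slow, you can pre-fetch the image once before running make up; the box name below assumes the standard CentOS/7 image from Vagrant Cloud and may differ from the box pinned in the Vagrantfile:

$ vagrant box add centos/7 --provider virtualbox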

$ make up
cd vagrant && vagrant up
Bringing machine 'meta' up with 'virtualbox' provider...
Bringing machine 'node-1' up with 'virtualbox' provider...
Bringing machine 'node-2' up with 'virtualbox' provider...
Bringing machine 'node-3' up with 'virtualbox' provider...
==> meta: Cloning VM...
==> meta: Matching MAC address for NAT networking...
==> meta: Setting the name of the VM: vagrant_meta_1614587906789_29514
==> meta: Clearing any previously set network interfaces...
==> meta: Preparing network interfaces based on configuration...
    meta: Adapter 1: nat
    meta: Adapter 2: hostonly
==> meta: Forwarding ports...
    meta: 22 (guest) => 2222 (host) (adapter 1)
==> meta: Running 'pre-boot' VM customizations...
==> meta: Booting VM...
==> meta: Waiting for machine to boot. This may take a few minutes...
    meta: SSH address: 127.0.0.1:2222
    meta: SSH username: vagrant
    meta: SSH auth method: private key
==> meta: Machine booted and ready!
==> meta: Checking for guest additions in VM...
    meta: No guest additions were detected on the base box for this VM! Guest
    meta: additions are required for forwarded ports, shared folders, host only
    meta: networking, and more. If SSH fails on this machine, please install
    meta: the guest additions and repackage the box to continue.
    meta:
    meta: This is not an error message; everything may continue to work properly,
    meta: in which case you may ignore this message.
==> meta: Setting hostname...
==> meta: Configuring and enabling network interfaces...
==> meta: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> meta: Running provisioner: shell...
    meta: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-1jv6obp.sh
    meta: [INFO] write ssh config to /home/vagrant/.ssh
==> node-1: Cloning VM...
==> node-1: Matching MAC address for NAT networking...
==> node-1: Setting the name of the VM: vagrant_node-1_1614587930603_84690
==> node-1: Fixed port collision for 22 => 2222. Now on port 2200.
==> node-1: Clearing any previously set network interfaces...
==> node-1: Preparing network interfaces based on configuration...
    node-1: Adapter 1: nat
    node-1: Adapter 2: hostonly
==> node-1: Forwarding ports...
    node-1: 22 (guest) => 2200 (host) (adapter 1)
==> node-1: Running 'pre-boot' VM customizations...
==> node-1: Booting VM...
==> node-1: Waiting for machine to boot. This may take a few minutes...
    node-1: SSH address: 127.0.0.1:2200
    node-1: SSH username: vagrant
    node-1: SSH auth method: private key
==> node-1: Machine booted and ready!
==> node-1: Checking for guest additions in VM...
    node-1: No guest additions were detected on the base box for this VM! Guest
    node-1: additions are required for forwarded ports, shared folders, host only
    node-1: networking, and more. If SSH fails on this machine, please install
    node-1: the guest additions and repackage the box to continue.
    node-1:
    node-1: This is not an error message; everything may continue to work properly,
    node-1: in which case you may ignore this message.
==> node-1: Setting hostname...
==> node-1: Configuring and enabling network interfaces...
==> node-1: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> node-1: Running provisioner: shell...
    node-1: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-5w83e1.sh
    node-1: [INFO] write ssh config to /home/vagrant/.ssh
==> node-2: Cloning VM...
==> node-2: Matching MAC address for NAT networking...
==> node-2: Setting the name of the VM: vagrant_node-2_1614587953786_32441
==> node-2: Fixed port collision for 22 => 2222. Now on port 2201.
==> node-2: Clearing any previously set network interfaces...
==> node-2: Preparing network interfaces based on configuration...
    node-2: Adapter 1: nat
    node-2: Adapter 2: hostonly
==> node-2: Forwarding ports...
    node-2: 22 (guest) => 2201 (host) (adapter 1)
==> node-2: Running 'pre-boot' VM customizations...
==> node-2: Booting VM...
==> node-2: Waiting for machine to boot. This may take a few minutes...
    node-2: SSH address: 127.0.0.1:2201
    node-2: SSH username: vagrant
    node-2: SSH auth method: private key
==> node-2: Machine booted and ready!
==> node-2: Checking for guest additions in VM...
    node-2: No guest additions were detected on the base box for this VM! Guest
    node-2: additions are required for forwarded ports, shared folders, host only
    node-2: networking, and more. If SSH fails on this machine, please install
    node-2: the guest additions and repackage the box to continue.
    node-2:
    node-2: This is not an error message; everything may continue to work properly,
    node-2: in which case you may ignore this message.
==> node-2: Setting hostname...
==> node-2: Configuring and enabling network interfaces...
==> node-2: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> node-2: Running provisioner: shell...
    node-2: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-1xljcde.sh
    node-2: [INFO] write ssh config to /home/vagrant/.ssh
==> node-3: Cloning VM...
==> node-3: Matching MAC address for NAT networking...
==> node-3: Setting the name of the VM: vagrant_node-3_1614587977533_52921
==> node-3: Fixed port collision for 22 => 2222. Now on port 2202.
==> node-3: Clearing any previously set network interfaces...
==> node-3: Preparing network interfaces based on configuration...
    node-3: Adapter 1: nat
    node-3: Adapter 2: hostonly
==> node-3: Forwarding ports...
    node-3: 22 (guest) => 2202 (host) (adapter 1)
==> node-3: Running 'pre-boot' VM customizations...
==> node-3: Booting VM...
==> node-3: Waiting for machine to boot. This may take a few minutes...
    node-3: SSH address: 127.0.0.1:2202
    node-3: SSH username: vagrant
    node-3: SSH auth method: private key
==> node-3: Machine booted and ready!
==> node-3: Checking for guest additions in VM...
    node-3: No guest additions were detected on the base box for this VM! Guest
    node-3: additions are required for forwarded ports, shared folders, host only
    node-3: networking, and more. If SSH fails on this machine, please install
    node-3: the guest additions and repackage the box to continue.
    node-3:
    node-3: This is not an error message; everything may continue to work properly,
    node-3: in which case you may ignore this message.
==> node-3: Setting hostname...
==> node-3: Configuring and enabling network interfaces...
==> node-3: Rsyncing folder: /Volumes/Data/pigsty/vagrant/ => /vagrant
==> node-3: Running provisioner: shell...
    node-3: Running: /var/folders/_5/_0mbf4292pl9y4xgy0kn2r1h0000gn/T/vagrant-shell20210301-60046-1cykx8o.sh
    node-3: [INFO] write ssh config to /home/vagrant/.ssh

ssh

The default user on the newly created VMs is vagrant, so passwordless SSH access from the local machine to the VMs needs to be configured. Running make ssh invokes vagrant's ssh-config command and writes the SSH configuration for the pigsty VM nodes to ~/.ssh/pigsty_config.

This command usually only needs to be run once, when the sandbox is first brought up; VMs re-created later will normally keep the same SSH configuration.

Once it has completed, you can connect to the sandbox VM nodes through SSH aliases such as ssh node-1.

$ make ssh
cd vagrant && vagrant ssh-config > ~/.ssh/pigsty_config 2>/dev/null; true
if ! grep --quiet "pigsty_config" ~/.ssh/config ; then (echo 'Include ~/.ssh/pigsty_config' && cat ~/.ssh/config) >  ~/.ssh/config.tmp; mv ~/.ssh/config.tmp ~/.ssh/config && chmod 0600 ~/.ssh/config; fi
if ! grep --quiet "StrictHostKeyChecking=no" ~/.ssh/config ; then (echo 'StrictHostKeyChecking=no' && cat ~/.ssh/config) >  ~/.ssh/config.tmp; mv ~/.ssh/config.tmp ~/.ssh/config && chmod 0600 ~/.ssh/config; fi

dns

This command writes the static DNS records for the Pigsty sandbox VMs into /etc/hosts. It usually only needs to be run once, when the sandbox is first launched.

Once it completes, you can access web UIs such as http://g.pigsty by domain name from a local browser.

Note that the dns target must be run with sudo and will prompt for a password, since /etc/hosts can only be modified with elevated privileges.

$ sudo make dns
Password: #<enter your password here>
if ! grep --quiet "pigsty dns records" /etc/hosts ; then cat files/dns >> /etc/hosts; fi
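
What gets appended is the content of files/dns. A minimal sketch of what that block might contain, assuming the web UIs are served from the meta node at 10.10.10.10 (only g.pigsty and yum.pigsty appear elsewhere on this page; the real entries come from files/dns and may differ):

# pigsty dns records    <- marker line the make target greps for
10.10.10.10 yum.pigsty
10.10.10.10 g.pigsty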

download

Download the latest Pigsty offline installation package (about 1GB) from the CDN to the local machine; the download takes roughly one minute.

$ make download
curl http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg.tgz -o files/pkg.tgz
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1067M  100 1067M    0     0  15.2M      0  0:01:10  0:01:10 --:--:-- 29.0M

Pigsty is a complex software system. To keep the system stable, Pigsty downloads all required software packages from the Internet during initialization and builds a local Yum repo from them.

The dependencies add up to about 1GB, and download speed depends on your network. Although Pigsty uses mirror repos to accelerate downloads wherever possible, a few packages may still be blocked by firewalls and download extremely slowly. You can set a download proxy via the proxy_env configuration option to get through the first download, or simply download the pre-built offline installation package.

The latest offline installation package is available from:

GitHub Release: https://github.com/Vonng/pigsty/releases

CDN Download: http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg.tgz

You can also download it manually and place it at files/pkg.tgz, as shown below.
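
If you go the manual route, fetching from either location and placing the file at files/pkg.tgz is equivalent to running make download, e.g. with the CDN URL:

$ curl http://pigsty-1304147732.cos.accelerate.myqcloud.com/pkg.tgz -o files/pkg.tgz
$ ls -lh files/pkg.tgz    # expect a tarball of roughly 1GB, ready for make upload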

upload

Upload the downloaded offline installation package to the meta node and extract it there, which speeds up subsequent initialization.

$ make upload
ssh -t meta "sudo rm -rf /tmp/pkg.tgz"
Connection to 127.0.0.1 closed.
scp -r files/pkg.tgz meta:/tmp/pkg.tgz
pkg.tgz                                                                                                                                                                 100% 1068MB  53.4MB/s   00:19
ssh -t meta "sudo mkdir -p /www/pigsty/; sudo rm -rf /www/pigsty/*; sudo tar -xf /tmp/pkg.tgz --strip-component=1 -C /www/pigsty/"
Connection to 127.0.0.1 closed.
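
To confirm the package landed where the local repo expects it, you can list the target directory on the meta node (a quick check; /www/pigsty is the path used by the commands above):

$ ssh meta 'ls /www/pigsty | head'    # should show the extracted RPM packages and repo metadata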

init

After completing the steps above, running make init invokes ansible to initialize the Pigsty system.

$ make init
./sandbox.yml   # quick init: provision the meta node and ordinary database nodes in parallel

sandbox.yml is an initialization playbook tailored for the local sandbox environment; by initializing the meta node and the database nodes at the same time, it cuts the total time roughly in half. For production environments, it is recommended to use infra.yml and pgsql.yml to initialize the meta node and the ordinary nodes separately, in that order (see the sketch below).
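
For reference, the production-style sequence described above would look like the following, run from the pigsty source directory, meta node first and database nodes second:

$ ./infra.yml     # initialize the meta node (infrastructure) first
$ ./pgsql.yml     # then initialize the ordinary database node clusters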

If you have already uploaded the offline installation package to the meta node, initialization is fairly quick, taking about 5-10 minutes in total depending on machine specs.

If the offline package is not present, Pigsty will download roughly 1GB of data from the Internet during initialization, which may take 20 minutes or more depending on network conditions.

$ make init
./sandbox.yml                       # interleave sandbox provisioning
[WARNING]: Invalid characters were found in group names but not replaced, use -vvvv to see details

PLAY [Init local repo] ***********************************************************************************************************************************************************************************

TASK [repo : Create local repo directory] ****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Backup & remove existing repos] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Add required upstream repos] ****************************************************************************************************************************************************************
[WARNING]: Using a variable for a task's 'args' is unsafe in some situations (see https://docs.ansible.com/ansible/devel/reference_appendices/faq.html#argsplat-unsafe)
changed: [10.10.10.10] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
changed: [10.10.10.10] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
changed: [10.10.10.10] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
changed: [10.10.10.10] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
changed: [10.10.10.10] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
changed: [10.10.10.10] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
changed: [10.10.10.10] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
changed: [10.10.10.10] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
changed: [10.10.10.10] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
changed: [10.10.10.10] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})

TASK [repo : Check repo pkgs cache exists] ***************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Set fact whether repo_exists] ***************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Move upstream repo to backup] ***************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Add local file system repos] ****************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Remake yum cache if not exists] *************************************************************************************************************************************************************
[WARNING]: Consider using the yum module rather than running 'yum'.  If you need to use command because yum is insufficient you can add 'warn: false' to this command task or set
'command_warnings=False' in ansible.cfg to get rid of this message.
changed: [10.10.10.10]

TASK [repo : Install repo bootstrap packages] ************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=['yum-utils', 'createrepo', 'ansible', 'nginx', 'wget'])

TASK [repo : Render repo nginx server files] *************************************************************************************************************************************************************
changed: [10.10.10.10] => (item={'src': 'index.html.j2', 'dest': '/www/index.html'})
changed: [10.10.10.10] => (item={'src': 'default.conf.j2', 'dest': '/etc/nginx/conf.d/default.conf'})
changed: [10.10.10.10] => (item={'src': 'local.repo.j2', 'dest': '/www/pigsty.repo'})
changed: [10.10.10.10] => (item={'src': 'nginx.conf.j2', 'dest': '/etc/nginx/nginx.conf'})

TASK [repo : Disable selinux for repo server] ************************************************************************************************************************************************************
[WARNING]: SELinux state temporarily changed from 'enforcing' to 'permissive'. State change will take effect next reboot.
changed: [10.10.10.10]

TASK [repo : Launch repo nginx server] *******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [repo : Waits repo server online] *******************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [repo : Download web url packages] ******************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=https://github.com/Vonng/pg_exporter/releases/download/v0.3.2/pg_exporter-0.3.2-1.el7.x86_64.rpm)
skipping: [10.10.10.10] => (item=https://github.com/cybertec-postgresql/vip-manager/releases/download/v0.6/vip-manager_0.6-1_amd64.rpm)
skipping: [10.10.10.10] => (item=http://guichaz.free.fr/polysh/files/polysh-0.4-1.noarch.rpm)

TASK [repo : Download repo packages] *********************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=epel-release nginx wget yum-utils yum createrepo)
skipping: [10.10.10.10] => (item=ntp chrony uuid lz4 nc pv jq vim-enhanced make patch bash lsof wget unzip git tuned)
skipping: [10.10.10.10] => (item=readline zlib openssl libyaml libxml2 libxslt perl-ExtUtils-Embed ca-certificates)
skipping: [10.10.10.10] => (item=numactl grubby sysstat dstat iotop bind-utils net-tools tcpdump socat ipvsadm telnet)
skipping: [10.10.10.10] => (item=grafana prometheus2 pushgateway alertmanager)
skipping: [10.10.10.10] => (item=node_exporter postgres_exporter nginx_exporter blackbox_exporter)
skipping: [10.10.10.10] => (item=consul consul_exporter consul-template etcd)
skipping: [10.10.10.10] => (item=ansible python python-pip python-psycopg2 audit)
skipping: [10.10.10.10] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.10] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.10] => (item=haproxy keepalived dnsmasq)
skipping: [10.10.10.10] => (item=patroni patroni-consul patroni-etcd pgbouncer pg_cli pgbadger pg_activity)
skipping: [10.10.10.10] => (item=pgcenter boxinfo check_postgres emaj pgbconsole pg_bloat_check pgquarrel)
skipping: [10.10.10.10] => (item=barman barman-cli pgloader pgFormatter pitrery pspg pgxnclient PyGreSQL pgadmin4 tail_n_mail)
skipping: [10.10.10.10] => (item=postgresql13* postgis31* citus_13 timescaledb_13)
skipping: [10.10.10.10] => (item=pg_repack13 pg_squeeze13)
skipping: [10.10.10.10] => (item=pg_qualstats13 pg_stat_kcache13 system_stats_13 bgw_replstatus13)
skipping: [10.10.10.10] => (item=plr13 plsh13 plpgsql_check_13 plproxy13 plr13 plsh13 plpgsql_check_13 pldebugger13)
skipping: [10.10.10.10] => (item=hdfs_fdw_13 mongo_fdw13 mysql_fdw_13 ogr_fdw13 redis_fdw_13 pgbouncer_fdw13)
skipping: [10.10.10.10] => (item=wal2json13 count_distinct13 ddlx_13 geoip13 orafce13)
skipping: [10.10.10.10] => (item=rum_13 hypopg_13 ip4r13 jsquery_13 logerrors_13 periods_13 pg_auto_failover_13 pg_catcheck13)
skipping: [10.10.10.10] => (item=pg_fkpart13 pg_jobmon13 pg_partman13 pg_prioritize_13 pg_track_settings13 pgaudit15_13)
skipping: [10.10.10.10] => (item=pgcryptokey13 pgexportdoc13 pgimportdoc13 pgmemcache-13 pgmp13 pgq-13)
skipping: [10.10.10.10] => (item=pguint13 pguri13 prefix13  safeupdate_13 semver13  table_version13 tdigest13)

TASK [repo : Download repo pkg deps] *********************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=epel-release nginx wget yum-utils yum createrepo)
skipping: [10.10.10.10] => (item=ntp chrony uuid lz4 nc pv jq vim-enhanced make patch bash lsof wget unzip git tuned)
skipping: [10.10.10.10] => (item=readline zlib openssl libyaml libxml2 libxslt perl-ExtUtils-Embed ca-certificates)
skipping: [10.10.10.10] => (item=numactl grubby sysstat dstat iotop bind-utils net-tools tcpdump socat ipvsadm telnet)
skipping: [10.10.10.10] => (item=grafana prometheus2 pushgateway alertmanager)
skipping: [10.10.10.10] => (item=node_exporter postgres_exporter nginx_exporter blackbox_exporter)
skipping: [10.10.10.10] => (item=consul consul_exporter consul-template etcd)
skipping: [10.10.10.10] => (item=ansible python python-pip python-psycopg2 audit)
skipping: [10.10.10.10] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.10] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.10] => (item=haproxy keepalived dnsmasq)
skipping: [10.10.10.10] => (item=patroni patroni-consul patroni-etcd pgbouncer pg_cli pgbadger pg_activity)
skipping: [10.10.10.10] => (item=pgcenter boxinfo check_postgres emaj pgbconsole pg_bloat_check pgquarrel)
skipping: [10.10.10.10] => (item=barman barman-cli pgloader pgFormatter pitrery pspg pgxnclient PyGreSQL pgadmin4 tail_n_mail)
skipping: [10.10.10.10] => (item=postgresql13* postgis31* citus_13 timescaledb_13)
skipping: [10.10.10.10] => (item=pg_repack13 pg_squeeze13)
skipping: [10.10.10.10] => (item=pg_qualstats13 pg_stat_kcache13 system_stats_13 bgw_replstatus13)
skipping: [10.10.10.10] => (item=plr13 plsh13 plpgsql_check_13 plproxy13 plr13 plsh13 plpgsql_check_13 pldebugger13)
skipping: [10.10.10.10] => (item=hdfs_fdw_13 mongo_fdw13 mysql_fdw_13 ogr_fdw13 redis_fdw_13 pgbouncer_fdw13)
skipping: [10.10.10.10] => (item=wal2json13 count_distinct13 ddlx_13 geoip13 orafce13)
skipping: [10.10.10.10] => (item=rum_13 hypopg_13 ip4r13 jsquery_13 logerrors_13 periods_13 pg_auto_failover_13 pg_catcheck13)
skipping: [10.10.10.10] => (item=pg_fkpart13 pg_jobmon13 pg_partman13 pg_prioritize_13 pg_track_settings13 pgaudit15_13)
skipping: [10.10.10.10] => (item=pgcryptokey13 pgexportdoc13 pgimportdoc13 pgmemcache-13 pgmp13 pgq-13)
skipping: [10.10.10.10] => (item=pguint13 pguri13 prefix13  safeupdate_13 semver13  table_version13 tdigest13)

TASK [repo : Create local repo index] ********************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [repo : Copy bootstrap scripts] *********************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [repo : Mark repo cache as valid] *******************************************************************************************************************************************************************
skipping: [10.10.10.10]

PLAY [Provision Node] ************************************************************************************************************************************************************************************

TASK [node : Update node hostname] ***********************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Add new hostname to /etc/hosts] *************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Write static dns records] *******************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=10.10.10.10 yum.pigsty)
changed: [10.10.10.11] => (item=10.10.10.10 yum.pigsty)
changed: [10.10.10.13] => (item=10.10.10.10 yum.pigsty)
changed: [10.10.10.12] => (item=10.10.10.10 yum.pigsty)

TASK [node : Get old nameservers] ************************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Truncate resolv file] ***********************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Write resolv options] ***********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.12] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.10] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.13] => (item=options single-request-reopen timeout:1 rotate)
changed: [10.10.10.11] => (item=domain service.consul)
changed: [10.10.10.12] => (item=domain service.consul)
changed: [10.10.10.13] => (item=domain service.consul)
changed: [10.10.10.10] => (item=domain service.consul)

TASK [node : Add new nameservers] ************************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=10.10.10.10)
changed: [10.10.10.12] => (item=10.10.10.10)
changed: [10.10.10.10] => (item=10.10.10.10)
changed: [10.10.10.13] => (item=10.10.10.10)

TASK [node : Append old nameservers] *********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=10.0.2.3)
changed: [10.10.10.12] => (item=10.0.2.3)
changed: [10.10.10.10] => (item=10.0.2.3)
changed: [10.10.10.13] => (item=10.0.2.3)

TASK [node : Node configure disable firewall] ************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [node : Node disable selinux by default] ************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
[WARNING]: SELinux state change will take effect next reboot
ok: [10.10.10.10]

TASK [node : Backup existing repos] **********************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Install upstream repo] **********************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.10] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.12] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.13] => (item={'name': 'base', 'description': 'CentOS-$releasever - Base - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/os/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.11] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.13] => (item={'name': 'updates', 'description': 'CentOS-$releasever - Updates - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.13] => (item={'name': 'extras', 'description': 'CentOS-$releasever - Extras - Aliyun Mirror', 'baseurl': ['http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/', 'http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/', 'http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/'], 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.12] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.13] => (item={'name': 'epel', 'description': 'CentOS $releasever - EPEL - Aliyun Mirror', 'baseurl': 'http://mirrors.aliyun.com/epel/$releasever/$basearch', 'gpgcheck': False, 'failovermethod': 'priority'})
skipping: [10.10.10.13] => (item={'name': 'grafana', 'description': 'Grafana - TsingHua Mirror', 'gpgcheck': False, 'baseurl': 'https://mirrors.tuna.tsinghua.edu.cn/grafana/yum/rpm'})
skipping: [10.10.10.10] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.10] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.11] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.10] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.11] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.12] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.10] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.11] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.12] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.10] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.11] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.12] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.10] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'prometheus', 'description': 'Prometheus and exporters', 'gpgcheck': False, 'baseurl': 'https://packagecloud.io/prometheus-rpm/release/el/$releasever/$basearch'})
skipping: [10.10.10.11] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.12] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.13] => (item={'name': 'pgdg-common', 'description': 'PostgreSQL common RPMs for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/common/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.10] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.11] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.12] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.10] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'pgdg13', 'description': 'PostgreSQL 13 for RHEL/CentOS $releasever - $basearch', 'gpgcheck': False, 'baseurl': 'http://mirrors.zju.edu.cn/postgresql/repos/yum/13/redhat/rhel-$releasever-$basearch'})
skipping: [10.10.10.11] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.12] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'centos-sclo', 'description': 'CentOS-$releasever - SCLo', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-sclo'})
skipping: [10.10.10.11] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})
skipping: [10.10.10.12] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'centos-sclo-rh', 'description': 'CentOS-$releasever - SCLo rh', 'gpgcheck': False, 'mirrorlist': 'http://mirrorlist.centos.org?arch=$basearch&release=7&repo=sclo-rh'})
skipping: [10.10.10.12] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'nginx', 'description': 'Nginx Official Yum Repo', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'http://nginx.org/packages/centos/$releasever/$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'haproxy', 'description': 'Copr repo for haproxy', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/roidelapluie/haproxy/epel-$releasever-$basearch/'})
skipping: [10.10.10.13] => (item={'name': 'harbottle', 'description': 'Copr repo for main owned by harbottle', 'skip_if_unavailable': True, 'gpgcheck': False, 'baseurl': 'https://download.copr.fedorainfracloud.org/results/harbottle/main/epel-$releasever-$basearch/'})

TASK [node : Install local repo] *************************************************************************************************************************************************************************
changed: [10.10.10.13] => (item=http://yum.pigsty/pigsty.repo)
changed: [10.10.10.12] => (item=http://yum.pigsty/pigsty.repo)
changed: [10.10.10.11] => (item=http://yum.pigsty/pigsty.repo)
changed: [10.10.10.10] => (item=http://yum.pigsty/pigsty.repo)

TASK [node : Install node basic packages] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=[])
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])

TASK [node : Install node extra packages] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=[])
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])

TASK [node : Install meta specific packages] *************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=[])
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])

TASK [node : Install node basic packages] ****************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])
changed: [10.10.10.13] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])
changed: [10.10.10.11] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])
changed: [10.10.10.12] => (item=['wget,yum-utils,ntp,chrony,tuned,uuid,lz4,vim-minimal,make,patch,bash,lsof,wget,unzip,git,readline,zlib,openssl', 'numactl,grubby,sysstat,dstat,iotop,bind-utils,net-tools,tcpdump,socat,ipvsadm,telnet,tuned,pv,jq', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography', 'node_exporter,consul,consul-template,etcd,haproxy,keepalived,vip-manager'])

TASK [node : Install node extra packages] ****************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])
changed: [10.10.10.12] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])
changed: [10.10.10.13] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])
changed: [10.10.10.10] => (item=['patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity'])

TASK [node : Install meta specific packages] *************************************************************************************************************************************************************
skipping: [10.10.10.11] => (item=[])
skipping: [10.10.10.12] => (item=[])
skipping: [10.10.10.13] => (item=[])
changed: [10.10.10.10] => (item=['grafana,prometheus2,alertmanager,nginx_exporter,blackbox_exporter,pushgateway', 'dnsmasq,nginx,ansible,pgbadger,polysh'])

TASK [node : Node configure disable numa] ****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Node configure disable swap] ****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Node configure unmount swap] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=swap)
skipping: [10.10.10.10] => (item=none)
skipping: [10.10.10.11] => (item=swap)
skipping: [10.10.10.11] => (item=none)
skipping: [10.10.10.12] => (item=swap)
skipping: [10.10.10.12] => (item=none)
skipping: [10.10.10.13] => (item=swap)
skipping: [10.10.10.13] => (item=none)

TASK [node : Node setup static network] ******************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Node configure disable firewall] ************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [node : Node configure disk prefetch] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Enable linux kernel modules] ****************************************************************************************************************************************************************
changed: [10.10.10.13] => (item=softdog)
changed: [10.10.10.12] => (item=softdog)
changed: [10.10.10.11] => (item=softdog)
changed: [10.10.10.10] => (item=softdog)
changed: [10.10.10.13] => (item=br_netfilter)
changed: [10.10.10.12] => (item=br_netfilter)
changed: [10.10.10.11] => (item=br_netfilter)
changed: [10.10.10.10] => (item=br_netfilter)
changed: [10.10.10.12] => (item=ip_vs)
changed: [10.10.10.13] => (item=ip_vs)
changed: [10.10.10.11] => (item=ip_vs)
changed: [10.10.10.10] => (item=ip_vs)
changed: [10.10.10.13] => (item=ip_vs_rr)
changed: [10.10.10.12] => (item=ip_vs_rr)
changed: [10.10.10.11] => (item=ip_vs_rr)
changed: [10.10.10.10] => (item=ip_vs_rr)
ok: [10.10.10.13] => (item=ip_vs_rr)
ok: [10.10.10.12] => (item=ip_vs_rr)
ok: [10.10.10.11] => (item=ip_vs_rr)
ok: [10.10.10.10] => (item=ip_vs_rr)
changed: [10.10.10.13] => (item=ip_vs_wrr)
changed: [10.10.10.12] => (item=ip_vs_wrr)
changed: [10.10.10.11] => (item=ip_vs_wrr)
changed: [10.10.10.10] => (item=ip_vs_wrr)
changed: [10.10.10.13] => (item=ip_vs_sh)
changed: [10.10.10.12] => (item=ip_vs_sh)
changed: [10.10.10.11] => (item=ip_vs_sh)
changed: [10.10.10.10] => (item=ip_vs_sh)
changed: [10.10.10.13] => (item=nf_conntrack_ipv4)
changed: [10.10.10.12] => (item=nf_conntrack_ipv4)
changed: [10.10.10.11] => (item=nf_conntrack_ipv4)
changed: [10.10.10.10] => (item=nf_conntrack_ipv4)

TASK [node : Enable kernel module on reboot] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.10]

TASK [node : Get config parameter page count] ************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Get config parameter page size] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Tune shmmax and shmall via mem] *************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [node : Create tuned profiles] **********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=oltp)
changed: [10.10.10.12] => (item=oltp)
changed: [10.10.10.10] => (item=oltp)
changed: [10.10.10.13] => (item=oltp)
changed: [10.10.10.11] => (item=olap)
changed: [10.10.10.12] => (item=olap)
changed: [10.10.10.13] => (item=olap)
changed: [10.10.10.10] => (item=olap)
changed: [10.10.10.11] => (item=crit)
changed: [10.10.10.12] => (item=crit)
changed: [10.10.10.13] => (item=crit)
changed: [10.10.10.10] => (item=crit)
changed: [10.10.10.11] => (item=tiny)
changed: [10.10.10.12] => (item=tiny)
changed: [10.10.10.13] => (item=tiny)
changed: [10.10.10.10] => (item=tiny)

TASK [node : Render tuned profiles] **********************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=oltp)
changed: [10.10.10.12] => (item=oltp)
changed: [10.10.10.13] => (item=oltp)
changed: [10.10.10.10] => (item=oltp)
changed: [10.10.10.12] => (item=olap)
changed: [10.10.10.11] => (item=olap)
changed: [10.10.10.13] => (item=olap)
changed: [10.10.10.10] => (item=olap)
changed: [10.10.10.12] => (item=crit)
changed: [10.10.10.11] => (item=crit)
changed: [10.10.10.13] => (item=crit)
changed: [10.10.10.10] => (item=crit)
changed: [10.10.10.11] => (item=tiny)
changed: [10.10.10.12] => (item=tiny)
changed: [10.10.10.13] => (item=tiny)
changed: [10.10.10.10] => (item=tiny)

TASK [node : Active tuned profile] ***********************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [node : Change additional sysctl params] ************************************************************************************************************************************************************
changed: [10.10.10.13] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})
changed: [10.10.10.12] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})
changed: [10.10.10.11] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})
changed: [10.10.10.10] => (item={'key': 'net.bridge.bridge-nf-call-iptables', 'value': 1})

TASK [node : Copy default user bash profile] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Setup node default pam ulimits] *************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Create os user group admin] *****************************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Create os user admin] ***********************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]

TASK [node : Grant admin group nopass sudo] **************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Add no host checking to ssh config] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Add admin ssh no host checking] *************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [node : Fetch all admin public keys] ****************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [node : Exchange all admin ssh keys] ****************************************************************************************************************************************************************
changed: [10.10.10.10 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.10'])
changed: [10.10.10.13 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.10'])
changed: [10.10.10.12 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.10'])
changed: [10.10.10.11 -> meta] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.10'])
changed: [10.10.10.10 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.11'])
changed: [10.10.10.12 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.11'])
changed: [10.10.10.13 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.11'])
changed: [10.10.10.11 -> node-1] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.11'])
changed: [10.10.10.10 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.12'])
changed: [10.10.10.13 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.12'])
changed: [10.10.10.12 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.12'])
changed: [10.10.10.11 -> node-2] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.12'])
changed: [10.10.10.10 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDfXbkp7ATV3rIzcpCwxcwpumIjnjldzDp9qfu65d4W5gSNumN/wvOORnG17rB2y/msyjstu1C42v2V60yho/XjPNIqqPWPtM/bc6MHNeNJJxvEEtDsY530z3n37QTcVI1kg3zRqnzm8HDKEE+BAll+iyXjzTFoGHc39syDRF8r5sZpG0qiNY2QaqEnByASsoHM4RQ3Jw2D2SbA78wFBz1zqsdz5VympAcc9wcfuUqhwk0ExL+AtrPNUeyEXwgRr1Br6JXVHjT6EHLsZburTD7uT94Jqzixd3LXRwsmuCrPIssASrYvfnWVQ29MxhiZqrmLcwp4ImjQetcZE2EgfzEp ansible-generated on meta', '10.10.10.13'])
changed: [10.10.10.13 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDbkD6WQhs9KAv9HTYtZ+q2Nfxqhj72YbP16m0mTrEOS2evd4MWDBhVgAE6qK4gvAhVBdEdNaHc3f2W/wDpKvvbvCbwy+HZldUCTVUe1W3sycm1ZwP7m9Xr7Rg0Dd1Nom87CWsqmlmN6afPYyvJV3wCl4ZuqrAMQ5oCrR4D1B8yZBL7rj55JpzggnNJYv7+ueIeUYoPzE6mu32k9wPxEa2qXcdVelgL7dwjTAt1nsNukWAufuAI1nZcJahsNjj1B2XEEwgA1mHUzDPpemn5alCNeCb+Hdb0Y12No/Wo2Gcn3b5vh9pOamLCm3CGrrsAXZ2B8tQPGFObhGkSOB6pddkT ansible-generated on node-3', '10.10.10.13'])
changed: [10.10.10.11 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2TJItJzBUEZ452k7ADL6mIQsGk7gb4AUqvN0pAHwR06pVv1XUmpCI5Wb0RUOoNFwmSBVTUXoXCnK7SB44ftpzD29cpxw3tlLEphYeY1wfrd2lblhpn2KxzBhyJZ27lK2qcZk7Ik20pZDhQZRuZuhb6HufYn7FGOutB8kgQChrcpqr9zRhjZOe4Y8tLR2lmEAVrp6ZsS04rjiBJ65TDCWCNSnin8DVbM1EerJ6Pvxy1cOY+B00EYMHlMni/3orzcrlnZqpkR/NRpgs9+lo+DZ4SCuEtIEOzpPzcm/O4oLhxSnTMJKTFwcc+bgmE0t1LMxvIKOQTwhIX+KoBE/syxh9 ansible-generated on node-1', '10.10.10.13'])
changed: [10.10.10.12 -> node-3] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC3IAopnkVwQ779/Hk5MceAVZbhb/y3YaUu7ZROI87TaY/XK5WKJjplfNlLBC2vXGNkYMirbW+Qmmz/XIsyL7qvKmQfcMGP3ILD4FtMMlJMWLwBTIw5ORxvoZGxaWfw0bcZSIw5rv9rBA4UJR9JfZhpUkBMj7cq8jNDyIrLpoJ+hlnJa5G5zyiMWBqe7VKOoiBo7d2WBIauhRgHY3G79H9pVxJti6JJOeQ1tsUI5UtOMCRO+dbmsuRWruac4jWOj864RG/EjFveWEfCTagMFakqaxPTgF3RHAwPVBjbMm3+2lBiVNd2Zt2g/2gPdkEbIE+xXXP/f5kh21gXFea4ENsV ansible-generated on node-2', '10.10.10.13'])

TASK [node : Install public keys] ************************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)
changed: [10.10.10.10] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)
changed: [10.10.10.12] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)
changed: [10.10.10.13] => (item=ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAAgQC7IMAMNavYtWwzAJajKqwdn3ar5BhvcwCnBTxxEkXhGlCO2vfgosSAQMEflfgvkiI5nM1HIFQ8KINlx1XLO7SdL5KdInG5LIJjAFh0pujS4kNCT9a5IGvSq1BrzGqhbEcwWYdju1ZPYBcJm/MG+JD0dYCh8vfrYB/cYMD0SOmNkQ== vagrant@pigsty.com)

TASK [node : Install ntp package] ************************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.10]
ok: [10.10.10.13]

TASK [node : Install chrony package] *********************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]
ok: [10.10.10.10]

TASK [node : Setup default node timezone] ****************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]

TASK [node : Copy the ntp.conf file] *********************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Copy the chrony.conf template] **************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [node : Launch ntpd service] ************************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [node : Launch chronyd service] *********************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

PLAY [Init meta service] *********************************************************************************************************************************************************************************

TASK [ca : Create local ca directory] ********************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [ca : Copy ca cert from local files] ****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=ca.key)
skipping: [10.10.10.10] => (item=ca.crt)

TASK [ca : Check ca key cert exists] *********************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [ca : Create self-signed CA key-cert] ***************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Make sure dnsmasq package installed] **************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nameserver : Copy dnsmasq /etc/dnsmasq.d/config] ***************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Add dynamic dns records to meta] ******************************************************************************************************************************************************
changed: [10.10.10.10] => (item=10.10.10.2  pg-meta)
changed: [10.10.10.10] => (item=10.10.10.3  pg-test)
changed: [10.10.10.10] => (item=10.10.10.10 meta-1)
changed: [10.10.10.10] => (item=10.10.10.11 node-1)
changed: [10.10.10.10] => (item=10.10.10.12 node-2)
changed: [10.10.10.10] => (item=10.10.10.13 node-3)
changed: [10.10.10.10] => (item=10.10.10.10 pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 y.pigsty yum.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 c.pigsty consul.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 g.pigsty grafana.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 p.pigsty prometheus.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 a.pigsty alertmanager.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 n.pigsty ntp.pigsty)
changed: [10.10.10.10] => (item=10.10.10.10 h.pigsty haproxy.pigsty)

TASK [nameserver : Launch meta dnsmasq service] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Wait for meta dnsmasq online] *********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nameserver : Register consul dnsmasq service] ******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nameserver : Reload consul] ************************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Make sure nginx installed] *****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Create local html directory] ***************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Create nginx config directory] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Update default nginx index page] ***********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Copy nginx default config] *****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Copy nginx upstream conf] ******************************************************************************************************************************************************************
changed: [10.10.10.10] => (item={'name': 'home', 'host': 'pigsty', 'url': '127.0.0.1:3000'})
changed: [10.10.10.10] => (item={'name': 'consul', 'host': 'c.pigsty', 'url': '127.0.0.1:8500'})
changed: [10.10.10.10] => (item={'name': 'grafana', 'host': 'g.pigsty', 'url': '127.0.0.1:3000'})
changed: [10.10.10.10] => (item={'name': 'prometheus', 'host': 'p.pigsty', 'url': '127.0.0.1:9090'})
changed: [10.10.10.10] => (item={'name': 'alertmanager', 'host': 'a.pigsty', 'url': '127.0.0.1:9093'})
changed: [10.10.10.10] => (item={'name': 'haproxy', 'host': 'h.pigsty', 'url': '127.0.0.1:9091'})

TASK [nginx : Templating /etc/nginx/haproxy.conf] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Render haproxy upstream in cluster mode] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [nginx : Render haproxy location in cluster mode] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [nginx : Templating haproxy cluster index] **********************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [nginx : Templating haproxy cluster index] **********************************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
ok: [10.10.10.10] => (item=pg-test)

TASK [nginx : Restart meta nginx service] ****************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Wait for nginx service online] *************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Make sure nginx exporter installed] ********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Config nginx_exporter options] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Restart nginx_exporter service] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Wait for nginx exporter online] ************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [nginx : Register consul nginx service] *******************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Register consul nginx-exporter service] ****************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [nginx : Reload consul] *****************************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Install prometheus and alertmanager] **************************************************************************************************************************************************
ok: [10.10.10.10] => (item=prometheus2)
ok: [10.10.10.10] => (item=alertmanager)

TASK [prometheus : Wipe out prometheus config dir] *******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Wipe out existing prometheus data] ****************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [prometheus : Create postgres directory structure] **************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/prometheus)
changed: [10.10.10.10] => (item=/etc/prometheus/bin)
changed: [10.10.10.10] => (item=/etc/prometheus/rules)
changed: [10.10.10.10] => (item=/etc/prometheus/targets)
changed: [10.10.10.10] => (item=/export/prometheus/data)

TASK [prometheus : Copy prometheus bin scripts] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy prometheus rules scripts] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy alertmanager config] *******************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Render prometheus config] *************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Config /etc/prometheus opts] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Launch prometheus service] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Launch alertmanager service] **********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Wait for prometheus online] ***********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [prometheus : Wait for alertmanager online] *********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [prometheus : Render prometheus targets in cluster mode] ********************************************************************************************************************************************
changed: [10.10.10.10] => (item=pg-meta)
changed: [10.10.10.10] => (item=pg-test)

TASK [prometheus : Reload prometheus service] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy prometheus service definition] ***************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Copy alertmanager service definition] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [prometheus : Reload consul to register prometheus] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Make sure grafana is installed] **********************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [grafana : Check grafana plugin cache exists] *******************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [grafana : Provision grafana plugins via cache] *****************************************************************************************************************************************************
[WARNING]: Consider using the file module with state=absent rather than running 'rm'.  If you need to use command because file is insufficient you can add 'warn: false' to this command task or set
'command_warnings=False' in ansible.cfg to get rid of this message.
changed: [10.10.10.10]

TASK [grafana : Download grafana plugins from web] *******************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=redis-datasource)
skipping: [10.10.10.10] => (item=simpod-json-datasource)
skipping: [10.10.10.10] => (item=fifemon-graphql-datasource)
skipping: [10.10.10.10] => (item=sbueringer-consul-datasource)
skipping: [10.10.10.10] => (item=camptocamp-prometheus-alertmanager-datasource)
skipping: [10.10.10.10] => (item=ryantxu-ajax-panel)
skipping: [10.10.10.10] => (item=marcusolsson-hourly-heatmap-panel)
skipping: [10.10.10.10] => (item=michaeldmoore-multistat-panel)
skipping: [10.10.10.10] => (item=marcusolsson-treemap-panel)
skipping: [10.10.10.10] => (item=pr0ps-trackmap-panel)
skipping: [10.10.10.10] => (item=dalvany-image-panel)
skipping: [10.10.10.10] => (item=magnesium-wordcloud-panel)
skipping: [10.10.10.10] => (item=cloudspout-button-panel)
skipping: [10.10.10.10] => (item=speakyourcode-button-panel)
skipping: [10.10.10.10] => (item=jdbranham-diagram-panel)
skipping: [10.10.10.10] => (item=grafana-piechart-panel)
skipping: [10.10.10.10] => (item=snuids-radar-panel)
skipping: [10.10.10.10] => (item=digrich-bubblechart-panel)

TASK [grafana : Download grafana plugins from web] *******************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=https://github.com/Vonng/grafana-echarts)

TASK [grafana : Create grafana plugins cache] ************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [grafana : Copy /etc/grafana/grafana.ini] ***********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Remove grafana provision dir] ************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Copy provisioning content] ***************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Copy pigsty dashboards] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Copy pigsty icon image] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Replace grafana icon with pigsty] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Launch grafana service] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Wait for grafana online] *****************************************************************************************************************************************************************
ok: [10.10.10.10]

TASK [grafana : Update grafana default preferences] ******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Register consul grafana service] *********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [grafana : Reload consul] ***************************************************************************************************************************************************************************
changed: [10.10.10.10]

PLAY [Init dcs] ******************************************************************************************************************************************************************************************

TASK [consul : Check for existing consul] ****************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Consul exists flag fact set] **************************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [consul : Abort due to consul exists] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Clean existing consul instance] ***********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Stop any running consul instance] *********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Remove existing consul dir] ***************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/consul.d)
changed: [10.10.10.11] => (item=/etc/consul.d)
changed: [10.10.10.12] => (item=/etc/consul.d)
changed: [10.10.10.13] => (item=/etc/consul.d)
changed: [10.10.10.10] => (item=/var/lib/consul)
changed: [10.10.10.11] => (item=/var/lib/consul)
changed: [10.10.10.12] => (item=/var/lib/consul)
changed: [10.10.10.13] => (item=/var/lib/consul)

TASK [consul : Recreate consul dir] **********************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/consul.d)
changed: [10.10.10.11] => (item=/etc/consul.d)
changed: [10.10.10.12] => (item=/etc/consul.d)
changed: [10.10.10.13] => (item=/etc/consul.d)
changed: [10.10.10.10] => (item=/var/lib/consul)
changed: [10.10.10.11] => (item=/var/lib/consul)
changed: [10.10.10.13] => (item=/var/lib/consul)
changed: [10.10.10.12] => (item=/var/lib/consul)

TASK [consul : Make sure consul is installed] ************************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [consul : Make sure consul dir exists] **************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Get dcs server node names] ****************************************************************************************************************************************************************
ok: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Get dcs node name from var] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Get dcs node name from var] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [consul : Fetch hostname as dcs node name] **********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Get dcs name from hostname] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Copy /etc/consul.d/consul.json] ***********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Copy consul agent service] ****************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [consul : Get dcs bootstrap expect quorum] ****************************************************************************************************************
ok: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Copy consul server service unit] **********************************************************************************************************************************************************
changed: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Launch consul server service] *************************************************************************************************************************************************************
changed: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Wait for consul server online] ************************************************************************************************************************************************************
ok: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [consul : Launch consul agent service] **************************************************************************************************************************************************************
skipping: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [consul : Wait for consul agent online] *************************************************************************************************************************************************************
skipping: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

PLAY [Init database cluster] *****************************************************************************************************************************************************************************

TASK [postgres : Create os group postgres] ***************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Make sure dcs group exists] *************************************************************************************************************************************************************
ok: [10.10.10.10] => (item=consul)
ok: [10.10.10.11] => (item=consul)
ok: [10.10.10.12] => (item=consul)
ok: [10.10.10.13] => (item=consul)
ok: [10.10.10.11] => (item=etcd)
ok: [10.10.10.10] => (item=etcd)
ok: [10.10.10.12] => (item=etcd)
ok: [10.10.10.13] => (item=etcd)

TASK [postgres : Create dbsu postgres] *******************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Grant dbsu nopass sudo] *****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Grant dbsu all sudo] ********************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Grant dbsu limited sudo] ****************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Config patroni watchdog support] ********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Add dbsu ssh no host checking] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Fetch dbsu public keys] *****************************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Exchange dbsu ssh keys] *****************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.10'])
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.11'])
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.12'])
skipping: [10.10.10.10] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8ahlH3Yo0nTb1hhd7SGTF1sCwnjEVA/yGra2ktQcZ/i8S/2tfumVomxtnNTeOZqNeQygVUbRgIH77lABXrXwBOimw+J0EmoekPsW7q/NCT5EJgqfoDe5vWBpyhrCe1ixCxESlP2GfpaJYGqeMW2G8HiFU6ieDZcfGcFn1q9JBjtrrV851Htw+Ik/fed93ipGgWzzZnu4NOjz7tpmrsmE3/1J/RvPQdRT7Pjuy2pLn+oCjMkQHJezvUKruVTVwxjObaWO7WFlvQCy2dRez1GBxEK80LRbsZfmgkfIQPzmqHOaacqNBAHe+OeYlBh3fMMbpALzJHnhgJSW5GpdRwiUJ ansible-generated on meta', '10.10.10.13'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.10'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.11'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.12'])
skipping: [10.10.10.11] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDCIr/IW4qyd4Ls8dztCJyYHt354iPFbhLAUUiEK9R3A5W8UOSiJK/WVwlxMazH8QUaMWHuQAlTtW66kW1DDU+fsJ4xGxrNjEnwUbmWfj3BBnoANJQHYOid8iLJwWZuykvz0EIdGMDVpUpIx/qqm3/ZlC+cD0iukXQyEyAw3Qgts/Twqr5IJGeQOFy9Z4rmqSXtz/8tS0YOHCHVC5GGsUpD5+GLqhwPd64xCbWnvpYY61IX45Hzf+zO80xGqPeQLqF9HULs5wi2i6plKrSRl76VWCq9T7QMQMKJJSLUabnrXrKm+sr21LImgpSxSbqbBVVNUVS+adQvvylWb6yaFWov ansible-generated on node-1', '10.10.10.13'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.10'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.11'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.12'])
skipping: [10.10.10.12] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQChMymmlyxGn7PnUvAUvh968/gxTnwGZhhMhIc2+aiuA0QP/D8CSmKfzRYoMVP6/nm3cJsYXM28wzWZ1X/sLp33rYYxbwWpj5n8oBalzqKmSzK0HI5CePKAlWlEeLRDxvKpZYhZwXmro5Ov9lfp63kNHU84nAP7BPBOlufFyydn50bUwP1xKEsG1BC9Xqd4XqB5+eRLjkQDuC743bgxFc3FM8fij1/MuvxtG3HvL6DgEvCo3Lx4qkiVO3akR6Lo3bQEkf76Gq94cFbecAAnYZzdkPHR5LqJiIGS0DYj0yZQXrdN+DtjpyIBfZzi+TFdcVW1Agy1IUQ7Lrt29HJw+/sD ansible-generated on node-2', '10.10.10.13'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.10'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.11'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.12'])
skipping: [10.10.10.13] => (item=['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCo9KBPH2DVYQrM/WZ4CO4Ipvr+5L6FhqWBr1A6C0Ms+qi77aKHwFEIbrxKqj7wZFbHWoTPt/cbWkXhZgnkfDBR81/wBImnFz0QfuL0tNDN0/YP/4cePo5bQERGcnBI6vkjmXMyGGpRQobNRj71fX/Wt5WMw6dM+d4XjfgUKHIJxEKnz8HYnkiwWm5Flc9EHKTWN+87vZ9B6cdi7gxLQu8LL3x+4e2ArRoz9u5yZIajUTvexqD2IIReqsFt+QObpinLaTc/g7Q+w/no1hAZERS3pImx9l0GF6Ktdp/HMHH1vk2cwnyogrk+OLw1WccI1YkBes/xdzBFTWOwUX3w/vBt ansible-generated on node-3', '10.10.10.13'])

TASK [postgres : Install official pgdg yum repo] ***************************************************************************************************************
skipping: [10.10.10.10] => (item=postgresql${pg_version}*)
skipping: [10.10.10.10] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.10] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.11] => (item=postgresql${pg_version}*)
skipping: [10.10.10.10] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.11] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.10] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.11] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.12] => (item=postgresql${pg_version}*)
skipping: [10.10.10.10] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.11] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.12] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.11] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.12] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.13] => (item=postgresql${pg_version}*)
skipping: [10.10.10.11] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.12] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.13] => (item=postgis31_${pg_version}*)
skipping: [10.10.10.12] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.13] => (item=pgbouncer patroni pg_exporter pgbadger)
skipping: [10.10.10.12] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)
skipping: [10.10.10.13] => (item=patroni patroni-consul patroni-etcd pgbouncer pgbadger pg_activity)
skipping: [10.10.10.13] => (item=python3 python3-psycopg2 python36-requests python3-etcd python3-consul)
skipping: [10.10.10.13] => (item=python36-urllib3 python36-idna python36-pyOpenSSL python36-cryptography)

TASK [postgres : Install pg packages] ********************************************************************************************************************************************************************
changed: [10.10.10.10] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])
changed: [10.10.10.11] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])
changed: [10.10.10.13] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])
changed: [10.10.10.12] => (item=['postgresql13*', 'postgis31_13*', 'pgbouncer,patroni,pg_exporter,pgbadger', 'patroni,patroni-consul,patroni-etcd,pgbouncer,pgbadger,pg_activity', 'python3,python3-psycopg2,python36-requests,python3-etcd,python3-consul', 'python36-urllib3,python36-idna,python36-pyOpenSSL,python36-cryptography'])

TASK [postgres : Install pg extensions] ******************************************************************************************************************************************************************
changed: [10.10.10.11] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])
changed: [10.10.10.10] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])
changed: [10.10.10.13] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])
changed: [10.10.10.12] => (item=['pg_repack13,pg_qualstats13,pg_stat_kcache13,wal2json13'])

TASK [postgres : Link /usr/pgsql to current version] *****************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Add pg bin dir to profile path] *********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Fix directory ownership] ****************************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Remove default postgres service] ********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Check necessary variables exists] *******************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.11] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.12] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.13] => {
    "changed": false,
    "msg": "All assertions passed"
}

TASK [postgres : Fetch variables via pg_cluster] *********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Set cluster basic facts for hosts] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Assert cluster primary singleton] *******************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.11] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.12] => {
    "changed": false,
    "msg": "All assertions passed"
}
ok: [10.10.10.13] => {
    "changed": false,
    "msg": "All assertions passed"
}

TASK [postgres : Setup cluster primary ip address] *******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Setup repl upstream for primary] ********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Setup repl upstream for replicas] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Debug print instance summary] ***********************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "msg": "cluster=pg-meta service=pg-meta-primary instance=pg-meta-1 replication=[primary:itself]->10.10.10.10"
}
ok: [10.10.10.11] => {
    "msg": "cluster=pg-test service=pg-test-primary instance=pg-test-1 replication=[primary:itself]->10.10.10.11"
}
ok: [10.10.10.12] => {
    "msg": "cluster=pg-test service=pg-test-replica instance=pg-test-2 replication=[primary:itself]->10.10.10.12"
}
ok: [10.10.10.13] => {
    "msg": "cluster=pg-test service=pg-test-offline instance=pg-test-3 replication=[primary:itself]->10.10.10.13"
}

TASK [postgres : Check for existing postgres instance] ***************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Set fact whether pg port is open] *******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Abort due to existing postgres instance] ************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Clean existing postgres instance] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Shutdown existing postgres service] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Remove registered consul service] *************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.10]

TASK [postgres : Remove postgres metadata in consul] *****************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.10]

TASK [postgres : Remove existing postgres data] **********************************************************************************************************************************************************
ok: [10.10.10.10] => (item=/pg)
ok: [10.10.10.11] => (item=/pg)
ok: [10.10.10.12] => (item=/pg)
ok: [10.10.10.13] => (item=/pg)
ok: [10.10.10.10] => (item=/export/postgres)
ok: [10.10.10.11] => (item=/export/postgres)
ok: [10.10.10.12] => (item=/export/postgres)
ok: [10.10.10.13] => (item=/export/postgres)
ok: [10.10.10.10] => (item=/var/backups/postgres)
ok: [10.10.10.11] => (item=/var/backups/postgres)
ok: [10.10.10.12] => (item=/var/backups/postgres)
ok: [10.10.10.13] => (item=/var/backups/postgres)
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)

TASK [postgres : Make sure main and backup dir exists] ***************************************************************************************************************************************************
changed: [10.10.10.11] => (item=/export)
changed: [10.10.10.12] => (item=/export)
changed: [10.10.10.13] => (item=/export)
changed: [10.10.10.10] => (item=/export)
changed: [10.10.10.11] => (item=/var/backups)
changed: [10.10.10.12] => (item=/var/backups)
changed: [10.10.10.13] => (item=/var/backups)
changed: [10.10.10.10] => (item=/var/backups)

TASK [postgres : Create postgres directory structure] ****************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/export/postgres)
changed: [10.10.10.11] => (item=/export/postgres)
changed: [10.10.10.12] => (item=/export/postgres)
changed: [10.10.10.13] => (item=/export/postgres)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/bin)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/bin)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/bin)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/bin)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/log)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/log)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/log)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/log)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/tmp)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/tmp)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/tmp)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/tmp)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/conf)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/conf)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/conf)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/conf)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/data)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/data)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/data)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/data)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/meta)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/meta)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/meta)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/meta)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/stat)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/stat)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/stat)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/stat)
changed: [10.10.10.10] => (item=/export/postgres/pg-meta-13/change)
changed: [10.10.10.12] => (item=/export/postgres/pg-test-13/change)
changed: [10.10.10.11] => (item=/export/postgres/pg-test-13/change)
changed: [10.10.10.13] => (item=/export/postgres/pg-test-13/change)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/postgres)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/postgres)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/postgres)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/postgres)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/arcwal)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/arcwal)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/arcwal)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/arcwal)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/backup)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/backup)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/backup)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/backup)
changed: [10.10.10.10] => (item=/var/backups/postgres/pg-meta-13/remote)
changed: [10.10.10.12] => (item=/var/backups/postgres/pg-test-13/remote)
changed: [10.10.10.11] => (item=/var/backups/postgres/pg-test-13/remote)
changed: [10.10.10.13] => (item=/var/backups/postgres/pg-test-13/remote)

TASK [postgres : Create pgbouncer directory structure] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)

TASK [postgres : Create links from pgbkup to pgroot] *****************************************************************************************************************************************************
changed: [10.10.10.10] => (item=arcwal)
changed: [10.10.10.11] => (item=arcwal)
changed: [10.10.10.12] => (item=arcwal)
changed: [10.10.10.13] => (item=arcwal)
changed: [10.10.10.10] => (item=backup)
changed: [10.10.10.11] => (item=backup)
changed: [10.10.10.12] => (item=backup)
changed: [10.10.10.13] => (item=backup)
changed: [10.10.10.10] => (item=remote)
changed: [10.10.10.11] => (item=remote)
changed: [10.10.10.12] => (item=remote)
changed: [10.10.10.13] => (item=remote)

TASK [postgres : Create links from current cluster] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Copy pg_cluster to /pg/meta/cluster] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_version to /pg/meta/version] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_instance to /pg/meta/instance] **************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_seq to /pg/meta/sequence] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy pg_role to /pg/meta/role] **********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Copy postgres scripts to /pg/bin/] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Copy alias profile to /etc/profile.d] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Copy psqlrc to postgres home] ***********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Setup hostname to pg instance name] *****************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Copy consul node-meta definition] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Restart consul to load new node-meta] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Config patroni watchdog support] ********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Get config parameter page count] ********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Get config parameter page size] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Tune shared buffer and work mem] ********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Hanlde small size mem occasion] *********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Calculate postgres mem params] **********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : create patroni config dir] **************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : use predefined patroni template] ********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Render default /pg/conf/patroni.yml] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Link /pg/conf/patroni to /pg/bin/] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Link /pg/bin/patroni.yml to /etc/patroni/] **********************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Config patroni watchdog support] ********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Copy patroni systemd service file] ******************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : create patroni systemd drop-in dir] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Copy postgres systemd service file] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Drop-In consul dependency for patroni] **************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Render default initdb scripts] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Launch patroni on primary instance] *****************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Wait for patroni primary online] ********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
ok: [10.10.10.10]
ok: [10.10.10.11]

TASK [postgres : Wait for postgres primary online] *******************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
ok: [10.10.10.10]
ok: [10.10.10.11]

TASK [postgres : Check primary postgres service ready] ***************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
[WARNING]: Module remote_tmp /var/lib/pgsql/.ansible/tmp did not exist and was created with a mode of 0700, this may cause issues when running as another user. To avoid this, create the remote_tmp dir
with the correct permissions manually
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Check replication connectivity to primary] **********************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Render init roles sql] ******************************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Render init template sql] ***************************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Render default pg-init scripts] *********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.10]

TASK [postgres : Execute initialization scripts] *********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Check primary instance ready] ***********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Add dbsu password to pgpass if exists] **************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Add system user to pgpass] **************************************************************************************************************************************************************
changed: [10.10.10.10] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.11] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.12] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.13] => (item={'username': 'replicator', 'password': 'DBUser.Replicator'})
changed: [10.10.10.11] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.10] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.13] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.12] => (item={'username': 'dbuser_monitor', 'password': 'DBUser.Monitor'})
changed: [10.10.10.13] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})
changed: [10.10.10.12] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})
changed: [10.10.10.10] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})
changed: [10.10.10.11] => (item={'username': 'dbuser_admin', 'password': 'DBUser.Admin'})

TASK [postgres : Check replication connectivity to primary] **********************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Launch patroni on replica instances] ****************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Wait for patroni replica online] ********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Wait for postgres replica online] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Check replica postgres service ready] ***************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Render hba rules] ***********************************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [postgres : Reload hba rules] ***********************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Pause patroni] **************************************************************************************************************************************************************************
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Stop patroni on replica instance] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Stop patroni on primary instance] *******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Launch raw postgres on primary] *********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Launch raw postgres on primary] *********************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Wait for postgres online] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Check pgbouncer is installed] ***********************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [postgres : Stop existing pgbouncer service] ********************************************************************************************************************************************************
ok: [10.10.10.11]
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Remove existing pgbouncer dirs] *********************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)

TASK [postgres : Recreate dirs with owner postgres] ******************************************************************************************************************************************************
changed: [10.10.10.10] => (item=/etc/pgbouncer)
changed: [10.10.10.11] => (item=/etc/pgbouncer)
changed: [10.10.10.12] => (item=/etc/pgbouncer)
changed: [10.10.10.13] => (item=/etc/pgbouncer)
changed: [10.10.10.10] => (item=/var/log/pgbouncer)
changed: [10.10.10.12] => (item=/var/log/pgbouncer)
changed: [10.10.10.11] => (item=/var/log/pgbouncer)
changed: [10.10.10.13] => (item=/var/log/pgbouncer)
changed: [10.10.10.10] => (item=/var/run/pgbouncer)
changed: [10.10.10.12] => (item=/var/run/pgbouncer)
changed: [10.10.10.11] => (item=/var/run/pgbouncer)
changed: [10.10.10.13] => (item=/var/run/pgbouncer)

TASK [postgres : Copy /etc/pgbouncer/pgbouncer.ini] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Copy /etc/pgbouncer/pgb_hba.conf] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Touch userlist and database list] *******************************************************************************************************************************************************
changed: [10.10.10.10] => (item=database.txt)
changed: [10.10.10.11] => (item=database.txt)
changed: [10.10.10.12] => (item=database.txt)
changed: [10.10.10.13] => (item=database.txt)
changed: [10.10.10.10] => (item=userlist.txt)
changed: [10.10.10.11] => (item=userlist.txt)
changed: [10.10.10.12] => (item=userlist.txt)
changed: [10.10.10.13] => (item=userlist.txt)

TASK [postgres : Add default users to pgbouncer] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Copy pgbouncer systemd service] *********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Launch pgbouncer pool service] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : Wait for pgbouncer service online] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [postgres : Check pgbouncer service is ready] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : include_tasks] **************************************************************************************************************************************************************************
included: /private/tmp/pigsty/roles/postgres/tasks/createuser.yml for 10.10.10.10 => (item={'name': 'dbuser_meta', 'password': 'DBUser.Meta', 'login': True, 'superuser': False, 'createdb': False, 'createrole': False, 'inherit': True, 'replication': False, 'bypassrls': False, 'connlimit': -1, 'expire_at': '2030-12-31', 'expire_in': 365, 'roles': ['dbrole_readwrite'], 'pgbouncer': True, 'parameters': {'search_path': 'public'}, 'comment': 'test user'})
included: /private/tmp/pigsty/roles/postgres/tasks/createuser.yml for 10.10.10.10 => (item={'name': 'dbuser_vonng2', 'password': 'DBUser.Vonng', 'roles': ['dbrole_offline'], 'expire_in': 365, 'pgbouncer': False, 'comment': 'example personal user for interactive queries'})
included: /private/tmp/pigsty/roles/postgres/tasks/createuser.yml for 10.10.10.11, 10.10.10.12, 10.10.10.13 => (item={'name': 'test', 'password': 'test', 'roles': ['dbrole_readwrite'], 'pgbouncer': True, 'comment': 'default test user for production usage'})

TASK [postgres : Render user dbuser_meta creation sql] ***************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute user dbuser_meta creation sql on primary] ***************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Add user to pgbouncer] ******************************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Render user dbuser_vonng2 creation sql] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute user dbuser_vonng2 creation sql on primary] *************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Add user to pgbouncer] ******************************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [postgres : Render user test creation sql] **********************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Execute user test creation sql on primary] **********************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Add user to pgbouncer] ******************************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [postgres : include_tasks] **************************************************************************************************************************************************************************
included: /private/tmp/pigsty/roles/postgres/tasks/createdb.yml for 10.10.10.10 => (item={'name': 'meta', 'allowconn': True, 'revokeconn': False, 'connlimit': -1, 'extensions': [{'name': 'postgis', 'schema': 'public'}], 'parameters': {'enable_partitionwise_join': True}, 'pgbouncer': True, 'comment': 'pigsty meta database'})
included: /private/tmp/pigsty/roles/postgres/tasks/createdb.yml for 10.10.10.11, 10.10.10.12, 10.10.10.13 => (item={'name': 'test'})

TASK [postgres : debug] **********************************************************************************************************************************************************************************
ok: [10.10.10.10] => {
    "msg": {
        "allowconn": true,
        "comment": "pigsty meta database",
        "connlimit": -1,
        "extensions": [
            {
                "name": "postgis",
                "schema": "public"
            }
        ],
        "name": "meta",
        "parameters": {
            "enable_partitionwise_join": true
        },
        "pgbouncer": true,
        "revokeconn": false
    }
}

TASK [postgres : Render database meta creation sql] ******************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Render database meta baseline sql] ******************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [postgres : Execute database meta creation command] *************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute database meta creation sql] *****************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : Execute database meta creation sql] *****************************************************************************************************************************************************
skipping: [10.10.10.10]

TASK [postgres : Add pgbouncer busniess database] ********************************************************************************************************************************************************
changed: [10.10.10.10]

TASK [postgres : debug] **********************************************************************************************************************************************************************************
ok: [10.10.10.11] => {
    "msg": {
        "name": "test"
    }
}
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Render database test creation sql] ******************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Render database test baseline sql] ******************************************************************************************************************************************************
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Execute database test creation command] *************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Execute database test creation sql] *****************************************************************************************************************************************************
skipping: [10.10.10.12]
skipping: [10.10.10.13]
changed: [10.10.10.11]

TASK [postgres : Execute database test creation sql] *****************************************************************************************************************************************************
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [postgres : Add pgbouncer busniess database] ********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [postgres : Reload pgbouncer to add db and users] ***************************************************************************************************************************************************
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.11]

TASK [postgres : Copy pg service definition to consul] ***************************************************************************************************************************************************
changed: [10.10.10.10] => (item=postgres)
changed: [10.10.10.11] => (item=postgres)
changed: [10.10.10.12] => (item=postgres)
changed: [10.10.10.13] => (item=postgres)
changed: [10.10.10.10] => (item=pgbouncer)
changed: [10.10.10.11] => (item=pgbouncer)
changed: [10.10.10.12] => (item=pgbouncer)
changed: [10.10.10.13] => (item=pgbouncer)
changed: [10.10.10.10] => (item=patroni)
changed: [10.10.10.11] => (item=patroni)
changed: [10.10.10.12] => (item=patroni)
changed: [10.10.10.13] => (item=patroni)

TASK [postgres : Reload postgres consul service] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [postgres : Render grafana datasource definition] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [postgres : Register datasource to grafana] *********************************************************************************************************************************************************
[WARNING]: Consider using the get_url or uri module rather than running 'curl'.  If you need to use command because get_url or uri is insufficient you can add 'warn: false' to this command task or set
'command_warnings=False' in ansible.cfg to get rid of this message.
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [monitor : Install exporter yum repo] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [monitor : Install node_exporter and pg_exporter] ***************************************************************************************************************************************************
skipping: [10.10.10.10] => (item=node_exporter)
skipping: [10.10.10.10] => (item=pg_exporter)
skipping: [10.10.10.11] => (item=node_exporter)
skipping: [10.10.10.11] => (item=pg_exporter)
skipping: [10.10.10.12] => (item=node_exporter)
skipping: [10.10.10.12] => (item=pg_exporter)
skipping: [10.10.10.13] => (item=node_exporter)
skipping: [10.10.10.13] => (item=pg_exporter)

TASK [monitor : Copy node_exporter binary] ***************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [monitor : Copy pg_exporter binary] *****************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [monitor : Create /etc/pg_exporter conf dir] ********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Copy default pg_exporter.yaml] ***********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [monitor : Config /etc/default/pg_exporter] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Config pg_exporter service unit] *********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [monitor : Launch pg_exporter systemd service] ******************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [monitor : Wait for pg_exporter service online] *****************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.11]
ok: [10.10.10.13]

TASK [monitor : Register pg-exporter consul service] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Reload pg-exporter consul service] *******************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.10]

TASK [monitor : Config pgbouncer_exporter opts] **********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Config pgbouncer_exporter service] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [monitor : Launch pgbouncer_exporter service] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]

TASK [monitor : Wait for pgbouncer_exporter online] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [monitor : Register pgb-exporter consul service] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]

TASK [monitor : Reload pgb-exporter consul service] ******************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [monitor : Copy node_exporter systemd service] ******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [monitor : Config default node_exporter options] ****************************************************************************************************************************************************
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [monitor : Launch node_exporter service unit] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [monitor : Wait for node_exporter online] ***********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [monitor : Register node-exporter service to consul] ************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [monitor : Reload node-exporter consul service] *****************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [service : Make sure haproxy is installed] **********************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [service : Create haproxy directory] ****************************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.13]
ok: [10.10.10.11]

TASK [service : Copy haproxy systemd service file] *******************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [service : Fetch postgres cluster memberships] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.11]
ok: [10.10.10.12]
ok: [10.10.10.13]

TASK [service : Templating /etc/haproxy/haproxy.cfg] *****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.11]
changed: [10.10.10.12]

TASK [service : Launch haproxy load balancer service] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.13]
changed: [10.10.10.11]

TASK [service : Wait for haproxy load balancer online] ***************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.12]
ok: [10.10.10.11]
ok: [10.10.10.13]

TASK [service : Reload haproxy load balancer service] ****************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [service : Copy haproxy exporter definition] ********************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [service : Copy haproxy service definition] *********************************************************************************************************************************************************
changed: [10.10.10.12] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.10] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.11] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.13] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
changed: [10.10.10.10] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.12] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.13] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.11] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
changed: [10.10.10.10] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.12] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.11] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.13] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
changed: [10.10.10.10] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
changed: [10.10.10.12] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
changed: [10.10.10.11] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
changed: [10.10.10.13] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})

TASK [service : Reload haproxy consul service] ***********************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [service : Make sure vip-manager is installed] ******************************************************************************************************************************************************
ok: [10.10.10.10]
ok: [10.10.10.13]
ok: [10.10.10.11]
ok: [10.10.10.12]

TASK [service : Copy vip-manager systemd service file] ***************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.12]
changed: [10.10.10.11]
changed: [10.10.10.13]

TASK [service : create vip-manager systemd drop-in dir] **************************************************************************************************************************************************
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.10]
changed: [10.10.10.13]

TASK [service : create vip-manager systemd drop-in file] *************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.13]
changed: [10.10.10.12]
changed: [10.10.10.11]

TASK [service : Templating /etc/default/vip-manager.yml] *************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.12]
changed: [10.10.10.13]

TASK [service : Launch vip-manager] **********************************************************************************************************************************************************************
changed: [10.10.10.10]
changed: [10.10.10.11]
changed: [10.10.10.13]
changed: [10.10.10.12]

TASK [service : Fetch postgres cluster memberships] ******************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

TASK [service : Render L4 VIP configs] *******************************************************************************************************************************************************************
skipping: [10.10.10.10] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.10] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.10] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.11] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.10] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
skipping: [10.10.10.11] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.11] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.12] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.11] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
skipping: [10.10.10.12] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.13] => (item={'name': 'primary', 'src_ip': '*', 'src_port': 5433, 'dst_port': 'pgbouncer', 'check_url': '/primary', 'selector': '[]'})
skipping: [10.10.10.12] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.13] => (item={'name': 'replica', 'src_ip': '*', 'src_port': 5434, 'dst_port': 'pgbouncer', 'check_url': '/read-only', 'selector': '[]', 'selector_backup': '[? pg_role == `primary`]'})
skipping: [10.10.10.12] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})
skipping: [10.10.10.13] => (item={'name': 'default', 'src_ip': '*', 'src_port': 5436, 'dst_port': 'postgres', 'check_method': 'http', 'check_port': 'patroni', 'check_url': '/primary', 'check_code': 200, 'selector': '[]', 'haproxy': {'maxconn': 3000, 'balance': 'roundrobin', 'default_server_options': 'inter 3s fastinter 1s downinter 5s rise 3 fall 3 on-marked-down shutdown-sessions slowstart 30s maxconn 3000 maxqueue 128 weight 100'}})
skipping: [10.10.10.13] => (item={'name': 'offline', 'src_ip': '*', 'src_port': 5438, 'dst_port': 'postgres', 'check_url': '/replica', 'selector': '[? pg_role == `offline` || pg_offline_query ]', 'selector_backup': '[? pg_role == `replica` && !pg_offline_query]'})

TASK [service : include_tasks] ***************************************************************************************************************************************************************************
skipping: [10.10.10.10]
skipping: [10.10.10.11]
skipping: [10.10.10.12]
skipping: [10.10.10.13]

PLAY RECAP ***********************************************************************************************************************************************************************************************
10.10.10.10                : ok=264  changed=205  unreachable=0    failed=0    skipped=62   rescued=0    ignored=0
10.10.10.11                : ok=182  changed=146  unreachable=0    failed=0    skipped=55   rescued=0    ignored=0
10.10.10.12                : ok=171  changed=135  unreachable=0    failed=0    skipped=66   rescued=0    ignored=0
10.10.10.13                : ok=171  changed=135  unreachable=0    failed=0    skipped=66   rescued=0    ignored=0

It is strongly recommended to run make cache after the first successful initialization. This command packs the downloaded software into an offline cache package placed at files/pkg.tgz. The next time a new pigsty environment is created, as long as the host operating system is the same, the offline package can be reused directly, saving a great deal of download time.
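
A minimal sketch of that workflow (paths as described above, run from the pigsty source directory):

make cache                # pack the downloaded software into files/pkg.tgz
ls -lh files/pkg.tgz      # reuse this offline package for the next environment with the same OS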

mon-view

Once initialization is complete, you can visit http://pigsty in a browser to reach the monitoring system home page. The default username and password are both admin.

If DNS is not configured, or the default IP address is not used, you can also access the monitoring home page directly at http://meta_ip_address:3000.

$ make mon-view
open -n 'http://g.pigsty/'

7 - PG Exporter

PG Exporter reference

PG Exporter

Prometheus exporter for PostgreSQL metrics. Gives you complete insight into your favourite elephant!

Latest binaries & RPMs can be found on the release page. Supported versions: PostgreSQL 9.4+ & Pgbouncer 1.8+. The default collector definitions are compatible with PostgreSQL 10, 11, 12, and 13.

Latest pg_exporter version: 0.3.1

Features

  • Support both Postgres & Pgbouncer
  • Flexible: Almost all metrics are defined in customizable configuration files in SQL style.
  • Fine-grained execution control (Tags Filter, Facts Filter, Version Filter, Timeout, Cache, etc…)
  • Dynamic Planning: users can provide multiple branches of a metric query; only the branches matching the server version, facts, and tags are actually installed.
  • Configurable caching policy & query timeout
  • Rich metrics about pg_exporter itself.
  • Auto-discovers multiple databases in the same cluster (multi-database scrape TBD)
  • Tested and verified in real-world production environments for years (200+ nodes)
  • Overwhelming metrics! Gives you complete insight into your favourite elephant!
  • Pgbouncer mode is enabled when the target dbname is pgbouncer (see the sketch below)
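
A minimal sketch of the two modes (the connection strings and the PG_EXPORTER_URL variable are illustrative; see the pg_exporter documentation for the exact flags):

# monitor a regular postgres database
PG_EXPORTER_URL='postgres://dbuser_monitor:DBUser.Monitor@127.0.0.1:5432/postgres' pg_exporter

# same binary in pgbouncer mode: the target dbname is pgbouncer
PG_EXPORTER_URL='postgres://dbuser_monitor:DBUser.Monitor@127.0.0.1:6432/pgbouncer' pg_exporter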

Performance

In extreme scenarios (hundreds of thousands of tables and tens of thousands of distinct queries), a single scrape may take on the order of seconds.

Fortunately, every metric collector can be disabled individually, and pg_exporter allows an active timeout to be configured per collector (100ms by default).
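
For illustration, a single collector branch in the configuration file looks roughly like this (field names follow the default pg_exporter.yaml; a hedged sketch, not a verbatim excerpt):

pg_database:
  name: pg_database
  desc: postgres database metrics
  query: SELECT datname, age(datfrozenxid) AS age FROM pg_database;
  ttl: 100                  # cache the result for up to 100 seconds
  timeout: 0.1              # cancel the query after 100ms
  min_version: 100000       # only install on PostgreSQL 10+
  tags: [cluster]
  metrics:
    - datname:
        usage: LABEL
    - age:
        usage: GAUGE
        description: age of the database's datfrozenxid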

Self-Monitoring

The exporter also exposes metrics about the monitoring components themselves, including:

  • Whether the exporter is alive, its uptime, and how often it is scraped per minute
  • The duration of each monitoring query, and the number of metrics and errors it produces.

Prometheus Configuration

A scrape interval of 10–15 seconds is recommended for Prometheus, together with an appropriate timeout.

For demos or other special cases, a finer interval (e.g. 2 or 5 seconds) can also be used.

A single Prometheus node can monitor several hundred instances, roughly a few million time series (on a Dell R740 with 64 cores / 400GB memory / 3TB PCI-E SSD).

Larger deployments can scale out via Prometheus cascading, federation, or sharding, e.g. deploying one Prometheus per database cluster and letting an upper-level Prometheus scrape them and compute derived metrics.
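
A hedged sketch of the corresponding global section in prometheus.yml (values are illustrative, not Pigsty's shipped defaults):

global:
  scrape_interval: 15s      # 10~15s as recommended above
  scrape_timeout: 8s        # keep it below the scrape interval
  evaluation_interval: 15s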

8 - Prometheus Service Discovery

How to perform static service discovery for Prometheus

When the static file service discovery mode prometheus_sd_method == 'static' is used, Prometheus discovers its targets from static files; by default, all yml files under the /etc/prometheus/targets/ directory are used as target definitions.

Centralized Configuration

When prometheus_sd_target is set to batch mode, Pigsty manages Prometheus monitoring targets with a centralized configuration.

All monitoring targets are defined in a single configuration file: /etc/prometheus/targets/all.yml.

#==============================================================#
# File      :   targets/all.yml
# Ctime     :   2021-02-18
# Mtime     :   2021-02-18
# Atime     :   2021-03-01 16:46
# Note      :   Managed by Ansible
# Desc      :   Prometheus Static Monitoring Targets Definition
# Path      :   /etc/prometheus/targets/all.yml
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# static monitor targets, batch version

#======> pg-meta-1 [primary]
- labels: {cls: pg-meta, ins: pg-meta-1, ip: 10.10.10.10, role: primary, svc: pg-meta-primary}
  targets: [10.10.10.10:9630, 10.10.10.10:9100, 10.10.10.10:9631, 10.10.10.10:9101]

#======> pg-test-1 [primary]
- labels: {cls: pg-test, ins: pg-test-1, ip: 10.10.10.11, role: primary, svc: pg-test-primary}
  targets: [10.10.10.11:9630, 10.10.10.11:9100, 10.10.10.11:9631, 10.10.10.11:9101]

#======> pg-test-2 [replica]
- labels: {cls: pg-test, ins: pg-test-2, ip: 10.10.10.12, role: replica, svc: pg-test-replica}
  targets: [10.10.10.12:9630, 10.10.10.12:9100, 10.10.10.12:9631, 10.10.10.12:9101]

#======> pg-test-3 [replica]
- labels: {cls: pg-test, ins: pg-test-3, ip: 10.10.10.13, role: replica, svc: pg-test-replica}
  targets: [10.10.10.13:9630, 10.10.10.13:9100, 10.10.10.13:9631, 10.10.10.13:9101]

Per-Instance Configuration

When prometheus_sd_target is set to single mode, Pigsty manages Prometheus monitoring targets with separate per-instance configurations.

Each monitored instance has its own dedicated configuration file: /etc/prometheus/targets/{{ pg_instance }}.yml.

Take the pg-meta-1 instance as an example: its configuration file is /etc/prometheus/targets/pg-meta-1.yml, with the following content:

# pg-meta-1 [primary]
- labels: {cls: pg-meta, ins: pg-meta-1, ip: 10.10.10.10, role: primary, svc: pg-meta-primary}
  targets: [10.10.10.10:9630, 10.10.10.10:9100, 10.10.10.10:9631, 10.10.10.10:9101]

9 - Tuned Template

Several pre-defined tuned templates

9.1 - OLTP

Tuned OLTP template

The Tuned OLTP template is optimized primarily for latency. It targets Dell R740 nodes with 64 cores, 400GB of RAM, and PCI-E SSD storage. You can adjust it for your actual hardware.

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to oltp mode
# Path      :   /etc/tuned/oltp/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLTP System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g. < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

# vm.dirty_background_bytes=67108864 # 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=10                 # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

9.2 - TINY

Tuned TINY template

The Tuned TINY template is optimized for very low-spec virtual machines.

It targets a typical node of 1 core and 1GB of RAM. You can adjust it for your actual hardware.

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to tiny mode
# Path      :   /etc/tuned/tiny/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL TINY System
# include=virtual-guest

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up.  Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40

# Filesystem I/O is usually much more efficient than swapping, so try to keep
# swapping low.  It's usually safe to go even lower than this on systems with
# server-grade storage.
vm.swappiness = 30

#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

9.3 - OLAP

Tuned OLAP template, optimized for highly parallel, long-running, high-throughput instances

The Tuned OLAP template is optimized primarily for throughput and computational parallelism.

It targets Dell R740 nodes with 64 cores, 400GB of RAM, and PCI-E SSD storage. You can adjust it for your actual hardware.

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-09-18
# Desc      :   Tune operating system to olap mode
# Path      :   /etc/tuned/olap/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL OLAP System
include=network-throughput

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g. < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
# vm.swappiness=10

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80

vm.dirty_background_ratio = 10    # throughput-performance default
vm.dirty_ratio=80                 # throughput-performance default 40 -> 80

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

9.4 - CRIT

Tuned CRIT template, optimized for financial scenarios where data loss or corruption is not tolerated.

The Tuned CRIT template is optimized primarily for RPO, keeping the amount of dirty data in memory as small as possible.

It targets Dell R740 nodes with 64 cores, 400GB of RAM, and PCI-E SSD storage. You can adjust it for your actual hardware.

# tuned configuration
#==============================================================#
# File      :   tuned.conf
# Mtime     :   2020-06-29
# Desc      :   Tune operating system to crit mode
# Path      :   /etc/tuned/crit/tuned.conf
# Author    :   Vonng(fengruohang@outlook.com)
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

[main]
summary=Optimize for PostgreSQL CRIT System
include=network-latency

[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
# disable transparent hugepages
transparent_hugepages=never

[sysctl]
#-------------------------------------------------------------#
#                           KERNEL                            #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0

# total shmem size in bytes: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}

# total shmem size in pages:  $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}

# total shmem segs 4096 -> 8192
kernel.shmmni=8192

# total msg queue number, set to mem size in MB
kernel.msgmni=32768

# max length of message queue
kernel.msgmnb=65536

# max size of message
kernel.msgmax=65536

kernel.pid_max=131072

# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536

# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0

# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g. < 1ms)
kernel.sched_migration_cost_ns=5000000

#-------------------------------------------------------------#
#                             VM                              #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0

# disable when most mem are for file cache
vm.zone_reclaim_mode=0

# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=100

# 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_bytes=67108864
# vm.dirty_background_ratio=3       # latency-performance default
vm.dirty_ratio=6                    # latency-performance default

# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536

#-------------------------------------------------------------#
#                        Filesystem                           #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160

# max concurrent unfinished async io, should be larger than 1M.  65536->1M
fs.aio-max-nr=1048576


#-------------------------------------------------------------#
#                          Network                            #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304

# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000

# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1

# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"

# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60

net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000

net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1

# max connection tracking number
net.netfilter.nf_conntrack_max=1048576

10 - Patroni Template

Four pre-defined patroni templates

Pigsty uses Patroni to manage and initialize PostgreSQL database clusters.

Pigsty relies on Patroni for the bulk of the provisioning work. Even if the user chooses the Patroni-free mode, the database cluster is still brought up by Patroni, and the Patroni component is removed after creation.

Most PostgreSQL cluster customization can be done through the Patroni configuration file. For details on the configuration file format, refer to the official Patroni documentation.

Pre-defined Templates

Pigsty ships with four pre-defined initialization templates. An initialization template is the definition file used to initialize a database cluster; by default they live under roles/postgres/templates/. They include:

  • oltp.yml OLTP template, the default, optimized for latency and performance on production hardware.
  • olap.yml OLAP template, raises parallelism and optimizes for throughput and long-running queries.
  • crit.yml Critical-business template, based on the OLTP template, optimized for RPO, security, and data integrity; enables synchronous replication and data checksums.
  • tiny.yml Tiny database template, optimized for low-resource scenarios, e.g. a demo database cluster running in virtual machines.

The pg_conf parameter specifies the path of the template to use; when using a pre-defined template, just fill in the template file name, as shown in the sketch below.

If you use a customized Patroni configuration template, you should usually also apply the matching node tuning template to the machine nodes.
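For example, a cluster's vars could select the pre-defined OLAP template by file name (a sketch: pg-olap is a hypothetical cluster, and node_tune is assumed to be the inventory variable that selects the matching tuned template):

pg-olap:
  vars:
    pg_conf: olap.yml    # pre-defined patroni template: the file name is enough
    node_tune: olap      # assumed variable pairing the matching tuned template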

For more detailed configuration, refer to PG Provisioning.

10.1 - OLTP

Patroni OLTP template

The Patroni OLTP template is optimized primarily for latency. It targets Dell R740 nodes with 64 cores, 400GB of RAM, and PCI-E SSD storage. You can adjust it for your actual hardware.

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (oltp)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Transactional Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# OLTP databases are optimized for performance and response-time latency
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/          # namespace
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  level: INFO                           #  NOTEST|DEBUG|INFO|WARNING|ERROR|CRITICAL
  dir: /pg/log/                         #  default log file: /pg/log/patroni.log
  file_size: 100000000                  #  100MB log triggers a log rotate
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
  host: 127.0.0.1:8500
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  service_tags:
    - {{ pg_cluster }}

#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  listen: 0.0.0.0:{{ patroni_port }}
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  authentication:
    verify_client: none                 # none|optional|required
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'

#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
  optional:
    insecure: true
    # cacert: '/path/to/ca/cert'
    # certfile: '/path/to/cert/file'
    # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
  mode: {{ patroni_watchdog_mode }}
  device: /dev/watchdog
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  initdb:
    - locale: C
    - encoding: UTF8
    # - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'
    {{ pg_admin_username }}:
      password: '{{ pg_admin_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # template
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be create manually on upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >: loop_wait + retry_timeout * 2

    # the number of seconds the loop will sleep. Default value: 10
    # this is patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # config this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 10

    # important: candidate will not be promoted if replication lag is higher than this
    # maximum RPO: 1MB
    maximum_lag_on_failover: 1048576

    # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
    master_stop_timeout: 30

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: false

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true


      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 400                    # 100 -> 400
        superuser_reserved_connections: 10      # reserve 10 connection for su
        max_locks_per_transaction: 128          # 64 -> 128
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enabled xact timestamp
        max_worker_processes: 8                 # default 8, set to cpu core
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 16                     # 10 -> 16
        max_replication_slots: 16               # 10 -> 16
        wal_keep_size: 100GB                    # keep at least 100GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 32MB                          # 4MB -> 32MB
        huge_pages: try                         # try huge pages
        temp_file_limit: 100GB                  # 0 -> 100GB
        vacuum_cost_delay: 2ms                  # wait 2ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 800              # 100 -> 800
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 1MB             # max allowed data loss
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # 200ms -> 20ms, increase speed
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 60min               # checkpoint 5min -> 1h
        checkpoint_completion_target: 0.95      # 0.5 -> 0.95
        archive_mode: on
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 50000         # 0->50000 last 50000 xact changes will not be vacuumed
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # time that receiver waits for
        max_logical_replication_workers: 8      # 4 -> 8
        max_sync_workers_per_subscription: 8    # 4 -> 8

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        # enable_partitionwise_join: on
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 320GB             # max mem - shared buffer
        default_statistics_target: 1000         # stat bucket 100 -> 1000

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 100         # log slow query (>100ms)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 8192         # max query length in pg_stat_activity

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity take more than 1s
        autovacuum_max_workers: 3               # default autovacuum worker 3
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        autovacuum_freeze_max_age: 100000000    # age > 1 billion triggers force vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # 50ms for deadlock
        idle_in_transaction_session_timeout: 10min  # 10min timeout for idle in transaction

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true

        # pg_stat_statements
        pg_stat_statements.max: 10000           # 5000 -> 10000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: off   # do not track query other than CRUD
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # who to connect to postgres
  #----------------------------------------------------------------------------
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # event callback script log: /pg/log/callback.log
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksum should be enabled before using rewind
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    - status-interval: 1s
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (overwrite with default)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5

...

10.2 - TINY

Patroni TINY template

The Patroni TINY template is optimized for very low-spec virtual machines.

It targets a typical node of 1 core and 1GB of RAM. You can adjust it for your actual hardware.

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (tiny)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Tiny Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# TINY databases are optimized for low-resource situations (e.g. 1 Core / 1GB)
# typical spec: 1 Core | 1-4 GB RAM | Normal SSD  10x GB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/          # namespace
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  level: INFO                           #  NOTEST|DEBUG|INFO|WARNING|ERROR|CRITICAL
  dir: /pg/log/                         #  default log file: /pg/log/patroni.log
  file_size: 100000000                  #  100MB log triggers a log rotate
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
  host: 127.0.0.1:8500
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  service_tags:
    - {{ pg_cluster }}


#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  listen: 0.0.0.0:{{ patroni_port }}
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  authentication:
    verify_client: none                 # none|optional|required
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'


#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
  optional:
    insecure: true
    # cacert: '/path/to/ca/cert'
    # certfile: '/path/to/cert/file'
    # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
  mode: {{ patroni_watchdog_mode }}
  device: /dev/watchdog
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  initdb:
    - locale: C
    - encoding: UTF8
    - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # customization
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be create manually on upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >: loop_wait + retry_timeout * 2

    # the number of seconds the loop will sleep. Default value: 10
    # this is patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # config this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 10

    # important: candidate will not be promoted if replication lag is higher than this
    # maximum RPO: 1MB
    maximum_lag_on_failover: 1048576

    # The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
    master_stop_timeout: 30

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: false

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true


      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 50                     # default 100 -> 50
        superuser_reserved_connections: 10      # reserve 10 connection for su
        max_locks_per_transaction: 64           # default 64
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enabled xact timestamp
        max_worker_processes: 1                 # default 8 -> 1 (set to cpu core)
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 10                     # default 10
        max_replication_slots: 10               # default 10
        wal_keep_size: 1GB                      # keep at least 1GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 4MB                           # default 4MB
        huge_pages: try                         # try huge pages
        temp_file_limit: 40GB                   # 0 -> 40GB (according to your disk)
        vacuum_cost_delay: 5ms                  # wait 5ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 800              # 100 -> 800
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 1MB             # max allowed data loss
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # 200ms -> 20ms, increase speed
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 15min               # checkpoint 5min -> 15min
        checkpoint_completion_target: 0.80      # 0.5 -> 0.8
        archive_mode: on
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 50000         # 0->50000 last 50000 xact changes will not be vacuumed
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # time that receiver waits for
        max_logical_replication_workers: 8      # 4 -> 2 (set according to your cpu core)
        max_sync_workers_per_subscription: 8    # 4 -> 2

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        # enable_partitionwise_join: on
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 2GB               # max mem - shared buffer
        default_statistics_target: 200          # stat bucket 100 -> 200

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 100         # log slow query (>100ms)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 8192         # max query length in pg_stat_activity

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity take more than 1s
        autovacuum_max_workers: 1               # default autovacuum worker 3 -> 1
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        autovacuum_freeze_max_age: 100000000    # age > 1 billion triggers force vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # 50ms for deadlock
        idle_in_transaction_session_timeout: 10min  # 10min timeout for idle in transaction

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true

        # pg_stat_statements
        pg_stat_statements.max: 3000            # 5000 -> 3000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: off   # do not track query other than CRUD
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # who to connect to postgres
  #----------------------------------------------------------------------------
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # event callback script log: /pg/log/callback.log
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksum should be enabled before using rewind
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    - status-interval: 1s
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (overwrite with default)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5

...

10.3 - OLAP

Patroni OLAP template, optimized for instances with high parallelism, long queries, and high throughput

The Patroni OLAP template is optimized mainly for throughput and degree of computational parallelism.

This template targets nodes like a Dell R740 with 64 cores and 400 GB of RAM on PCI-E SSD storage. You can adjust it to match your actual hardware.
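
To apply this template, the tuning profile is selected per cluster in the inventory. A minimal sketch follows; it assumes the OLAP template file is named olap.yml (mirroring the other profile names) and uses a placeholder IP and cluster name that are not confirmed by this document:

pg-olap:
  hosts:
    10.10.10.13: {pg_seq: 1, pg_role: primary}   # placeholder node
  vars:
    pg_cluster: pg-olap          # cluster name
    node_tune: olap              # node tuning profile for analytics workloads
    pg_conf: olap.yml            # render this Patroni OLAP template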

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (olap)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Analysis Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# OLAP databases are optimized for throughput and parallelism
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/          # namespace
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  level: INFO                           #  NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL
  dir: /pg/log/                         #  default log file: /pg/log/patroni.log
  file_size: 100000000                  #  100MB log triggers a log rotate
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
  host: 127.0.0.1:8500
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  service_tags:
    - {{ pg_cluster }}

#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  listen: 0.0.0.0:{{ patroni_port }}
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  authentication:
    verify_client: none                 # none|optional|required
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'

#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
  optional:
    insecure: true
    # cacert: '/path/to/ca/cert'
    # certfile: '/path/to/cert/file'
    # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
  mode: {{ patroni_watchdog_mode }}
  device: /dev/watchdog
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  initdb:
    - locale: C
    - encoding: UTF8
    # - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'
    {{ pg_admin_username }}:
      password: '{{ pg_admin_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # template
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be created manually on the upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >= loop_wait + retry_timeout * 2

    # the number of seconds the loop will sleep. Default value: 10
    # this is patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # config this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 10

    # important: a candidate will not be promoted if its replication lag is higher than this
    # maximum RPO: 16MB (analytics can tolerate more data loss)
    maximum_lag_on_failover: 16777216

    # the number of seconds Patroni is allowed to wait when stopping Postgres; effective only when synchronous_mode is enabled
    master_stop_timeout: 30

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: false

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true

      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 400                    # 100 -> 400
        superuser_reserved_connections: 10      # reserve 10 connection for su
        max_locks_per_transaction: 256          # 64 -> 256 (analysis)
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enable xact commit timestamps
        max_worker_processes: 64                # default 8 -> 64, SET THIS ACCORDING TO YOUR CPU CORES
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 16                     # 10 -> 16
        max_replication_slots: 16               # 10 -> 16
        wal_keep_size: 100GB                    # keep at least 100GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 128MB                         # 4MB -> 128MB (analysis)
        huge_pages: try                         # try huge pages
        temp_file_limit: 500GB                  # 0 -> 500GB (analysis)
        vacuum_cost_delay: 2ms                  # wait 2ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 1600             # 100 -> 1600 (analysis)
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer
        max_parallel_workers: 64                # SET THIS ACCORDING TO YOUR CPU CORES
        max_parallel_workers_per_gather: 64     # SET THIS ACCORDING TO YOUR CPU CORES
        max_parallel_maintenance_workers: 4     # 2 -> 4

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 16MB            # max allowed data loss (analysis)
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # group-commit delay: 20µs (unit is microseconds)
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 60min               # checkpoint 5min -> 1h
        checkpoint_completion_target: 0.95      # 0.5 -> 0.95
        archive_mode: on
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'
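        # note on the archive_command above: once a day (when the minute-of-day is exactly
        # 12:00) it purges yesterday's archive directory, then it creates a per-day directory
        # under /pg/arcwal and stores each WAL segment there, compressed with lz4
        # (%p is the segment path, %f is the segment file name)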

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 0             # 0 (default)
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # terminate inactive replication connections after 60s
        max_logical_replication_workers: 8      # 4 -> 8
        max_sync_workers_per_subscription: 8    # 4 -> 8

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        enable_partitionwise_join: on           # enable on analysis
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 320GB             # max mem - shared buffer
        default_statistics_target: 1000         # stat bucket 100 -> 1000
        jit: on                                 # default on
        jit_above_cost: 100000                  # default jit threshold

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 1000         # log slow query (>1s)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 8192         # max query length in pg_stat_activity

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity taking more than 1s
        autovacuum_max_workers: 3               # default autovacuum worker 3
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        autovacuum_freeze_max_age: 100000000    # age > 100 million triggers forced anti-wraparound vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # 50ms for deadlock
        idle_in_transaction_session_timeout: 0  # Disable idle in xact timeout in analysis database

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true

        # pg_stat_statements
        pg_stat_statements.max: 10000           # 5000 -> 10000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: off   # do not track utility statements (non-CRUD)
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # who connects to postgres
  #----------------------------------------------------------------------------
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # event callback script log: /pg/log/callback.log
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksum should be enabled before using rewind
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    - status-interval: 1s
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (overwrite with default)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5

...
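
The commented-out parameters block near the end of each template is the hook for ad hoc, per-instance overrides: per Patroni's documented configuration precedence, values placed in the local postgresql.parameters section take priority over the dynamic (DCS) configuration for most settings. A brief sketch, with purely illustrative values that are not part of the template:

  parameters:
    max_parallel_workers_per_gather: 16   # example: cap gather parallelism on a smaller box
    work_mem: 256MB                       # example: larger per-operation memory for this instance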

10.4 - CRIT

Patroni CRIT template, optimized for financial and other scenarios where no data loss or corruption can be tolerated.

The Patroni CRIT template is optimized primarily for RPO: it uses synchronous replication to ensure that no data is lost when a failure occurs.

This template targets nodes like a Dell R740 with 64 cores and 400 GB of RAM on PCI-E SSD storage. Users can adjust it to match their actual hardware.
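
As with the other profiles, the CRIT template is selected per cluster in the inventory; because it relies on synchronous replication, the cluster should include at least one replica. A hedged sketch (the crit.yml file name, IPs, and cluster name follow the naming convention but are placeholders, not values confirmed by this document):

pg-crit:
  hosts:
    10.10.10.14: {pg_seq: 1, pg_role: primary}
    10.10.10.15: {pg_seq: 2, pg_role: replica}
  vars:
    pg_cluster: pg-crit               # cluster name
    node_tune: crit                   # node tuning profile for critical workloads
    pg_conf: crit.yml                 # render this Patroni CRIT template
    patroni_watchdog_mode: required   # optionally require a hardware watchdog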

#!/usr/bin/env patroni
#==============================================================#
# File      :   patroni.yml
# Ctime     :   2020-04-08
# Mtime     :   2020-12-22
# Desc      :   patroni cluster definition for {{ pg_cluster }} (crit)
# Path      :   /pg/bin/patroni.yml
# Real Path :   /pg/conf/{{ pg_instance }}.yml
# Link      :   /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note      :   Critical Database Cluster Template
# Doc       :   https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#

# CRIT databases are optimized for security, integrity, and RPO
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB

---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/          # namespace
scope: {{ pg_cluster }}                 # cluster name
name: {{ pg_instance }}                 # instance name

#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
  level: INFO                           #  NOTSET|DEBUG|INFO|WARNING|ERROR|CRITICAL
  dir: /pg/log/                         #  default log file: /pg/log/patroni.log
  file_size: 100000000                  #  100MB log triggers a log rotate
  # format: '%(asctime)s %(levelname)s: %(message)s'

#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
  host: 127.0.0.1:8500
  consistency: default         # default|consistent|stale
  register_service: true
  service_check_interval: 15s
  service_tags:
    - {{ pg_cluster }}

#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
  listen: 0.0.0.0:{{ patroni_port }}
  connect_address: {{ inventory_hostname }}:{{ patroni_port }}
  authentication:
    verify_client: none                 # none|optional|required
    username: {{ pg_monitor_username }}
    password: '{{ pg_monitor_password }}'

#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
  optional:
    insecure: true
    # cacert: '/path/to/ca/cert'
    # certfile: '/path/to/cert/file'
    # keyfile: '/path/to/key/file'

#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
  nofailover: false
  clonefrom: true
  noloadbalance: false
  nosync: false
{% if pg_upstream is defined %}
  replicatefrom: {{ pg_upstream }}    # clone from another replica rather than primary
{% endif %}

#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
  mode: {{ patroni_watchdog_mode }}
  device: /dev/watchdog
  # safety_margin: 10s

#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:

  #----------------------------------------------------------------------------
  # bootstrap method
  #----------------------------------------------------------------------------
  method: initdb
  # add custom bootstrap method here

  # default bootstrap method: initdb
  initdb:
    - locale: C
    - encoding: UTF8
    # - data-checksums    # enable data-checksum


  #----------------------------------------------------------------------------
  # bootstrap users
  #---------------------------------------------------------------------------
  # additional users which need to be created after initializing new cluster
  # replication user and monitor user are required
  users:
    {{ pg_replication_username }}:
      password: '{{ pg_replication_password }}'
    {{ pg_monitor_username }}:
      password: '{{ pg_monitor_password }}'
    {{ pg_admin_username }}:
      password: '{{ pg_admin_password }}'

  # bootstrap hba, allow local and intranet password access & replication
  # will be overwritten later
  pg_hba:
    - local   all             postgres                                ident
    - local   all             all                                     md5
    - host    all             all            0.0.0.0/0                md5
    - local   replication     postgres                                ident
    - local   replication     all                                     md5
    - host    replication     all            0.0.0.0/0                md5


  #----------------------------------------------------------------------------
  # template
  #---------------------------------------------------------------------------
  # post_init: /pg/bin/pg-init

  #----------------------------------------------------------------------------
  # bootstrap config
  #---------------------------------------------------------------------------
  # this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
  # it will NOT take any effect after cluster bootstrap
  dcs:

{% if pg_role == 'primary' and pg_upstream is defined %}
    #----------------------------------------------------------------------------
    # standby cluster definition
    #---------------------------------------------------------------------------
    standby_cluster:
      host: {{ pg_upstream }}
      port: {{ pg_port }}
      # primary_slot_name: patroni     # must be created manually on the upstream server, if specified
      create_replica_methods:
        - basebackup
{% endif %}

    #----------------------------------------------------------------------------
    # important parameters
    #---------------------------------------------------------------------------
    # constraint: ttl >= loop_wait + retry_timeout * 2

    # the number of seconds the loop will sleep. Default value: 10
    # this is patroni check loop interval
    loop_wait: 10

    # the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
    # config this according to your network condition to avoid false-positive failover
    ttl: 30

    # timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
    retry_timeout: 10

    # the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
    # Max RTO: 2 loop wait + master_start_timeout
    master_start_timeout: 120   # more patient on critical database

    # important: a candidate will not be promoted if its replication lag is higher than this
    # maximum RPO: 0 for critical database
    maximum_lag_on_failover: 1

    # the number of seconds Patroni is allowed to wait when stopping Postgres; effective only when synchronous_mode is enabled
    master_stop_timeout: 10   # stop postgres faster on critical database

    # turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
    # set to true for RPO mode
    synchronous_mode: true  # use sync replication on critical database

    # prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
    synchronous_mode_strict: false


    #----------------------------------------------------------------------------
    # postgres parameters
    #---------------------------------------------------------------------------
    postgresql:
      use_slots: true
      use_pg_rewind: true
      remove_data_directory_on_rewind_failure: true


      parameters:
        #----------------------------------------------------------------------
        # IMPORTANT PARAMETERS
        #----------------------------------------------------------------------
        max_connections: 400                    # 100 -> 400
        superuser_reserved_connections: 10      # reserve 10 connection for su
        max_locks_per_transaction: 128          # 64 -> 128
        max_prepared_transactions: 0            # 0 disable 2PC
        track_commit_timestamp: on              # enable xact commit timestamps
        max_worker_processes: 8                 # default 8, set to cpu core
        wal_level: logical                      # logical
        wal_log_hints: on                       # wal log hints to support rewind
        max_wal_senders: 16                     # 10 -> 16
        max_replication_slots: 16               # 10 -> 16
        wal_keep_size: 100GB                    # keep at least 100GB WAL
        password_encryption: md5                # use traditional md5 auth

        #----------------------------------------------------------------------
        # RESOURCE USAGE (except WAL)
        #----------------------------------------------------------------------
        # memory: shared_buffers and maintenance_work_mem will be dynamically set
        shared_buffers: {{ pg_shared_buffers }}
        maintenance_work_mem: {{ pg_maintenance_work_mem }}
        work_mem: 32MB                          # 4MB -> 32MB
        huge_pages: try                         # try huge pages
        temp_file_limit: 100GB                  # 0 -> 100GB
        vacuum_cost_delay: 2ms                  # wait 2ms per 10000 cost
        vacuum_cost_limit: 10000                # 10000 cost each round
        bgwriter_delay: 10ms                    # check dirty page every 10ms
        bgwriter_lru_maxpages: 800              # 100 -> 800
        bgwriter_lru_multiplier: 5.0            # 2.0 -> 5.0  more cushion buffer

        #----------------------------------------------------------------------
        # WAL
        #----------------------------------------------------------------------
        wal_buffers: 16MB                       # max to 16MB
        wal_writer_delay: 20ms                  # wait period
        wal_writer_flush_after: 1MB             # max allowed data loss
        min_wal_size: 100GB                     # at least 100GB WAL
        max_wal_size: 400GB                     # at most 400GB WAL
        commit_delay: 20                        # group-commit delay: 20µs (unit is microseconds)
        commit_siblings: 10                     # 5 -> 10
        checkpoint_timeout: 60min               # checkpoint 5min -> 1h
        checkpoint_completion_target: 0.95      # 0.5 -> 0.95
        archive_mode: on
        archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'

        #----------------------------------------------------------------------
        # REPLICATION
        #----------------------------------------------------------------------
        # synchronous_standby_names: ''
        vacuum_defer_cleanup_age: 50000         # 0 -> 50000: defer cleanup of the last 50000 xacts' changes
        promote_trigger_file: promote.signal    # default promote trigger file path
        max_standby_archive_delay: 10min        # max delay before canceling queries when reading WAL from archive;
        max_standby_streaming_delay: 3min       # max delay before canceling queries when reading streaming WAL;
        wal_receiver_status_interval: 1s        # send replies at least this often
        hot_standby_feedback: on                # send info from standby to prevent query conflicts
        wal_receiver_timeout: 60s               # terminate inactive replication connections after 60s
        max_logical_replication_workers: 8      # 4 -> 8
        max_sync_workers_per_subscription: 8    # 4 -> 8

        #----------------------------------------------------------------------
        # QUERY TUNING
        #----------------------------------------------------------------------
        # planner
        # enable_partitionwise_join: on
        random_page_cost: 1.1                   # 4 for HDD, 1.1 for SSD
        effective_cache_size: 320GB             # max mem - shared buffer
        default_statistics_target: 1000         # stat bucket 100 -> 1000

        #----------------------------------------------------------------------
        # REPORTING AND LOGGING
        #----------------------------------------------------------------------
        log_destination: csvlog                 # use standard csv log
        logging_collector: on                   # enable csvlog
        log_directory: log                      # default log dir: /pg/data/log
        # log_filename: 'postgresql-%a.log'     # weekly auto-recycle
        log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
        log_checkpoints: on                     # log checkpoint info
        log_lock_waits: on                      # log lock wait info
        log_replication_commands: on            # log replication info
        log_statement: ddl                      # log ddl change
        log_min_duration_statement: 100         # log slow query (>100ms)

        #----------------------------------------------------------------------
        # STATISTICS
        #----------------------------------------------------------------------
        track_io_timing: on                     # collect io statistics
        track_functions: all                    # track all functions (none|pl|all)
        track_activity_query_size: 32768        # show full query on critical database

        #----------------------------------------------------------------------
        # AUTOVACUUM
        #----------------------------------------------------------------------
        log_autovacuum_min_duration: 1s         # log autovacuum activity taking more than 1s
        autovacuum_max_workers: 3               # default autovacuum worker 3
        autovacuum_naptime: 1min                # default autovacuum naptime 1min
        autovacuum_vacuum_scale_factor: 0.08    # fraction of table size before vacuum   20% -> 8%
        autovacuum_analyze_scale_factor: 0.04   # fraction of table size before analyze  10% -> 4%
        autovacuum_vacuum_cost_delay: -1        # default vacuum cost delay: same as vacuum_cost_delay
        autovacuum_vacuum_cost_limit: -1        # default vacuum cost limit: same as vacuum_cost_limit
        autovacuum_freeze_max_age: 100000000    # age > 100 million triggers forced anti-wraparound vacuum

        #----------------------------------------------------------------------
        # CLIENT
        #----------------------------------------------------------------------
        deadlock_timeout: 50ms                  # 50ms for deadlock
        idle_in_transaction_session_timeout: 1min  # 1min timeout for idle in transaction in critical database

        #----------------------------------------------------------------------
        # CUSTOMIZED OPTIONS
        #----------------------------------------------------------------------
        # extensions
        shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'

        # auto_explain
        auto_explain.log_min_duration: 1s       # auto explain query slower than 1s
        auto_explain.log_analyze: true          # explain analyze
        auto_explain.log_verbose: true          # explain verbose
        auto_explain.log_timing: true           # explain timing
        auto_explain.log_nested_statements: true

        # pg_stat_statements
        pg_stat_statements.max: 10000           # 5000 -> 10000 queries
        pg_stat_statements.track: all           # track all statements (all|top|none)
        pg_stat_statements.track_utility: on    # TRACK all queries on critical database
        pg_stat_statements.track_planning: off  # do not track planning metrics


#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:

  #----------------------------------------------------------------------------
  # how to connect to postgres
  #----------------------------------------------------------------------------
  bin_dir: {{ pg_bin_dir }}
  data_dir: {{ pg_data }}
  config_dir: {{ pg_data }}
  pgpass: {{ pg_dbsu_home }}/.pgpass
  listen: {{ pg_listen }}:{{ pg_port }}
  connect_address: {{ inventory_hostname }}:{{ pg_port }}
  use_unix_socket: true # default: /var/run/postgresql, /tmp

  #----------------------------------------------------------------------------
  # who connects to postgres
  #----------------------------------------------------------------------------
  authentication:
    superuser:
      username: {{ pg_dbsu }}
    replication:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'
    rewind:
      username: {{ pg_replication_username }}
      password: '{{ pg_replication_password }}'

  #----------------------------------------------------------------------------
  # how to react to database operations
  #----------------------------------------------------------------------------
  # event callback script log: /pg/log/callback.log
  callbacks:
    on_start: /pg/bin/pg-failover-callback
    on_stop: /pg/bin/pg-failover-callback
    on_reload: /pg/bin/pg-failover-callback
    on_restart: /pg/bin/pg-failover-callback
    on_role_change: /pg/bin/pg-failover-callback

  # rewind policy: data checksum should be enabled before using rewind
  use_pg_rewind: true
  remove_data_directory_on_rewind_failure: true
  remove_data_directory_on_diverged_timelines: false

  #----------------------------------------------------------------------------
  # how to create replica
  #----------------------------------------------------------------------------
  # create replica method: default pg_basebackup
  create_replica_methods:
    - basebackup
  basebackup:
    - max-rate: '1000M'
    - checkpoint: fast
    - status-interval: 1s
    - verbose
    - progress

  #----------------------------------------------------------------------------
  # ad hoc parameters (overwrite with default)
  #----------------------------------------------------------------------------
  # parameters:

  #----------------------------------------------------------------------------
  # host based authentication, overwrite default pg_hba.conf
  #----------------------------------------------------------------------------
  # pg_hba:
  #   - local   all             postgres                                ident
  #   - local   all             all                                     md5
  #   - host    all             all            0.0.0.0/0                md5
  #   - local   replication     postgres                                ident
  #   - local   replication     all                                     md5
  #   - host    replication     all            0.0.0.0/0                md5
...
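
One further knob related to the synchronous_mode setting in this template: when the requirement is strictly zero data loss even while no synchronous replica is reachable, synchronous_mode_strict can be enabled alongside it in the dcs section. The trade-off, as noted in the comments above, is that writes on the primary block until a synchronous replica becomes available again. A two-line sketch of that variant:

    synchronous_mode: true            # already enabled in the CRIT template
    synchronous_mode_strict: true     # never fall back to async; block writes instead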