Pigsty ships with basic database infrastructure. Users can customize the required databases, users, schemas, default privileges and extensions through parameters, or provide custom scripts as the baseline for template1 or for business databases. Pigsty's customization options are sufficient to cover the vast majority of database customization needs.
Customization
- Customize initialization scripts
- Customize OS tuning profiles
- Customize database templates
- Customize the access control system
- Customize Patroni templates
- Customize baseline scripts
Pigsty uses Patroni to manage and bootstrap Postgres database clusters. Patroni performs the bulk of Pigsty's provisioning work; even if you choose the Patroni-free mode, the database cluster is still brought up by Patroni, and the Patroni component is removed once creation completes.
Most PostgreSQL cluster customization can be done through the Patroni configuration file; for details on the configuration format, see the official Patroni documentation.
Pigsty provides four predefined initialization templates. An initialization template is the definition file used to bootstrap a database cluster; by default they reside in roles/postgres/templates/ and include:
- oltp.yml : OLTP template, the default configuration, optimized for latency and performance on production hardware.
- olap.yml : OLAP template, with higher parallelism, optimized for throughput and long-running queries.
- crit.yml : critical-business template, based on the OLTP template and tuned for RPO, security, and data integrity; enables synchronous replication and data checksums.
- tiny.yml : tiny template, optimized for low-resource scenarios, e.g. demo database clusters running inside virtual machines.
Specify the template to use with the pg_conf parameter; when using a built-in template, only the template file name is needed.
If you use a customized Patroni configuration template, you should usually also apply the corresponding infrastructure tuning template to the machine nodes.
For more detailed configuration information, refer to the PG provisioning parameters.
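As a sketch, selecting a template for a cluster in the inventory could look like this (the cluster name pg-test is illustrative; node_tune is assumed here to be the companion parameter for the matching node tuning template):
pg-test:
  vars:
    pg_cluster: pg-test
    pg_conf: olap.yml      # use the built-in OLAP Patroni template (file name only)
    node_tune: olap        # assumed matching infrastructure tuning template
  hosts:
    10.10.10.11: {pg_seq: 1, pg_role: primary}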
#!/usr/bin/env patroni
#==============================================================#
# File : patroni.yml
# Ctime : 2020-04-08
# Mtime : 2020-12-22
# Desc : patroni cluster definition for {{ pg_cluster }} (oltp)
# Path : /pg/bin/patroni.yml
# Real Path : /pg/conf/{{ pg_instance }}.yml
# Link : /pg/bin/patroni.yml -> /pg/conf/{{ pg_instance}}.yml
# Note : Transactional Database Cluster Template
# Doc : https://patroni.readthedocs.io/en/latest/SETTINGS.html
# Copyright (C) 2018-2021 Ruohang Feng
#==============================================================#
# OLTP databases are optimized for performance and RT latency
# typical spec: 64 Core | 400 GB RAM | PCI-E SSD xTB
---
#------------------------------------------------------------------------------
# identity
#------------------------------------------------------------------------------
namespace: {{ pg_namespace }}/ # namespace
scope: {{ pg_cluster }} # cluster name
name: {{ pg_instance }} # instance name
#------------------------------------------------------------------------------
# log
#------------------------------------------------------------------------------
log:
level: INFO # NOTEST|DEBUG|INFO|WARNING|ERROR|CRITICAL
dir: /pg/log/ # default log file: /pg/log/patroni.log
file_size: 100000000 # 100MB log triggers a log rotate
# format: '%(asctime)s %(levelname)s: %(message)s'
#------------------------------------------------------------------------------
# dcs
#------------------------------------------------------------------------------
consul:
host: 127.0.0.1:8500
consistency: default # default|consistent|stale
register_service: true
service_check_interval: 15s
service_tags:
- {{ pg_cluster }}
#------------------------------------------------------------------------------
# api
#------------------------------------------------------------------------------
# how to expose patroni service
# listen on all ipv4, connect via public ip, use same credential as dbuser_monitor
restapi:
listen: 0.0.0.0:{{ patroni_port }}
connect_address: {{ inventory_hostname }}:{{ patroni_port }}
authentication:
verify_client: none # none|optional|required
username: {{ pg_monitor_username }}
password: '{{ pg_monitor_password }}'
#------------------------------------------------------------------------------
# ctl
#------------------------------------------------------------------------------
ctl:
optional:
insecure: true
# cacert: '/path/to/ca/cert'
# certfile: '/path/to/cert/file'
# keyfile: '/path/to/key/file'
#------------------------------------------------------------------------------
# tags
#------------------------------------------------------------------------------
tags:
nofailover: false
clonefrom: true
noloadbalance: false
nosync: false
{% if pg_upstream is defined %}
replicatefrom: {{ pg_upstream }} # clone from another replica rather than primary
{% endif %}
#------------------------------------------------------------------------------
# watchdog
#------------------------------------------------------------------------------
# available mode: off|automatic|required
watchdog:
mode: {{ patroni_watchdog_mode }}
device: /dev/watchdog
# safety_margin: 10s
#------------------------------------------------------------------------------
# bootstrap
#------------------------------------------------------------------------------
bootstrap:
#----------------------------------------------------------------------------
# bootstrap method
#----------------------------------------------------------------------------
method: initdb
# add custom bootstrap method here
# default bootstrap method: initdb
initdb:
- locale: C
- encoding: UTF8
# - data-checksums # enable data-checksum
#----------------------------------------------------------------------------
# bootstrap users
#---------------------------------------------------------------------------
# additional users which need to be created after initializing new cluster
# replication user and monitor user are required
users:
{{ pg_replication_username }}:
password: '{{ pg_replication_password }}'
{{ pg_monitor_username }}:
password: '{{ pg_monitor_password }}'
{{ pg_admin_username }}:
password: '{{ pg_admin_password }}'
# bootstrap hba, allow local and intranet password access & replication
# will be overwritten later
pg_hba:
- local all postgres ident
- local all all md5
- host all all 0.0.0.0/0 md5
- local replication postgres ident
- local replication all md5
- host replication all 0.0.0.0/0 md5
#----------------------------------------------------------------------------
# template
#---------------------------------------------------------------------------
# post_init: /pg/bin/pg-init
#----------------------------------------------------------------------------
# bootstrap config
#---------------------------------------------------------------------------
# this section will be written to /{{ pg_namespace }}/{{ pg_cluster }}/config
# it will NOT take any effect after cluster bootstrap
dcs:
{% if pg_role == 'primary' and pg_upstream is defined %}
#----------------------------------------------------------------------------
# standby cluster definition
#---------------------------------------------------------------------------
standby_cluster:
host: {{ pg_upstream }}
port: {{ pg_port }}
# primary_slot_name: patroni # must be create manually on upstream server, if specified
create_replica_methods:
- basebackup
{% endif %}
#----------------------------------------------------------------------------
# important parameters
#---------------------------------------------------------------------------
# constraint: ttl >= loop_wait + retry_timeout * 2
# the number of seconds the loop will sleep. Default value: 10
# this is patroni check loop interval
loop_wait: 10
# the TTL to acquire the leader lock (in seconds). Think of it as the length of time before initiation of the automatic failover process. Default value: 30
# config this according to your network condition to avoid false-positive failover
ttl: 30
# timeout for DCS and PostgreSQL operation retries (in seconds). DCS or network issues shorter than this will not cause Patroni to demote the leader. Default value: 10
retry_timeout: 10
# the amount of time a master is allowed to recover from failures before failover is triggered (in seconds)
# Max RTO: 2 loop wait + master_start_timeout
master_start_timeout: 10
# important: candidate will not be promoted if replication lag is higher than this
# maximum RPO: 1MB
maximum_lag_on_failover: 1048576
# The number of seconds Patroni is allowed to wait when stopping Postgres and effective only when synchronous_mode is enabled
master_stop_timeout: 30
# turns on synchronous replication mode. In this mode a replica will be chosen as synchronous and only the latest leader and synchronous replica are able to participate in leader election
# set to true for RPO mode
synchronous_mode: false
# prevents disabling synchronous replication if no synchronous replicas are available, blocking all client writes to the master
synchronous_mode_strict: false
#----------------------------------------------------------------------------
# postgres parameters
#---------------------------------------------------------------------------
postgresql:
use_slots: true
use_pg_rewind: true
remove_data_directory_on_rewind_failure: true
parameters:
#----------------------------------------------------------------------
# IMPORTANT PARAMETERS
#----------------------------------------------------------------------
max_connections: 400 # 100 -> 400
superuser_reserved_connections: 10 # reserve 10 connection for su
max_locks_per_transaction: 128 # 64 -> 128
max_prepared_transactions: 0 # 0 disable 2PC
track_commit_timestamp: on # enabled xact timestamp
max_worker_processes: 8 # default 8, set to cpu core
wal_level: logical # logical
wal_log_hints: on # wal log hints to support rewind
max_wal_senders: 16 # 10 -> 16
max_replication_slots: 16 # 10 -> 16
wal_keep_size: 100GB # keep at least 100GB WAL
password_encryption: md5 # use traditional md5 auth
#----------------------------------------------------------------------
# RESOURCE USAGE (except WAL)
#----------------------------------------------------------------------
# memory: shared_buffers and maintenance_work_mem will be dynamically set
shared_buffers: {{ pg_shared_buffers }}
maintenance_work_mem: {{ pg_maintenance_work_mem }}
work_mem: 32MB # 4MB -> 32MB
huge_pages: try # try huge pages
temp_file_limit: 100GB # 0 -> 100GB
vacuum_cost_delay: 2ms # wait 2ms per 10000 cost
vacuum_cost_limit: 10000 # 10000 cost each round
bgwriter_delay: 10ms # check dirty page every 10ms
bgwriter_lru_maxpages: 800 # 100 -> 800
bgwriter_lru_multiplier: 5.0 # 2.0 -> 5.0 more cushion buffer
#----------------------------------------------------------------------
# WAL
#----------------------------------------------------------------------
wal_buffers: 16MB # max to 16MB
wal_writer_delay: 20ms # wait period
wal_writer_flush_after: 1MB # max allowed data loss
min_wal_size: 100GB # at least 100GB WAL
max_wal_size: 400GB # at most 400GB WAL
commit_delay: 20 # 200ms -> 20ms, increase speed
commit_siblings: 10 # 5 -> 10
checkpoint_timeout: 60min # checkpoint 5min -> 1h
checkpoint_completion_target: 0.95 # 0.5 -> 0.95
archive_mode: on
archive_command: 'wal_dir=/pg/arcwal; [[ $(date +%H%M) == 1200 ]] && rm -rf ${wal_dir}/$(date -d"yesterday" +%Y%m%d); /bin/mkdir -p ${wal_dir}/$(date +%Y%m%d) && /usr/bin/lz4 -q -z %p > ${wal_dir}/$(date +%Y%m%d)/%f.lz4'
#----------------------------------------------------------------------
# REPLICATION
#----------------------------------------------------------------------
# synchronous_standby_names: ''
vacuum_defer_cleanup_age: 50000 # 0->50000 last 50000 xact changes will not be vacuumed
promote_trigger_file: promote.signal # default promote trigger file path
max_standby_archive_delay: 10min # max delay before canceling queries when reading WAL from archive;
max_standby_streaming_delay: 3min # max delay before canceling queries when reading streaming WAL;
wal_receiver_status_interval: 1s # send replies at least this often
hot_standby_feedback: on # send info from standby to prevent query conflicts
wal_receiver_timeout: 60s # time that receiver waits for
max_logical_replication_workers: 8 # 4 -> 8
max_sync_workers_per_subscription: 8 # 4 -> 8
#----------------------------------------------------------------------
# QUERY TUNING
#----------------------------------------------------------------------
# planner
# enable_partitionwise_join: on
random_page_cost: 1.1 # 4 for HDD, 1.1 for SSD
effective_cache_size: 320GB # max mem - shared buffer
default_statistics_target: 1000 # stat bucket 100 -> 1000
#----------------------------------------------------------------------
# REPORTING AND LOGGING
#----------------------------------------------------------------------
log_destination: csvlog # use standard csv log
logging_collector: on # enable csvlog
log_directory: log # default log dir: /pg/data/log
# log_filename: 'postgresql-%a.log' # weekly auto-recycle
log_filename: 'postgresql-%Y-%m-%d.log' # YYYY-MM-DD full log retention
log_checkpoints: on # log checkpoint info
log_lock_waits: on # log lock wait info
log_replication_commands: on # log replication info
log_statement: ddl # log ddl change
log_min_duration_statement: 100 # log slow query (>100ms)
#----------------------------------------------------------------------
# STATISTICS
#----------------------------------------------------------------------
track_io_timing: on # collect io statistics
track_functions: all # track all functions (none|pl|all)
track_activity_query_size: 8192 # max query length in pg_stat_activity
#----------------------------------------------------------------------
# AUTOVACUUM
#----------------------------------------------------------------------
log_autovacuum_min_duration: 1s # log autovacuum activity take more than 1s
autovacuum_max_workers: 3 # default autovacuum worker 3
autovacuum_naptime: 1min # default autovacuum naptime 1min
autovacuum_vacuum_scale_factor: 0.08 # fraction of table size before vacuum 20% -> 8%
autovacuum_analyze_scale_factor: 0.04 # fraction of table size before analyze 10% -> 4%
autovacuum_vacuum_cost_delay: -1 # default vacuum cost delay: same as vacuum_cost_delay
autovacuum_vacuum_cost_limit: -1 # default vacuum cost limit: same as vacuum_cost_limit
autovacuum_freeze_max_age: 100000000 # age > 100 million triggers forced vacuum
#----------------------------------------------------------------------
# CLIENT
#----------------------------------------------------------------------
deadlock_timeout: 50ms # 50ms for deadlock
idle_in_transaction_session_timeout: 10min # 10min timeout for idle in transaction
#----------------------------------------------------------------------
# CUSTOMIZED OPTIONS
#----------------------------------------------------------------------
# extensions
shared_preload_libraries: '{{ pg_shared_libraries | default("pg_stat_statements, auto_explain") }}'
# auto_explain
auto_explain.log_min_duration: 1s # auto explain query slower than 1s
auto_explain.log_analyze: true # explain analyze
auto_explain.log_verbose: true # explain verbose
auto_explain.log_timing: true # explain timing
auto_explain.log_nested_statements: true
# pg_stat_statements
pg_stat_statements.max: 10000 # 5000 -> 10000 queries
pg_stat_statements.track: all # track all statements (all|top|none)
pg_stat_statements.track_utility: off # do not track query other than CRUD
pg_stat_statements.track_planning: off # do not track planning metrics
#------------------------------------------------------------------------------
# postgres
#------------------------------------------------------------------------------
postgresql:
#----------------------------------------------------------------------------
# how to connect to postgres
#----------------------------------------------------------------------------
bin_dir: {{ pg_bin_dir }}
data_dir: {{ pg_data }}
config_dir: {{ pg_data }}
pgpass: {{ pg_dbsu_home }}/.pgpass
listen: {{ pg_listen }}:{{ pg_port }}
connect_address: {{ inventory_hostname }}:{{ pg_port }}
use_unix_socket: true # default: /var/run/postgresql, /tmp
#----------------------------------------------------------------------------
# who to connect to postgres
#----------------------------------------------------------------------------
authentication:
superuser:
username: {{ pg_dbsu }}
replication:
username: {{ pg_replication_username }}
password: '{{ pg_replication_password }}'
rewind:
username: {{ pg_replication_username }}
password: '{{ pg_replication_password }}'
#----------------------------------------------------------------------------
# how to react to database operations
#----------------------------------------------------------------------------
# event callback script log: /pg/log/callback.log
callbacks:
on_start: /pg/bin/pg-failover-callback
on_stop: /pg/bin/pg-failover-callback
on_reload: /pg/bin/pg-failover-callback
on_restart: /pg/bin/pg-failover-callback
on_role_change: /pg/bin/pg-failover-callback
# rewind policy: data checksum should be enabled before using rewind
use_pg_rewind: true
remove_data_directory_on_rewind_failure: true
remove_data_directory_on_diverged_timelines: false
#----------------------------------------------------------------------------
# how to create replica
#----------------------------------------------------------------------------
# create replica method: default pg_basebackup
create_replica_methods:
- basebackup
basebackup:
- max-rate: '1000M'
- checkpoint: fast
- status-interval: 1s
- verbose
- progress
#----------------------------------------------------------------------------
# ad hoc parameters (overwrite with default)
#----------------------------------------------------------------------------
# parameters:
...
You can customize the cluster's template database template1 with the following parameters:
pg_init
pg_replication_username
pg_replication_password
pg_monitor_username
pg_monitor_password
pg_admin_username
pg_admin_password
pg_default_roles
pg_default_privilegs
pg_default_schemas
pg_default_extensions
pg_hba_rules
pg_hba_rules_extra
pgbouncer_hba_rules
pgbouncer_hba_rules_extra
For details, see: PG template parameters
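As a sketch, some of these template parameters can be overridden per cluster in the inventory (the extension list and HBA entry below are illustrative values, not defaults):
pg-test:
  vars:
    pg_default_schemas: [monitor]
    pg_default_extensions:
      - {name: "pg_stat_statements", schema: "monitor"}
      - {name: "postgis", schema: "public"}
    pg_hba_rules_extra:
      - title: allow office network password access
        role: common
        rules:
          - host all all 192.168.1.0/24 md5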
pg_init specifies the path of a shell script used to customize the initialization template. The script runs as the postgres user, on the primary only, after the cluster primary has already been brought up; it may execute arbitrary shell commands, or arbitrary SQL commands via psql.
If no custom pg-init is specified, Pigsty uses the default pg-init script shown below:
#!/usr/bin/env bash
set -uo pipefail
#==================================================================#
# System Template #
#==================================================================#
# system default template
psql template1 -qAXwtf /pg/tmp/pg-init-template.sql
# make postgres same as templated database (optional)
psql postgres -qAXwtf /pg/tmp/pg-init-template.sql
#==================================================================#
# Customize Logic #
#==================================================================#
# add your template logic here
If you need custom logic, it is recommended to append it to this script.
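For instance, a minimal sketch of custom logic appended to the end of pg-init (the extension and the extra SQL file path are illustrative):
# example: install an additional extension into template1
psql template1 -qAXwtc 'CREATE EXTENSION IF NOT EXISTS pg_trgm;'
# example: run an extra SQL file of your own (hypothetical path)
# psql template1 -qAXwtf /pg/tmp/pg-init-extra.sql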
pg-init-template.sql is the script template used to initialize the template1 database.
Pigsty strongly recommends accomplishing complex customization by providing a custom pg-init script; unless necessary, avoid changing the original logic in pg-init-template.
----------------------------------------------------------------------
-- File : pg-init-template.sql
-- Ctime : 2018-10-30
-- Mtime : 2020-12-17
-- Desc : init postgres cluster template schema
-- Path : /pg/tmp/pg-init-template.sql
-- Author : Vonng(fengruohang@outlook.com)
-- Copyright (C) 2018-2021 Ruohang Feng
----------------------------------------------------------------------
--==================================================================--
-- executions --
--==================================================================--
-- psql template1 -AXtwqf /pg/tmp/pg-init-template.sql
-- this sql script is responsible for the post-init procedure
-- it will
-- * create system users such as replicator, monitor user, admin user
-- * create system default role system
-- * create schema, extensions in template1 database
-- * create monitor views in template1 database
--==================================================================--
-- Roles --
--==================================================================--
-- default roles
{% for user in pg_default_roles %}
CREATE ROLE "{{ user.username }}";
{% endfor %}
-- system users
CREATE USER "{{ pg_replication_username }}";
CREATE USER "{{ pg_monitor_username }}";
CREATE USER "{{ pg_admin_username }}";
{% for user in pg_default_roles %}
--------------------------
-- {{ user.username }}
--------------------------
{% if 'password' in user %}
{% if user.password == '' %}ALTER ROLE "{{ user.username }}" PASSWORD NULL;
{% else %}ALTER ROLE "{{ user.username }}" PASSWORD '{{ user.password }}';{% endif %}
{% endif %}
{% if 'options' in user %}ALTER ROLE "{{ user.username }}" {{ user.options }};{% endif %}
{% if 'comment' in user %}COMMENT ON ROLE "{{ user.username }}" IS '{{ user.comment }}';{% endif %}
{% if 'groups' in user %}
{% for group in user.groups %}
GRANT "{{ group }}" TO "{{ user.username }}";
{% endfor %}
{% endif %}
{% endfor %}
-- enforce password overwrite for system users
ALTER ROLE "{{ pg_replication_username }}" PASSWORD '{{ pg_replication_password }}';
ALTER ROLE "{{ pg_monitor_username }}" PASSWORD '{{ pg_monitor_password }}';
ALTER ROLE "{{ pg_admin_username }}" PASSWORD '{{ pg_admin_password }}';
--==================================================================--
-- Default Privileges --
--==================================================================--
{% for priv in pg_default_privilegs %}
ALTER DEFAULT PRIVILEGES FOR ROLE {{ pg_dbsu }} {{ priv }};
{% endfor %}
{% for priv in pg_default_privilegs %}
ALTER DEFAULT PRIVILEGES FOR ROLE {{ pg_admin_username }} {{ priv }};
{% endfor %}
-- for business admin, they can set role to dbrole_admin
{% for priv in pg_default_privilegs %}
ALTER DEFAULT PRIVILEGES FOR ROLE "dbrole_admin" {{ priv }};
{% endfor %}
--==================================================================--
-- Schemas --
--==================================================================--
{% for schema_name in pg_default_schemas %}
CREATE SCHEMA IF NOT EXISTS "{{ schema_name }}";
{% endfor %}
-- revoke public creation
REVOKE CREATE ON SCHEMA public FROM PUBLIC;
--==================================================================--
-- Extensions --
--==================================================================--
{% for extension in pg_default_extensions %}
CREATE EXTENSION IF NOT EXISTS "{{ extension.name }}"{% if 'schema' in extension %}WITH SCHEMA "{{ extension.schema }}"{% endif %};
{% endfor %}
--==================================================================--
-- Monitor Views --
--==================================================================--
-- ....
You can create new business users and business databases for the cluster via pg_users and pg_databases.
Create business users and business databases declaratively whenever possible, rather than creating them by hand inside the database, because they must be changed in both the database and the connection pool at the same time.
For details, see: PG template parameters
#------------------------------------------------------------------------------
# BUSINESS TEMPLATE
#------------------------------------------------------------------------------
# - business - #
# users that are ad hoc to each cluster
pg_users:
- username: dbuser_test
password: DBUser.Test
options: LOGIN NOINHERIT
comment: business read-write user
groups: [dbrole_readwrite]
pg_databases: # additional business database
- name: test # one object for each database
owner: dbuser_test
schemas: [monitor, public]
extensions: [{name: "postgis", schema: "public"}]
parameters:
search_path: 'yay,public,monitor' # set default search path
This module works as follows: the declarative definitions of users and databases are rendered by the pg-init-business.sql template into an idempotent SQL script at /pg/tmp/pg-init-business.sql, which is then executed.
If you want to perform custom database initialization actions, consider specifying the path of a custom SQL script in pg_databases.<database>.baseline; that script will be used to initialize the business database.
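As a sketch, a database entry with a baseline script could look like this (the file name is illustrative; how the path is resolved depends on your playbook):
pg_databases:
  - name: test
    owner: dbuser_test
    schemas: [public]
    baseline: test-baseline.sql   # custom SQL used as the baseline of this business database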
You can perform additional database customization by modifying pg-init-business.
Pigsty strongly recommends accomplishing complex customization by providing a custom pg-init script; unless necessary, avoid changing the original logic in pg-init-business.
----------------------------------------------------------------------
-- File : pg-init-business.sql
-- Ctime : 2020-12-21
-- Mtime : 2020-12-21
-- Desc : business schema baseline
-- Path : /pg/tmp/pg-init-business.sql
-- Author : Vonng(fengruohang@outlook.com)
-- Copyright (C) 2018-2021 Ruohang Feng
----------------------------------------------------------------------
--==================================================================--
-- executions --
--==================================================================--
-- psql template1 -AXtwqf /pg/tmp/pg-init-business.sql
-- this sql script is responsible for creating business roles and databases
--==================================================================--
-- Users --
--==================================================================--
-- default roles
{% for user in pg_users %}
CREATE USER "{{ user.username }}";
{% endfor %}
{% for user in pg_users %}
--------------------------
-- {{ user.username }}
--------------------------
{% if 'password' in user %}
{% if user.password == '' %}ALTER ROLE "{{ user.username }}" PASSWORD NULL;
{% else %}ALTER ROLE "{{ user.username }}" PASSWORD '{{ user.password }}';{% endif %}
{% endif %}
{% if 'options' in user %}ALTER ROLE "{{ user.username }}" {{ user.options }};{% endif %}
{% if 'comment' in user %}COMMENT ON ROLE "{{ user.username }}" IS '{{ user.comment }}';{% endif %}
{% if 'groups' in user %}
{% for group in user.groups %}
GRANT "{{ group }}" TO "{{ user.username }}";
{% endfor %}
{% endif %}
{% endfor %}
--==================================================================--
-- Databases --
--==================================================================--
{% for database in pg_databases %}
CREATE DATABASE "{{ database.name }}";
-- admin role have create privilege
REVOKE CREATE ON DATABASE "{{ database.name }}" FROM PUBLIC;
GRANT CREATE ON DATABASE "{{ database.name }}" TO "dbrole_admin";
-- if owner is set, revoke public connect privilege
{% if 'owner' in database %}
-- setup owner
ALTER DATABASE "{{ database.name }}" OWNER TO {{ database.owner }};
-- revoke public connect
REVOKE CONNECT ON DATABASE "{{ database.name }}" FROM PUBLIC;
-- replicator, monitor have connect privilege
GRANT CONNECT ON DATABASE "{{ database.name }}" TO "{{ pg_replication_username }}";
GRANT CONNECT ON DATABASE "{{ database.name }}" TO "{{ pg_monitor_username }}";
-- admin and dbowner have connect privilege with grant option
GRANT CONNECT ON DATABASE "{{ database.name }}" TO "{{ pg_admin_username }}" WITH GRANT OPTION;
GRANT CONNECT ON DATABASE "{{ database.name }}" TO "{{ database.owner }}" WITH GRANT OPTION;
{% endif %}
{% endfor %}
{% for database in pg_databases %}
--------------------------
-- database: {{ database.name }}
--------------------------
-- connect to database {{ database.name }}
\c {{ database.name }}
-- create schemas
{% if 'schemas' in database %}{% for schema_name in database.schemas %}
CREATE SCHEMA IF NOT EXISTS "{{ schema_name }}";
{% endfor %}{% endif %}
-- revoke public schema creation
REVOKE CREATE ON SCHEMA public FROM PUBLIC;
GRANT CREATE ON SCHEMA public TO "dbrole_admin"; -- admin can create objects
-- create extensions
{% if 'extensions' in database %}{% for extension in database.extensions %}
CREATE EXTENSION IF NOT EXISTS "{{ extension.name }}"{% if 'schema' in extension %}WITH SCHEMA "{{ extension.schema }}"{% endif %};
{% endfor %}{% endif %}
-- alter database parameters
{% if 'parameters' in database %}{% for key, value in database.parameters.items() %}
ALTER DATABASE "{{ database.name }}" SET {{ key }} = {{ value }};
{% endfor %}{% endif %}
{% endfor %}
The following paths are related to Pigsty's directory structure:
- /var/lib/pgsql : home directory of the dbsu (the postgres OS user)
- /usr/pgsql/bin/ : PostgreSQL binary directory (pg_bin_dir)
- /pg/data : database data directory (pg_data)
- /export : main data disk mountpoint (pg_fs_main)
- /var/backups : backup disk mountpoint (pg_fs_bkup, optional; backups may also be kept on the main data disk)
#------------------------------------------------------------------------------
# Create Directory
#------------------------------------------------------------------------------
# this assumes that
# /pg is shortcut for postgres home
# {{ pg_fs_main }} contains the main data (MUST ALREADY MOUNTED)
# {{ pg_fs_bkup }} contains archive and backup data (MUST ALREADY MOUNTED)
# cluster-version is the default parent folder for pgdata (e.g pg-test-12)
#------------------------------------------------------------------------------
# default variable:
# pg_fs_main = /export fast ssd
# pg_fs_bkup = /var/backups cheap hdd
#
# /pg -> /export/postgres/pg-test-12
# /pg/data -> /export/postgres/pg-test-12/data
#------------------------------------------------------------------------------
- name: Create postgresql directories
tags: pg_dir
become: yes
block:
- name: Make sure main and backup dir exists
file: path={{ item }} state=directory owner=root mode=0777
with_items:
- "{{ pg_fs_main }}"
- "{{ pg_fs_bkup }}"
# pg_cluster_dir: "{{ pg_fs_main }}/postgres/{{ pg_cluster }}-{{ pg_version }}"
- name: Create postgres directory structure
file: path={{ item }} state=directory owner={{ pg_dbsu }} group=postgres mode=0700
with_items:
- "{{ pg_fs_main }}/postgres"
- "{{ pg_cluster_dir }}"
- "{{ pg_cluster_dir }}/bin"
- "{{ pg_cluster_dir }}/log"
- "{{ pg_cluster_dir }}/tmp"
- "{{ pg_cluster_dir }}/conf"
- "{{ pg_cluster_dir }}/data"
- "{{ pg_cluster_dir }}/meta"
- "{{ pg_cluster_dir }}/stat"
- "{{ pg_cluster_dir }}/change"
- "{{ pg_backup_dir }}/postgres"
- "{{ pg_backup_dir }}/arcwal"
- "{{ pg_backup_dir }}/backup"
- "{{ pg_backup_dir }}/remote"
On RedHat/CentOS, the default installation location of the Postgres distribution is
/usr/pgsql-${pg_version}/
The installation playbook automatically creates a softlink pointing to the currently installed version; for example, with Postgres 13 installed:
/usr/pgsql -> /usr/pgsql-13
The default pg_bin_dir is therefore /usr/pgsql/bin/, and this path is added to every user's PATH environment variable via /etc/profile.d/pgsql.sh.
Pigsty assumes that every node used to deploy database instances has at least one main data disk (pg_fs_main) and an optional backup data disk (pg_fs_bkup). Typically the main data disk is a high-performance SSD, while the backup disk is a large, cheap HDD.
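As a sketch, these mountpoints can be overridden in the inventory when the hardware differs (the paths below are illustrative):
pg-test:
  vars:
    pg_fs_main: /data             # fast SSD mountpoint holding main data
    pg_fs_bkup: /data/backups     # no dedicated HDD: keep backups on the main data disk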
#------------------------------------------------------------------------------
# Create Directory
#------------------------------------------------------------------------------
# this assumes that
# /pg is shortcut for postgres home
# {{ pg_fs_main }} contains the main data (MUST ALREADY MOUNTED)
# {{ pg_fs_bkup }} contains archive and backup data (MAYBE ALREADY MOUNTED)
# {{ pg_cluster }}-{{ pg_version }} is the default parent folder
# for pgdata (e.g pg-test-12)
#------------------------------------------------------------------------------
# default variable:
# pg_fs_main = /export fast ssd
# pg_fs_bkup = /var/backups cheap hdd
#
# /pg -> /export/postgres/pg-test-12
# /pg/data -> /export/postgres/pg-test-12/data
# basic
{{ pg_fs_main }} /export # contains all business data (pg,consul,etc..)
{{ pg_dir_main }} /export/postgres # contains postgres main data
{{ pg_cluster_dir }} /export/postgres/pg-test-13 # contains cluster `pg-test` data (of version 13)
/export/postgres/pg-test-13/bin # binary scripts
/export/postgres/pg-test-13/log # misc logs
/export/postgres/pg-test-13/tmp # tmp, sql files, records
/export/postgres/pg-test-13/conf # configurations
/export/postgres/pg-test-13/data # main data directory
/export/postgres/pg-test-13/meta # identity information
/export/postgres/pg-test-13/stat # stats information
/export/postgres/pg-test-13/change # changing records
{{ pg_fs_bkup }} /var/backups # contains all backup data (pg,consul,etc..)
{{ pg_dir_bkup }} /var/backups/postgres # contains postgres backup data
{{ pg_backup_dir }} /var/backups/postgres/pg-test-13 # contains cluster `pg-test` backup (of version 13)
/var/backups/postgres/pg-test-13/backup # base backup
/var/backups/postgres/pg-test-13/arcwal # WAL archive
/var/backups/postgres/pg-test-13/remote # mount NFS/S3 remote resources here
# links
/pg -> /export/postgres/pg-test-13 # pg root link
/pg/data -> /export/postgres/pg-test-13/data # real data dir
/pg/backup -> /var/backups/postgres/pg-test-13/backup # base backup
/pg/arcwal -> /var/backups/postgres/pg-test-13/arcwal # WAL archive
/pg/remote -> /var/backups/postgres/pg-test-13/remote # mount NFS/S3 remote resources here
Pgbouncer runs as the postgres user, and its configuration files live in /etc/pgbouncer. The configuration files include:
- pgbouncer.ini : the main configuration file
- userlist.txt : lists the users in the connection pool
- pgb_hba.conf : lists the access privileges of connection pool users
- database.txt : lists the databases in the connection pool
PostgreSQL provides two kinds of access control mechanisms: Authentication and Privileges.
Pigsty ships with a basic access control model that is sufficient for the vast majority of application scenarios.
Pigsty's default privilege system consists of four default users and four default roles, covering most business scenarios. You may change the names of the default users through the configuration file, but novice users are advised not to rename the default roles. If you do rename the default roles, update the default privilege logic in the initialization templates accordingly.
Pigsty ships with four default roles:
- dbrole_readonly : read-only
- dbrole_readwrite : read-write, inherits dbrole_readonly
- dbrole_admin : performs DDL changes, inherits dbrole_readwrite
- dbrole_offline : read-only, used for slow queries / ETL / interactive queries, only allowed on specific instances
Pigsty ships with four default users:
- postgres : owner and creator of the database, identical to the OS user
- replicator : user used for primary-replica replication
- dbuser_monitor : user used for collecting database monitoring metrics
- dbuser_admin : user that performs routine administration and database changes
The variables related to these 8 default users and roles are listed below:
#------------------------------------------------------------------------------
# POSTGRES TEMPLATE
#------------------------------------------------------------------------------
# - system roles - #
pg_replication_username: replicator # system replication user
pg_replication_password: DBUser.Replicator # system replication password
pg_monitor_username: dbuser_monitor # system monitor user
pg_monitor_password: DBUser.Monitor # system monitor password
pg_admin_username: dbuser_admin # system admin user
pg_admin_password: DBUser.Admin # system admin password
# - default roles - #
pg_default_roles:
# roles
- username: dbrole_readonly # production read-only roles
options: NOLOGIN
comment: role for readonly access
- username: dbrole_readwrite # production read-write roles
options: NOLOGIN
comment: role for read-write access
groups: [ dbrole_readonly ] # read-write includes read-only access
- username: dbrole_offline # restricted read-only user (only allowed in specific replica)
options: NOLOGIN # for running slow queries, interactive queries, perform ETL
comment: role for restricted read-only access
- username: dbrole_admin # production admin role (for DDL change)
options: NOLOGIN BYPASSRLS # admin can bypass row level security
comment: role for object creation
groups: [dbrole_readwrite,pg_monitor,pg_signal_backend]
# users
# NOTE: replicator, monitor, admin passwords are overwritten by separate config entries
- username: postgres # dbsu
options: SUPERUSER LOGIN
comment: system superuser
- username: replicator # default replication user
options: REPLICATION LOGIN
groups: [pg_monitor, dbrole_readonly]
comment: system replicator
- username: dbuser_monitor # default monitor user
options: LOGIN CONNECTION LIMIT 10
comment: system monitor user
groups: [pg_monitor, dbrole_readonly]
- username: dbuser_admin # default admin user
options: LOGIN BYPASSRLS
comment: system admin user
groups: [dbrole_admin]
- username: dbuser_stats # default stats user (for ETL)
password: DBUser.Stats
options: LOGIN
comment: business read-only user for statistics
groups: [dbrole_offline]
The default users have dedicated username and password configuration options, which override the entries in pg_default_roles; there is therefore no need to configure passwords for the default users in pg_default_roles.
For security reasons, configuring a password for the dbsu is not recommended, so pg_dbsu has no dedicated password option. If necessary, you can still specify a superuser password in pg_default_roles.
Pgbouncer runs as the same OS user as the database superuser.
By default, Pigsty uses the Postgres admin user as Pgbouncer's admin user, and the Postgres monitor user as Pgbouncer's monitor user.
Pgbouncer user privileges are controlled via /etc/pgbouncer/pgb_hba.conf.
The Pgbouncer user list is controlled via the /etc/pgbouncer/userlist.txt file.
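For reference, entries in userlist.txt follow the standard Pgbouncer format of quoted username / password (or md5 hash) pairs; the values below are purely illustrative, since Pigsty generates this file automatically:
"dbuser_monitor" "DBUser.Monitor"
"dbuser_test" "md5d41d8cd98f00b204e9800998ecf8427e"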
By default, these roles hold the following privileges:
GRANT USAGE ON SCHEMAS TO dbrole_readonly
GRANT SELECT ON TABLES TO dbrole_readonly
GRANT SELECT ON SEQUENCES TO dbrole_readonly
GRANT EXECUTE ON FUNCTIONS TO dbrole_readonly
GRANT USAGE ON SCHEMAS TO dbrole_offline
GRANT SELECT ON TABLES TO dbrole_offline
GRANT SELECT ON SEQUENCES TO dbrole_offline
GRANT EXECUTE ON FUNCTIONS TO dbrole_offline
GRANT INSERT, UPDATE, DELETE ON TABLES TO dbrole_readwrite
GRANT USAGE, UPDATE ON SEQUENCES TO dbrole_readwrite
GRANT TRUNCATE, REFERENCES, TRIGGER ON TABLES TO dbrole_admin
GRANT CREATE ON SCHEMAS TO dbrole_admin
GRANT USAGE ON TYPES TO dbrole_admin
All other business users should belong to one of the four default roles: read-only, read-write, admin, or offline.
All users can access all schemas. Read-only users can read all tables, read-write users can additionally perform DML on all tables, and admin users can perform DDL changes. Offline users are similar to read-only users, but are only allowed to access instances with pg_role == 'offline' or pg_offline_query = true.
A database carries three privileges: CONNECT, CREATE, TEMP, plus the special OWNERSHIP.
By default, if a database has no configured owner, the superuser becomes its default OWNER, and all business users hold the CONNECT privilege on it.
If an owner is configured, that user becomes the database's owner. In this case the public CONNECT privilege on the database is revoked: only the OWNER may CONNECT, and the admin user and the OWNER may grant CONNECT to other roles. This setup is particularly useful for multi-tenant instances.
By default, for security reasons, Pigsty revokes the PUBLIC privilege to CREATE new schemas in the database, and also revokes the PUBLIC privilege to create new relations in the public schema. Database superusers and admins are not subject to these restrictions and can always perform DDL changes anywhere.
Performing DDL changes with ordinary business users is strongly discouraged, because PostgreSQL's ALTER DEFAULT PRIVILEGES only applies to objects created by a specific role. By default, only objects created by the superuser postgres and by dbuser_admin carry the intended default privilege configuration. If you want a business user granted dbrole_admin to perform DDL changes, have it run the following first:
SET ROLE dbrole_admin; -- objects created by dbrole_admin carry the correct default privileges
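For example, a minimal sketch of such a DDL session (the table name app_orders is illustrative):
-- connect as a business user that is a member of dbrole_admin
SET ROLE dbrole_admin;                                  -- new objects will be owned by dbrole_admin
CREATE TABLE app_orders (id bigint PRIMARY KEY, payload jsonb);
RESET ROLE;                                             -- switch back to the login role afterwards
-- the default privileges defined FOR ROLE dbrole_admin now apply:
-- dbrole_readonly can SELECT and dbrole_readwrite can INSERT/UPDATE/DELETE on app_orders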
If you simply want to use a database, leave owner empty in pg_databases; all newly created business users will then be able to connect to it.
If you want to host multiple databases on a single instance (which is strongly discouraged), configure a dedicated owner for each database. In that case only the owner can connect to the database by default, and the owner can grant the connect privilege to other users.
pg_databases:
- {name: testdb , owner: dbuser_test} # remove the owner entry to let all business users connect
The privilege to create objects inside a database is unrelated to whether the user owns the database; it only depends on whether the user was granted the admin role when created.
pg_users:
- {username: test1, password: xxx , groups: [dbrole_readwrite]} # cannot create schemas or objects
- {username: test2, password: xxx , groups: [dbrole_admin]} # can create schemas and objects
HBA stands for Host Based Authentication; it can be thought of as an IP black/white list.
In Pigsty, the HBA of every instance is generated from the configuration file, and the resulting HBA rules depend on the instance's role (pg_role).
Pigsty's HBA is controlled by the following variables:
- pg_hba_rules : environment-wide HBA rules
- pg_hba_rules_extra : HBA rules specific to a cluster or instance
- pgbouncer_hba_rules : HBA rules used by the connection pool
- pgbouncer_hba_rules_extra : connection pool HBA rules specific to a cluster or instance
Each variable is an array of rules in the following form:
- title: allow intranet admin password access
role: common
rules:
- host all +dbrole_admin 10.0.0.0/8 md5
- host all +dbrole_admin 172.16.0.0/12 md5
- host all +dbrole_admin 192.168.0.0/16 md5
HBA rule groups with role = common are installed on all instances, whereas other values, e.g. role: primary, are only installed on instances with pg_role = primary. The role system therefore lets you define flexible HBA rules.
As a special case, HBA rules with role: offline are installed not only on instances with pg_role == 'offline', but also on instances with pg_offline_query == true.
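For instance, a sketch of a cluster-specific rule that is only rendered on the primary (the title and CIDR are illustrative):
pg_hba_rules_extra:
  - title: allow application access on primary only
    role: primary
    rules:
      - host all +dbrole_readwrite 10.10.10.0/24 md5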
With the default configuration, primary and replica instances use the HBA rules below. On instances with pg_role == 'offline' or pg_offline_query == true, additional HBA rules allowing access by members of dbrole_offline are installed as well.
#==============================================================#
# Default HBA
#==============================================================#
# allow local su with ident
local all postgres ident
local replication postgres ident
# allow local user password access
local all all md5
# allow local/intranet replication with password
local replication replicator md5
host replication replicator 127.0.0.1/32 md5
host all replicator 10.0.0.0/8 md5
host all replicator 172.16.0.0/12 md5
host all replicator 192.168.0.0/16 md5
host replication replicator 10.0.0.0/8 md5
host replication replicator 172.16.0.0/12 md5
host replication replicator 192.168.0.0/16 md5
# allow local role monitor with password
local all dbuser_monitor md5
host all dbuser_monitor 127.0.0.1/32 md5
#==============================================================#
# replica HBA
#==============================================================#
#==============================================================#
# special HBA for instance marked with 'pg_offline_query = true'
#==============================================================#
# allow offline query (ETL,SAGA,Interactive) on offline instance
host all +dbrole_offline 10.0.0.0/8 md5
host all +dbrole_offline 172.16.0.0/12 md5
host all +dbrole_offline 192.168.0.0/16 md5
#==============================================================#
# Common HBA
#==============================================================#
# allow meta node password access
host all all 10.10.10.10/32 md5
# allow intranet admin password access
host all +dbrole_admin 10.0.0.0/8 md5
host all +dbrole_admin 172.16.0.0/12 md5
host all +dbrole_admin 192.168.0.0/16 md5
# allow intranet password access
host all all 10.0.0.0/8 md5
host all all 172.16.0.0/12 md5
host all all 192.168.0.0/16 md5
# allow local read/write (local production user via pgbouncer)
local all +dbrole_readonly md5
host all +dbrole_readonly 127.0.0.1/32 md5
#==============================================================#
# Extra HBA
#==============================================================#
# add extra hba rules here
#==============================================================#
# Ad Hoc HBA
#==============================================================#
# manual maintained hba rules