Kernel Optimize
Pigsty parameter tuning for the OS kernel
Pigsty使用tuned调整操作系统配置,tuned是CentOS7自带的调参工具。
Pigsty Tuned配置
Pigsty默认会为操作系统安装四种tuned profile:
tuned-adm profile oltp # 启用OLTP模式
tuned-adm profile olap # 启用OLAP模式
tuned-adm profile crit # 启用CRIT模式
tuned-adm profile tiny # 启用TINY模式
Tuned基本操作
# 如需启动 tuned,请以 root 身份运行下列指令:
systemctl start tuned
# 若要在每次计算机启动时激活 tuned,请输入以下指令:
systemctl enable tuned
# 其它的 tuned 控制,例如配置文件选择等,请使用:
tuned-adm
# 若要查看可用的已安装配置文件,此命令需要 tuned 服务正在运行。
tuned-adm list
# 若要查看目前已激活的配置文件,请运行:
tuned-adm active
# 若要选择或激活某一配置文件,请运行:
tuned-adm profile profile_name
# 例如
tuned-adm profile powersave
# 若要让 tuned 推荐最适合您的系统的配置文件,同时不改变任何现有的配置文件,也不使用安装期间使用过的逻辑,请运行以下指令:
tuned-adm recommend
# 要禁用所有微调:
tuned-adm off
要列出所有可用配置文件并识别目前激活的配置文件,请运行:
tuned-adm list
要只显示当前激活的配置文件请运行:
tuned-adm active
要切换到某个可用的配置文件请运行:
tuned-adm profile profile_name
例如:
tuned-adm profile server-powersave
OLTP配置
# tuned configuration
#==============================================================#
# File : tuned.conf
# Mtime : 2020-06-29
# Desc : Tune operating system to oltp mode
# Path : /etc/tuned/oltp/tuned.conf
# Author : Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#
[main]
summary=Optimize for PostgreSQL OLTP System
include=network-latency
[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100
[vm]
# disable transparent hugepages
transparent_hugepages=never
[sysctl]
#-------------------------------------------------------------#
# KERNEL #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0
# total shmem size in pages (kernel.shmall is counted in pages): $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}
# max shmem segment size in bytes (kernel.shmmax is counted in bytes): $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}
# total shmem segs 4096 -> 8192
kernel.shmmni=8192
# total msg queue number, set to mem size in MB
kernel.msgmni=32768
# max length of message queue
kernel.msgmnb=65536
# max size of message
kernel.msgmax=65536
kernel.pid_max=131072
# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536
# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0
# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000
#-------------------------------------------------------------#
# VM #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0
# disable when most mem are for file cache
vm.zone_reclaim_mode=0
# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80
# vm.dirty_background_bytes=67108864 # 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_ratio=3 # latency-performance default
vm.dirty_ratio=10 # latency-performance default
# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536
#-------------------------------------------------------------#
# Filesystem #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160
# max concurrent unfinished async io, should be larger than 1M. 65536->1M
fs.aio-max-nr=1048576
#-------------------------------------------------------------#
# Network #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304
# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1
# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"
# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60
net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1
# max connection tracking number
net.netfilter.nf_conntrack_max=1048576
OLAP配置
# tuned configuration
#==============================================================#
# File : tuned.conf
# Mtime : 2020-09-18
# Desc : Tune operating system to olap mode
# Path : /etc/tuned/olap/tuned.conf
# Author : Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#
[main]
summary=Optimize for PostgreSQL OLAP System
include=network-throughput
[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100
[vm]
# disable transparent hugepages
transparent_hugepages=never
[sysctl]
#-------------------------------------------------------------#
# KERNEL #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0
# total shmem size in pages (kernel.shmall is counted in pages): $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}
# max shmem segment size in bytes (kernel.shmmax is counted in bytes): $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}
# total shmem segs 4096 -> 8192
kernel.shmmni=8192
# total msg queue number, set to mem size in MB
kernel.msgmni=32768
# max length of message queue
kernel.msgmnb=65536
# max size of message
kernel.msgmax=65536
kernel.pid_max=131072
# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536
# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0
# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000
#-------------------------------------------------------------#
# VM #
#-------------------------------------------------------------#
# try not using swap
# vm.swappiness=10
# disable when most mem are for file cache
vm.zone_reclaim_mode=0
# overcommit threshold = 80%
vm.overcommit_memory=2
vm.overcommit_ratio=80
vm.dirty_background_ratio = 10 # throughput-performance default
vm.dirty_ratio=80 # throughput-performance default 40 -> 80
# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536
#-------------------------------------------------------------#
# Filesystem #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160
# max concurrent unfinished async io, should be larger than 1M. 65536->1M
fs.aio-max-nr=1048576
#-------------------------------------------------------------#
# Network #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304
# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1
# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"
# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60
net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1
# max connection tracking number
net.netfilter.nf_conntrack_max=1048576
CRIT配置
# tuned configuration
#==============================================================#
# File : tuned.conf
# Mtime : 2020-06-29
# Desc : Tune operating system to crit mode
# Path : /etc/tuned/crit/tuned.conf
# Author : Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#
[main]
summary=Optimize for PostgreSQL CRIT System
include=network-latency
[cpu]
force_latency=1
governor=performance
energy_perf_bias=performance
min_perf_pct=100
[vm]
# disable transparent hugepages
transparent_hugepages=never
[sysctl]
#-------------------------------------------------------------#
# KERNEL #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0
# total shmem size in pages (kernel.shmall is counted in pages): $(expr $(getconf _PHYS_PAGES) / 2)
{% if param_shmall is defined and param_shmall != '' %}
kernel.shmall = {{ param_shmall }}
{% endif %}
# max shmem segment size in bytes (kernel.shmmax is counted in bytes): $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
{% if param_shmmax is defined and param_shmmax != '' %}
kernel.shmmax = {{ param_shmmax }}
{% endif %}
# total shmem segs 4096 -> 8192
kernel.shmmni=8192
# total msg queue number, set to mem size in MB
kernel.msgmni=32768
# max length of message queue
kernel.msgmnb=65536
# max size of message
kernel.msgmax=65536
kernel.pid_max=131072
# max(Sem in Set)=2048, max(Sem)=max(Sem in Set) x max(SemSet) , max(Sem per Ops)=2048, max(SemSet)=65536
kernel.sem=2048 134217728 2048 65536
# do not sched postgres process in group
kernel.sched_autogroup_enabled = 0
# total time the scheduler will consider a migrated process cache hot and, thus, less likely to be remigrated
# default = 0.5ms (500000ns), update to 5ms, depending on your typical query (e.g < 1ms)
kernel.sched_migration_cost_ns=5000000
#-------------------------------------------------------------#
# VM #
#-------------------------------------------------------------#
# try not using swap
vm.swappiness=0
# disable when most mem are for file cache
vm.zone_reclaim_mode=0
# overcommit threshold = 100%
vm.overcommit_memory=2
vm.overcommit_ratio=100
# 64MB mem (2xRAID cache) wake the bgwriter
vm.dirty_background_bytes=67108864
# vm.dirty_background_ratio=3 # latency-performance default
vm.dirty_ratio=6 # latency-performance default 10 -> 6
# deny access on 0x00000 - 0x10000
vm.mmap_min_addr=65536
#-------------------------------------------------------------#
# Filesystem #
#-------------------------------------------------------------#
# max open files: 382589 -> 167772160
fs.file-max=167772160
# max concurrent unfinished async io, should be larger than 1M. 65536->1M
fs.aio-max-nr=1048576
#-------------------------------------------------------------#
# Network #
#-------------------------------------------------------------#
# max connection in listen queue (triggers retrans if full)
net.core.somaxconn=65535
net.core.netdev_max_backlog=8192
# tcp receive/transmit buffer default = 256KiB
net.core.rmem_default=262144
net.core.wmem_default=262144
# receive/transmit buffer limit = 4MiB
net.core.rmem_max=4194304
net.core.wmem_max=4194304
# ip options
net.ipv4.ip_forward=1
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_local_port_range=32768 65000
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1
# tcp read/write buffer
net.ipv4.tcp_rmem="4096 87380 16777216"
net.ipv4.tcp_wmem="4096 16384 16777216"
net.ipv4.udp_mem="3145728 4194304 16777216"
# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60
net.ipv4.tcp_fin_timeout=5
net.ipv4.tcp_max_tw_buckets=262144
net.ipv4.tcp_max_syn_backlog=8192
net.ipv4.neigh.default.gc_thresh1=80000
net.ipv4.neigh.default.gc_thresh2=90000
net.ipv4.neigh.default.gc_thresh3=100000
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-arptables=1
# max connection tracking number
net.netfilter.nf_conntrack_max=1048576
TINY配置
# tuned configuration
#==============================================================#
# File : tuned.conf
# Mtime : 2020-06-29
# Desc : Tune operating system to tiny mode
# Path : /etc/tuned/tiny/tuned.conf
# Author : Vonng(fengruohang@outlook.com)
# Copyright (C) 2019-2020 Ruohang Feng
#==============================================================#
[main]
summary=Optimize for PostgreSQL TINY System
# include=virtual-guest
[vm]
# disable transparent hugepages
transparent_hugepages=never
[sysctl]
#-------------------------------------------------------------#
# KERNEL #
#-------------------------------------------------------------#
# disable numa balancing
kernel.numa_balancing=0
# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40
# Filesystem I/O is usually much more efficient than swapping, so try to keep
# swapping low. It's usually safe to go even lower than this on systems with
# server-grade storage.
vm.swappiness = 30
#-------------------------------------------------------------#
# Network #
#-------------------------------------------------------------#
# tcp options
net.ipv4.tcp_timestamps=1
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_tw_recycle=0
net.ipv4.tcp_syncookies=0
net.ipv4.tcp_synack_retries=1
net.ipv4.tcp_syn_retries=1
# tcp probe fail interval: 75s -> 20s
net.ipv4.tcp_keepalive_intvl=20
# tcp break after 3 * 20s = 1m
net.ipv4.tcp_keepalive_probes=3
# probe period = 1 min
net.ipv4.tcp_keepalive_time=60
数据库内核调优参考
# Database kernel optimisation
fs.aio-max-nr = 1048576 # 限制并发未完成的异步请求数目,不应小于1M
fs.file-max = 16777216 # 最大打开16M个文件
# kernel
kernel.shmmax = 1986797568 # 单个共享内存段最大字节数: $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE))
kernel.shmall = 485058 # 共享内存总页数: $(expr $(getconf _PHYS_PAGES) / 2)
kernel.shmmni = 16384 # 系统范围内共享内存段的最大数量 4096 -> 16384
kernel.msgmni = 32768 # 系统的消息队列数目,影响可以启动的代理程序数 设为内存MB数
kernel.msgmnb = 65536 # 影响队列的大小
kernel.msgmax = 65536 # 影响队列中可以发送的消息的大小
kernel.numa_balancing = 0 # Numa禁用
kernel.sched_migration_cost_ns = 5000000 # 5ms内,调度认为进程还是Hot的。
kernel.sem = 2048 134217728 2048 65536 # 每个信号集最大信号量2048,系统总共可用信号量134217728,单次最大操作2048,信号集总数65536
# vm
vm.dirty_ratio = 80 # 绝对限制,超过80%阻塞写请求刷盘
vm.dirty_background_bytes = 268435456 # 256MB脏数据唤醒刷盘进程
vm.dirty_expire_centisecs = 6000 # 1分钟前的数据被认为需要刷盘
vm.dirty_writeback_centisecs= 500 # 刷新进程运行间隔5秒
vm.mmap_min_addr = 65536 # 禁止访问0x10000下的内存
vm.zone_reclaim_mode = 0 # Numa禁用
# vm swap
vm.swappiness = 0 # 禁用SWAP,但高水位仍会有
vm.overcommit_memory = 2 # 严格模式:可提交内存上限 = swap + 物理内存 * overcommit_ratio%
vm.overcommit_ratio = 50 # 允许的Overcommit:$((($mem - $swap) * 100 / $mem))
# tcp memory
net.ipv4.tcp_rmem = 8192 65536 16777216 # tcp读buffer(单位:字节): 8K/64K/16M
net.ipv4.tcp_wmem = 8192 65536 16777216 # tcp写buffer(单位:字节): 8K/64K/16M
net.ipv4.tcp_mem = 131072 262144 16777216 # tcp 内存使用(单位:页,按4K页计): 512M/1G/64G
net.core.rmem_default = 262144 # 接受缓冲区默认大小: 256K
net.core.rmem_max = 4194304 # 接受缓冲区最大大小: 4M
net.core.wmem_default = 262144 # 发送缓冲区默认大小: 256K
net.core.wmem_max = 4194304 # 发送缓冲区最大大小: 4M
# tcp keepalive
net.ipv4.tcp_keepalive_intvl = 20 # 探测没有确认时,重新发送探测的频度。默认75s -> 20s
net.ipv4.tcp_keepalive_probes = 3 # 3 * 20 = 1分钟超时断开
net.ipv4.tcp_keepalive_time = 60 # 探活周期1分钟
# tcp port reuse
net.ipv4.tcp_tw_reuse = 1 # 允许将TIME_WAIT socket用于新的TCP连接。默认为0
net.ipv4.tcp_tw_recycle = 0 # 快速回收,已弃用
net.ipv4.tcp_fin_timeout = 5 # 保持在FIN-WAIT-2状态的秒时间
net.ipv4.tcp_timestamps = 1
# tcp anti-flood
net.ipv4.tcp_syncookies = 1 # SYN_RECV队列满后发cookie,防止恶意攻击
net.ipv4.tcp_synack_retries = 1 # 收到SYN后重发SYN+ACK的重试次数 5->1
net.ipv4.tcp_syn_retries = 1 #表示在内核放弃建立连接之前发送SYN包的数量。
# tcp load-balancer
net.ipv4.ip_forward = 1 # IP转发
net.ipv4.ip_nonlocal_bind = 1 # 绑定非本机地址
net.netfilter.nf_conntrack_max = 1048576 # 最大跟踪连接数
net.ipv4.ip_local_port_range = 10000 65535 # 端口范围
net.ipv4.tcp_max_tw_buckets = 262144 # 256k TIME_WAIT
net.core.somaxconn = 65535 # 限制LISTEN队列最大数据包量,触发重传机制。
net.ipv4.tcp_max_syn_backlog = 8192 # SYN队列大小:1024->8192
net.core.netdev_max_backlog = 8192 # 网卡收包快于内核时,允许队列长度
Last modified 2021-03-28: update en docs (f994b54)