1、配置清单及网络规划
配置清单
OS: centos7.9
kubernetes: 1.29.2
Container Runtime:Containerd 1.7.11
OCI 运行时: runc 1.1.10
CNI: cni-plugin 1.4
集群规划
IP | hostname | 配置 |
---|---|---|
172.16.20.191 | master01 | 4C8G |
172.16.20.192 | master02 | 4C8G |
172.16.20.193 | master03 | 4C8G |
172.16.20.194 | node01 | 4C8G |
172.16.20.195 | node02 | 4C8G |
172.16.20.196 | node03 | 4C8G |
集群网络规划
Pod 网络: 10.244.0.0/16
Service 网络: 10.96.0.0/12
Node 网络: 172.16.20.0/24
2、环境初始化
# Generate an SSH key pair for root on the control host (creates
# /root/.ssh/id_rsa and /root/.ssh/id_rsa.pub when the defaults are accepted).
ssh-keygen
# Copy the public key to every cluster node (172.16.20.191 - 172.16.20.196)
# so that Ansible can log in without a password.
for i in $(seq 191 196); do
    ssh-copy-id -i /root/.ssh/id_rsa.pub "root@172.16.20.${i}"
done
# Install Ansible. On CentOS 7 the package is shipped in EPEL, so enable that
# repository first (harmless if it is already enabled).
yum install -y epel-release
yum install -y ansible
配置文件:vim /etc/ansible/hosts
[master]
172.16.20.191
172.16.20.192
172.16.20.193
[node]
172.16.20.194
172.16.20.195
172.16.20.196
[all]
172.16.20.191
172.16.20.192
172.16.20.193
172.16.20.194
172.16.20.195
172.16.20.196
# Append the cluster hostname mappings to /etc/hosts on every inventory host.
# NOTE(review): the heredoc is appended with >> on each run, so running this
# command more than once leaves duplicate entries in /etc/hosts.
ansible all -m shell -a 'cat << EOF >> /etc/hosts
172.16.20.191 master01
172.16.20.192 master02
172.16.20.193 master03
172.16.20.194 node01
172.16.20.195 node02
172.16.20.196 node03
EOF'
3、配置高可用ApiServer
1、配置nginx-stream
# nginx is not in the CentOS 7 base repositories; enable EPEL first.
yum -y install epel-release
# Install nginx together with the dynamic stream module, which is required
# for the TCP (layer 4) proxy in front of the API servers configured below.
yum -y install nginx nginx-mod-stream
systemctl status nginx
# 修改 nginx 配置文件
cat /etc/nginx/nginx.conf
# Run the worker processes as the "nginx" account created by the nginx package.
user nginx;
worker_processes auto;
pid /run/nginx.pid;
# Load dynamic modules (including mod-stream.conf from nginx-mod-stream).
# This is the module directory used by the yum-installed nginx on CentOS.
include /usr/share/nginx/modules/*.conf;

events {
    worker_connections 768;
    # multi_accept on;
}

# Only this stream block is added; everything else keeps its default value.
stream {
    log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
    access_log /var/log/nginx/k8s-access.log main;

    upstream k8s-apiserver {
        server 172.16.20.191:6443;  # master01 API server
        server 172.16.20.192:6443;  # master02 API server
        server 172.16.20.193:6443;  # master03 API server
    }

    server {
        # nginx shares the host with the masters, so it cannot listen on 6443
        # itself; 16443 is used as the load-balanced port instead.
        listen 16443;
        # TCP pass-through to the upstream API servers.
        proxy_pass k8s-apiserver;
    }
}
......
# 重启 nginx 服务
systemctl restart nginx && systemctl enable nginx && systemctl status nginx
#注意:如果出现以下报错说明nginx无stream模块 可执行:yum -y install nginx-mod-stream 解决【当然编译安装也可以~】
更多报错解决方式可以参考:https://blog.csdn.net/tky_1314/article/details/128655565
nginx: [emerg] unknown directive "stream" in /etc/nginx/nginx.conf:16
nginx: [emerg] dlopen() "/usr/share/nginx/modules/ngx_stream_module.so" failed (/usr/share/nginx/modules/ngx_stream_module.so: cannot open shared object file: No such file or directory) in /etc/nginx/nginx.conf:11
nginx: configuration file /etc/nginx/nginx.conf test failed
# 端口检查
# netstat -lntup| grep 16443
nc -l -p 16443
#nc: Address already in use
2、配置Keepalived
注:所有master节点均需要操作
# Install keepalived, plus psmisc which provides the killall command used by
# the health-check script below.
yum install -y keepalived psmisc
# Write the nginx health-check script.
# The heredoc delimiter is quoted ('EOF') so that the command substitutions and
# $counter are written to the file literally instead of being expanded by the
# current shell while the file is being created.
cat << 'EOF' > /etc/keepalived/nginx_check.sh
#!/bin/bash
# 1. Count the running nginx processes.
counter=$(ps -C nginx --no-header | wc -l)
if [ "$counter" -eq 0 ]; then
    # 2. nginx is not running: try to start it.
    systemctl start nginx
    sleep 2
    # 3. Check again after waiting two seconds.
    counter=$(ps -C nginx --no-header | wc -l)
    # 4. Still not running: stop keepalived so the VIP moves to another node.
    if [ "$counter" -eq 0 ]; then
        killall keepalived
    fi
fi
EOF
chmod +x /etc/keepalived/nginx_check.sh
master01 的 Keepalived 配置
# Write the keepalived configuration for the MASTER node (master01).
# The delimiter is quoted so the shell never expands anything in the body.
cat << 'EOF' > /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
    vrrp_skip_check_adv_addr
    vrrp_garp_interval 0
    vrrp_gna_interval 0
}

vrrp_script chk_nginx {
    script "/etc/keepalived/nginx_check.sh"  # path of the nginx health-check script
    interval 2                               # run the check every 2 seconds
    weight -20                               # lower the priority by 20 when the check fails
}

vrrp_instance VI_1 {
    state MASTER            # MASTER on the primary node, BACKUP on the others
    interface eth1          # interface that carries the VIP; must be the one holding this host's IP
    virtual_router_id 100   # virtual router id; must be identical on all nodes
    priority 100            # node priority (1-254); MASTER must be higher than BACKUP
    advert_int 1
    authentication {        # must be identical on all nodes
        auth_type PASS      # valid values are PASS or AH
        auth_pass 123456
    }
    track_script {
        chk_nginx           # run the nginx health check
    }
    virtual_ipaddress {
        172.16.20.199       # VIP; must be identical on all nodes (several may be listed)
    }
}
EOF
systemctl restart keepalived && systemctl enable keepalived.service
master02、master03 使用与上面相同的配置,但需要将 state 改为 BACKUP,并将 priority 改为 80(必须低于 MASTER 的 100)。
4、安装k8s
# 安装 Containerd
#wget -c https://github.com/containerd/containerd/releases/download/v1.7.11/containerd-1.7.11-linux-amd64.tar.gz
tar -xzvf containerd-1.7.11-linux-amd64.tar.gz
#解压出来一个bin目录,containerd可执行文件都在bin目录里面
mv bin/* /usr/local/bin/
rm -rf bin
# Create the systemd unit for containerd. The unit loads the overlay kernel
# module before start (failure is ignored because of the leading "-") and runs
# the binary from /usr/local/bin/containerd.
# NOTE(review): the unit's own comment says TasksMax requires systemd 226 or
# newer; confirm the systemd version on the target OS supports it.
cat << EOF > /usr/lib/systemd/system/containerd.service
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target
[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd
Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
# Comment TasksMax if your systemd version does not supports it.
# Only systemd 226 and above support this version.
TasksMax=infinity
OOMScoreAdjust=-999
[Install]
WantedBy=multi-user.target
EOF
# Reload unit files, then enable containerd at boot and start it immediately.
systemctl daemon-reload && systemctl enable --now containerd
#systemctl status containerd
# 安装 runc
#runc是容器运行时,runc实现了容器的init,run,create,ps...我们在运行容器所需要的cmd:
#curl -LO https://github.com/opencontainers/runc/releases/download/v1.1.10/runc.amd64 && \
install -m 755 runc.amd64 /usr/local/sbin/runc
# 安装 CNI plugins
#wget -c https://github.com/containernetworking/plugins/releases/download/v1.4.0/cni-plugins-linux-amd64-v1.4.0.tgz
#根据官网的安装步骤来,创建一个目录用于存放cni插件
mkdir -p /opt/cni/bin
tar -xzvf cni-plugins-linux-amd64-v1.4.0.tgz -C /opt/cni/bin/
# 修改 Containd 配置
#修改containerd的配置,因为containerd默认从k8s官网拉取镜像
#创建一个目录用于存放containerd的配置文件
mkdir -p /etc/containerd
#把containerd配置导出到文件
containerd config default | sudo tee /etc/containerd/config.toml
# 修改沙箱镜像
sed -i 's#sandbox_image = "registry.k8s.io/pause:.*"#sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"#' /etc/containerd/config.toml
# 修改 cgroup 为 systemd
sed -i 's#SystemdCgroup = false#SystemdCgroup = true#' /etc/containerd/config.toml
# 配置镜像加速
sed -i 's#config_path = ""#config_path = "/etc/containerd/certs.d"#' /etc/containerd/config.toml
# 配置 Containerd 镜像源
# docker hub镜像加速
mkdir -p /etc/containerd/certs.d/docker.io
cat > /etc/containerd/certs.d/docker.io/hosts.toml << EOF
server = "https://docker.io"
[host."https://dockerproxy.com"]
capabilities = ["pull", "resolve"]
[host."https://docker.m.daocloud.io"]
capabilities = ["pull", "resolve"]
[host."https://reg-mirror.qiniu.com"]
capabilities = ["pull", "resolve"]
[host."https://registry.docker-cn.com"]
capabilities = ["pull", "resolve"]
[host."http://hub-mirror.c.163.com"]
capabilities = ["pull", "resolve"]
EOF
# k8s.gcr.io镜像加速
mkdir -p /etc/containerd/certs.d/k8s.gcr.io
tee /etc/containerd/certs.d/k8s.gcr.io/hosts.toml << 'EOF'
server = "https://k8s.gcr.io"
[host."https://k8s-gcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
EOF
#重启containerd
systemctl restart containerd
#systemctl status containerd
# 安装 kubeadm、kubelet、kubectl
# 此操作会覆盖 /etc/yum.repos.d/kubernetes.repo 中现存的所有配置
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/
enabled=1
gpgcheck=1
gpgkey=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key
exclude=kubelet kubeadm kubectl cri-tools kubernetes-cni
EOF
sudo yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
sudo systemctl enable --now kubelet
# 配置 crictl socket
crictl config runtime-endpoint unix:///run/containerd/containerd.sock
crictl config image-endpoint unix:///run/containerd/containerd.sock
部署ETCD
etcd集群在master01、master02、master03三个服务器上进行部署
# NOTE(review): etcd is installed here from the yum repository and then again
# below from the v3.4.27 release tarball into /usr/local/bin. The etcd.service
# unit shown later runs /usr/local/bin/etcd, so presumably only the tarball
# install is needed -- confirm before keeping both.
yum install -y etcd
# IP addresses of the three etcd members (the master01-03 addresses).
etcd1=172.16.20.191
etcd2=172.16.20.192
etcd3=172.16.20.193
# Download and unpack the etcd v3.4.27 release, then install the binaries.
$ wget https://github.com/etcd-io/etcd/releases/download/v3.4.27/etcd-v3.4.27-linux-amd64.tar.gz
$ tar xvf etcd-v3.4.27-linux-amd64.tar.gz
$ mv etcd-v3.4.27-linux-amd64/etcd* /usr/local/bin/ ## copy the etcd binaries
$ mkdir -p /var/lib/etcd/ ## create the data directory
$ mkdir -p /etc/etcd ## create the configuration directory
# Restrict the data directory to its owner.
$ chmod 700 /var/lib/etcd/
etcd.conf配置如下
[root@master01 ~]# ansible master -m shell -a 'cat /etc/etcd/etcd.conf'
172.16.20.191 | CHANGED | rc=0 >>
ETCD_NAME=etcd-1
ETCD_DATA_DIR="/var/lib/etcd"
ETCD_LISTEN_PEER_URLS="http://172.16.20.191:2380"
ETCD_LISTEN_CLIENT_URLS="http://127.0.0.1:2379,http://172.16.20.191:2379"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.20.191:2380"
ETCD_INITIAL_CLUSTER="etcd-1=http://172.16.20.191:2380,etcd-2=http://172.16.20.192:2380,etcd-3=http://172.16.20.193:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="Qaz767791."
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.20.191:2379"
172.16.20.192 | CHANGED | rc=0 >>
ETCD_NAME=etcd-2
ETCD_DATA_DIR="/var/lib/etcd"
ETCD_LISTEN_PEER_URLS="http://172.16.20.192:2380"
ETCD_LISTEN_CLIENT_URLS="http://127.0.0.1:2379,http://172.16.20.192:2379"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.20.192:2380"
ETCD_INITIAL_CLUSTER="etcd-1=http://172.16.20.191:2380,etcd-2=http://172.16.20.192:2380,etcd-3=http://172.16.20.193:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="Qaz767791."
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.20.192:2379"
172.16.20.193 | CHANGED | rc=0 >>
ETCD_NAME=etcd-3
ETCD_DATA_DIR="/var/lib/etcd"
ETCD_LISTEN_PEER_URLS="http://172.16.20.193:2380"
ETCD_LISTEN_CLIENT_URLS="http://127.0.0.1:2379,http://172.16.20.193:2379"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.20.193:2380"
ETCD_INITIAL_CLUSTER="etcd-1=http://172.16.20.191:2380,etcd-2=http://172.16.20.192:2380,etcd-3=http://172.16.20.193:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="Qaz767791."
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.20.193:2379"
$ cat /usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
User=root
Type=notify
EnvironmentFile=-/etc/etcd/etcd.conf
ExecStart=/usr/local/bin/etcd
Restart=on-failure
RestartSec=10s
LimitNOFILE=40000
[Install]
WantedBy=multi-user.target
启动etcd
systemctl enable etcd --now
echo "验证etcd集群"
etcdctl member list
查看集群leader
etcdctl -w table endpoint status --endpoints=172.16.20.191:2379,172.16.20.192:2379,172.16.20.193:2379
查看集群成员健康状态
etcdctl -w table endpoint health --endpoints=172.16.20.191:2379,172.16.20.192:2379,172.16.20.193:2379
移除异常节点
etcdctl member remove da811274ead3f06a
增加节点
etcdctl member add etcd-4 --peer-urls=http://172.16.20.192:2380
初始化master01
暴露环境变量
# Variables substituted into the kubeadm configuration file generated below.
# NOTE(review): the component list at the top of this guide says Kubernetes
# 1.29.2 while this sets 1.29.1 -- confirm which version is intended.
export K8S_VERSION=1.29.1 # Kubernetes cluster version
export POD_CIDR=10.244.0.0/16 # pod network CIDR
export SERVICE_CIDR=10.96.0.0/12 # service network CIDR
export APISERVER_MASTER01=172.16.20.191 # master01 IP
export APISERVER_HA=172.16.20.199 # cluster VIP address
export APISERVER_HA_PORT=16443 # cluster VIP port (the nginx stream listener)
# 命令行方式初始化, 后面需要手动更改 kube-proxy 为 ipvs 模式
# kubeadm init --apiserver-advertise-address=$APISERVER_MASTER01 --apiserver-bind-port=6443 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.29.1 --service-cidr=$SERVICE_CIDR --pod-network-cidr=$POD_CIDR --upload-certs
如果出现:[ERROR FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables does not exist
这种情况是因为 br_netfilter 内核模块没有加载。执行下面的命令加载该模块,并确认模块已经加载完成:
#驱动加载
modprobe br_netfilter
bridge
同时在/etc/sysctl.conf中添加:
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
最后执行 sysctl -p 使配置生效:
sysctl -p
# 命令行方式初始化, 后面需要手动更改 kube-proxy 为 ipvs 模式
# kubeadm init --apiserver-advertise-address=$APISERVER_MASTER01 --apiserver-bind-port=6443 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.29.1 --service-cidr=$SERVICE_CIDR --pod-network-cidr=$POD_CIDR --upload-certs
# kubeadm config print init-defaults >Kubernetes-cluster.yaml # kubeadm 默认配置
# Node name to register with the cluster. Falls back to this machine's
# hostname when the variable "name" has not been set by the caller; without
# this the name field below would be rendered empty.
name="${name:-$(hostname)}"
# Generate the kubeadm configuration. The heredoc delimiter is intentionally
# unquoted so the shell substitutes APISERVER_MASTER01, APISERVER_HA,
# APISERVER_HA_PORT, K8S_VERSION, POD_CIDR, SERVICE_CIDR and name.
cat << EOF > Kubernetes-cluster.yaml
---
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
  - groups:
      - system:bootstrappers:kubeadm:default-node-token
    token: abcdef.0123456789abcdef
    ttl: 24h0m0s
    usages:
      - signing
      - authentication
kind: InitConfiguration
localAPIEndpoint:
  # Address advertised by the API server on this node; it must be an IP that
  # exists on this host, otherwise binding fails.
  advertiseAddress: $APISERVER_MASTER01
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  # Node hostname.
  name: "$name"
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
  # Extra addresses placed in the API server certificate.
  certSANs:
    - $APISERVER_HA
    - $APISERVER_MASTER01
apiVersion: kubeadm.k8s.io/v1beta3
# Highly available API server endpoint (keepalived VIP and nginx stream port).
controlPlaneEndpoint: "$APISERVER_HA:$APISERVER_HA_PORT"
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
# Use the etcd cluster deployed earlier on the three masters. A local (stacked)
# etcd would collide with it: it needs the same ports 2379/2380 and the same
# /var/lib/etcd data directory.
# The endpoints are plain HTTP without client certificates, matching the
# etcd.conf shown earlier; secure them with TLS before production use.
etcd:
  external:
    endpoints:
      - http://172.16.20.191:2379
      - http://172.16.20.192:2379
      - http://172.16.20.193:2379
# Image mirror reachable from mainland China.
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: $K8S_VERSION
networking:
  dnsDomain: cluster.local
  # Pod network CIDR.
  podSubnet: $POD_CIDR
  serviceSubnet: $SERVICE_CIDR
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# Run kube-proxy in ipvs mode.
mode: ipvs
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
EOF
# Initialise the first control-plane node; --upload-certs stores the control
# plane certificates in the cluster so the other masters can join.
kubeadm init --config Kubernetes-cluster.yaml --upload-certs
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Install Calico. Use exactly ONE of the two methods below; applying both
# deploys Calico twice and the two installations conflict.
# v3.27 is used because it lists Kubernetes 1.29 as a supported version.
CALICO_VERSION=v3.27.0

# Method 1 (used here): Tigera operator.
wget https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml
wget https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/custom-resources.yaml
# Point the default IP pool at the pod CIDR given to kubeadm.
sed -i 's#cidr.*#cidr: '$POD_CIDR'#' custom-resources.yaml
kubectl create -f tigera-operator.yaml
kubectl create -f custom-resources.yaml

# Method 2 (alternative, do not combine with method 1): plain manifest.
# wget https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/calico.yaml
# sed -i 's#docker.io/##g' calico.yaml
# kubectl apply -f calico.yaml
最后等待 calico 完成各个节点网络的初始化,再按照集群初始化(kubeadm init)完成后输出的 kubeadm join 命令,将其他节点加入集群即可。
评论区