63、Prometheus-独立部署的Prometheus监控K8S集群

发布时间 2023-04-18 19:49:03作者: 小粉优化大师

Kubernetes学习目录

1、简介

1.1、原因

这里我们以Prometheus的配置为例,解析如何获取各个所需的文件以及相关的原理问题,不会细写如何通过标签去获取数据的规则;先把获取K8S数据的链路打通,有助于后面的深入。

研究了四五天,网上搜到的资料对于如何获取相关token和ca.crt文件这块大多一笔带过,因此踩了不少坑。

1.2、Prometheus版本

prometheus]# prometheus --version
prometheus, version 2.43.0 (branch: HEAD, revision: edfc3bcd025dd6fe296c167a14a216cab1e552ee)
  build user:       root@8a0ee342e522
  build date:       20230321-12:56:07
  go version:       go1.19.7
  platform:         linux/amd64
  tags:             netgo,builtinassets

2、Prometheus配置

2.1、复制ca.crt到Prometheus server,使Linux系统信任此CA

2.1.1、不复制授信CA报错

Apr 18 09:56:18 prometheus-server prometheus: ts=2023-04-18T01:56:18.116Z caller=klog.go:116 level=error component=k8s_client_runtime func=ErrorDepth msg="pkg/mod/k8s.io/client-go@v0.26.2/tools/cache/reflector.go:169: Failed to watch *v1.Pod: failed to list *v1.Pod: Get \"https://192.168.10.26:6443/api/v1/pods?limit=500&resourceVersion=0\": x509: certificate signed by unknown authority"

2.1.2、如何获取ca.crt

# 在apiserver操作
[root@master1 ~]# ll /etc/kubernetes/pki/ca.*
-rw-r--r-- 1 root root 1099 Apr 14 16:52 /etc/kubernetes/pki/ca.crt
-rw------- 1 root root 1675 Apr 14 16:52 /etc/kubernetes/pki/ca.key
-rw-r--r-- 1 root root   17 Apr 16 23:58 /etc/kubernetes/pki/ca.srl

2.1.3、复制为linux系统授信

# 复制好,无需做其它操作
[root@master1 ~]# scp /etc/kubernetes/pki/ca.crt root@192.168.10.34:/etc/ssl/certs/k8s.crt

2.2、监控apiserver的示例解析-prometheus.yml

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:

- job_name: kubernetes-apiservers
  kubernetes_sd_configs:
  - role: endpoints
    api_server: https://192.168.10.26:6443
    # 下面部分没有配置的话,会报使用匿名用户获取,没有RBAC权限的问题
    bearer_token_file: /data/server/prometheus/etc/token
    tls_config:
      insecure_skip_verify: true
  relabel_configs:
  - action: keep
    regex: default;kubernetes;https
    source_labels:
    - __meta_kubernetes_namespace
    - __meta_kubernetes_service_name
    - __meta_kubernetes_endpoint_port_name
  scheme: https
  # 这部分没有配置的话,会报:server returned HTTP status 403 Forbidden
  bearer_token_file: /data/server/prometheus/etc/token
  tls_config:
    insecure_skip_verify: true

# 这里以监控apiserver为例。切记:原文中标红的部分(即上面两处带注释的 bearer_token_file / tls_config 配置)在新版本中是必需的,否则会导致失败。

2.3、serviceAccount与token关联

2.3.1、查询serviceAccount信息

master1 ~]# kubectl get sa my-prom-prometheus-server 
NAME                        SECRETS   AGE
my-prom-prometheus-server   0         21h

2.3.2、创建secret并且与sa关联

kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: prometheus
  annotations:
    kubernetes.io/service-account.name: my-prom-prometheus-server
type: kubernetes.io/service-account-token
EOF

2.3.3、查询sa关联的信息

[root@master1 ~]# kubectl describe sa my-prom-prometheus-server 
Name:                my-prom-prometheus-server
Namespace:           default
Labels:              app=prometheus
                     app.kubernetes.io/managed-by=Helm
                     chart=prometheus-20.2.0
                     component=server
                     heritage=Helm
                     release=my-prom
Annotations:         meta.helm.sh/release-name: my-prom
                     meta.helm.sh/release-namespace: default
Image pull secrets:  <none>
Mountable secrets:   <none>
Tokens:              prometheus
Events:              <none>

2.3.4、获取token值

[root@master1 ~]# kubectl get secrets prometheus -o go-template --template='{{.data.token}}' | base64 -d > /tmp/token
[root@master1 ~]# cat /tmp/token 
eyJhbGciOiJSUzI1NiIsImtpZCI6ImxlSDFNY1UtX2hWUzlkZVU5a3BEdGhXeFBqbmdLcXBrS3QydWJxOHdGQzgifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJkZWZhdWx0Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZWNyZXQubmFtZSI6InByb21ldGhldXMiLCJrdWJlcm5ldGVzLmlvL3NlcnZpY2VhY2NvdW50L3NlcnZpY2UtYWNjb3VudC5uYW1lIjoibXktcHJvbS1wcm9tZXRoZXVzLXNlcnZlciIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50LnVpZCI6ImY1MGIxOGNmLTYzNTYtNDNiNy1hYWIzLTA5YTJlMWI0ODVmOCIsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDpkZWZhdWx0Om15LXByb20tcHJvbWV0aGV1cy1zZXJ2ZXIifQ.hKV62eieWNogKDurt96e0MKVQEcmdWKuvBBEurgxB097-eRXIxLIExY-qkMjHsxkjsyFdmbw0iXU-I6UR6ScwoUIEDEF93yzlkObV5b4go4pMwvuBTbniK3onoe6AZuAuU-SJY6mxwf2oDFGPhXiTNq7RRKD-cD5romGNHB-gczJlbUuzagUdl0ovDyQfhAdDST1ce4aYuE5OQlaaP67tn6wSXwSb3gT8uqQGCw_hHUjWS6CSzTGlOejTj3FAewjo0P-lNFfILOoGj2UlCwmKvI3rZD-9KAO-EJQOUA6FjJU1803QKdcqaN1r3eELUy7S3msMO_qmpYIGWtXbb0o9Q

2.3.5、将token复制到prometheus目录

scp /tmp/token root@192.168.10.34:/data/server/prometheus/etc/

2.3.6、在prometheus主机上测试token是否可以使用

token=$(cat /data/server/prometheus/etc/token)
curl https://192.168.10.26:6443/metrics -H "Authorization: Bearer ${token}"  -k

2.4、重启prometheus服务并且查看效果

2.4.1、重启prometheus服务

systemctl restart prometheus

2.4.2、查看监控效果

说明监控K8S链路已经打通

3、Prometheus RBAC流程分析

3.1、创建serviceAccount【用户帐号】

cat > serviceaccount.yaml <<'EOF'
apiVersion: v1
kind: ServiceAccount
metadata:
  name: my-prom-prometheus-server
  namespace: default
EOF

3.2、创建ClusterRole【集群角色和权限】

cat > clusterrole.yaml <<'EOF'
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: release-name-prometheus-server
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - pods/proxy
      - nodes/metrics
      - services
      - endpoints
      - pods
      - ingresses
      - configmaps
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - "extensions"
      - "networking.k8s.io"
    resources:
      - ingresses/status
      - ingresses
    verbs:
      - get
      - list
      - watch
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
EOF

3.3、创建ClusterRoleBinding【ServiceAccount与ClusterRole绑定关系】

cat > clusterrolebinding.yaml <<'EOF'
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: release-name-prometheus-server
subjects:
  - kind: ServiceAccount
    name: my-prom-prometheus-server
    namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: release-name-prometheus-server
EOF

3.4、ServiceAccount与token关联【认证阶段】

kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: prometheus
  annotations:
    kubernetes.io/service-account.name: my-prom-prometheus-server
type: kubernetes.io/service-account-token
EOF

3.5、用户整个流程总结

用户/接口 -> token+sa -> 认证阶段 -> 鉴权阶段 -> ClusterRoleBinding【查询sa与clusterRole的关联信息】 -> clusterRole【查询是否有权限:有权限就通过,没有权限则返回403】

4、增加监控K8S-node节点-示例

4.1、配置prometheus

- job_name: kubernetes-nodes
  kubernetes_sd_configs:
  - role: node
    api_server: https://192.168.10.26:6443
    bearer_token_file: /data/server/prometheus/etc/token
    tls_config:
      insecure_skip_verify: true
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - replacement: 192.168.10.26:6443
    target_label: __address__
  - regex: (.+)
    replacement: /api/v1/nodes/$1/proxy/metrics
    source_labels:
    - __meta_kubernetes_node_name
    target_label: __metrics_path__
  scheme: https
  bearer_token_file: /data/server/prometheus/etc/token
  tls_config:
    insecure_skip_verify: true

4.2、重启prometheus服务

systemctl restart prometheus

4.3、查看监控效果

5、增加kube-state-metrics-示例

需要pods/proxy资源的权限,记得在ClusterRole(见3.2)的resources中增加。

 

5.1、配置prometheus

- honor_labels: true
  job_name: kube-state-metrics
  kubernetes_sd_configs:
  - role: pod
    api_server: https://192.168.10.26:6443
    bearer_token_file: /data/server/prometheus/etc/token
    tls_config:
      insecure_skip_verify: true
  relabel_configs:
    - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
      action: keep
      regex: 'kube-state-metrics'
    - source_labels: [__address__]
      action: replace
      target_label: instance
    - target_label: __address__
      replacement: 192.168.10.26:6443
    - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name, __meta_kubernetes_pod_container_port_number]
      regex: ([^;]+);([^;]+);([^;]+)
      target_label: __metrics_path__
      replacement: /api/v1/namespaces/${1}/pods/http:${2}:${3}/proxy/metrics
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_service_name]
      action: replace
      target_label: service_name
  scheme: https
  bearer_token_file: /data/server/prometheus/etc/token
  tls_config:
    insecure_skip_verify: true

5.2、重启prometheus服务

systemctl restart prometheus

5.3、查看监控效果

6、增加coreDNS-示例

需要pods/proxy资源的权限,记得在ClusterRole(见3.2)的resources中增加。

 

6.1、配置prometheus

- honor_labels: true
  job_name: kubernetes-coreDNS
  kubernetes_sd_configs:
  - role: endpoints
    api_server: https://192.168.10.26:6443
    bearer_token_file: /data/server/prometheus/etc/token
  relabel_configs:
    - source_labels: [__meta_kubernetes_endpoints_name]
      action: keep
      regex: 'kube-dns'
    - source_labels: [__meta_kubernetes_pod_container_port_number]
      action: keep
      regex: '9153'
    - target_label: __address__
      replacement: 192.168.10.26:6443
    - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name, __meta_kubernetes_pod_container_port_number]
      regex: ([^;]+);([^;]+);([^;]+)
      target_label: __metrics_path__
      replacement: /api/v1/namespaces/${1}/pods/http:${2}:${3}/proxy/metrics
  scheme: https
  bearer_token_file: /data/server/prometheus/etc/token
  tls_config:
    insecure_skip_verify: true

6.2、开放CoreDNS Metrics

]# kubectl -n kube-system edit svc kube-dns 
...
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/port: "9153"
    prometheus.io/scrape: "true"
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    kubernetes.io/name: CoreDNS

6.3、重启prometheus服务

systemctl restart prometheus

6.4、查看监控效果