Skip to content
Snippets Groups Projects
Commit 003828b3 authored by Tim Kreuzer's avatar Tim Kreuzer
Browse files

add istio

parent e86ccde2
No related branches found
No related tags found
No related merge requests found
# Service account referenced by the cadvisor DaemonSet (serviceAccountName)
# and bound to the cadvisor ClusterRole by the ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cadvisor
  namespace: kube-system
  labels:
    app: cadvisor
---
# Allows the holder to "use" exactly one PodSecurityPolicy: the one named
# cadvisor.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cadvisor
  labels:
    app: cadvisor
rules:
  - apiGroups: [policy]
    resources: [podsecuritypolicies]
    resourceNames: [cadvisor]
    verbs: [use]
---
# Binds the cadvisor ClusterRole to the cadvisor ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cadvisor
  labels:
    app: cadvisor
subjects:
  - kind: ServiceAccount
    name: cadvisor
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cadvisor
---
# cAdvisor DaemonSet: runs one cadvisor container per scheduled node, mounts a
# set of host paths read-only and exposes container port 8080 (named "http").
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: kube-system
  labels:
    app: cadvisor
  annotations:
    # NOTE(review): this annotation is set on the DaemonSet object itself, not
    # on the pod template below, and the alpha seccomp annotations are
    # deprecated upstream — confirm whether it still has any effect.
    seccomp.security.alpha.kubernetes.io/pod: docker/default
spec:
  selector:
    matchLabels:
      app: cadvisor
      name: cadvisor
  template:
    metadata:
      labels:
        app: cadvisor
        name: cadvisor
      annotations:
        # NOTE(review): legacy critical-pod marker; priorityClassName is also
        # set in the pod spec — confirm this annotation is still needed.
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      serviceAccountName: cadvisor
      # No API token is mounted into the pod.
      automountServiceAccountToken: false
      priorityClassName: system-node-critical
      terminationGracePeriodSeconds: 30
      containers:
        - name: cadvisor
          image: gcr.io/cadvisor/cadvisor:v0.45.0
          args:
            - --housekeeping_interval=10s
            - --max_housekeeping_interval=15s
            - --event_storage_event_limit=default=0
            - --event_storage_age_limit=default=0
            - --enable_metrics=app,cpu,disk,diskIO,memory,network,process
            - --docker_only
            - --store_container_labels=false
            # Only these three labels are kept; the ServiceMonitor relabels
            # them to pod / container / namespace.
            - --whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            requests:
              cpu: 200m
              memory: 50Mi
            limits:
              cpu: 800m
              memory: 200Mi
          # Every host path is mounted read-only.
          volumeMounts:
            - name: rootfs
              mountPath: /rootfs
              readOnly: true
            - name: var-run
              mountPath: /var/run
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: docker
              mountPath: /var/lib/docker
              readOnly: true
            - name: disk
              mountPath: /dev/disk
              readOnly: true
      # Tolerate the control-plane/etcd/master taints and the cluster's custom
      # node taints so the pod can be scheduled on those nodes as well.
      tolerations:
        - key: node-role.kubernetes.io/controlplane
          value: "true"
          effect: NoSchedule
        - key: node-role.kubernetes.io/etcd
          value: "true"
          effect: NoExecute
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
        - key: airflow
          value: "true"
          effect: NoExecute
        - key: airflow
          value: "true"
          effect: NoSchedule
        - key: loki
          value: "true"
          effect: NoSchedule
        - key: usernode
          value: "true"
          effect: NoExecute
        - key: monitoring
          value: "true"
          effect: NoExecute
      volumes:
        - name: rootfs
          hostPath:
            path: /
        - name: var-run
          hostPath:
            path: /var/run
        - name: sys
          hostPath:
            path: /sys
        - name: docker
          hostPath:
            path: /var/lib/docker
        - name: disk
          hostPath:
            path: /dev/disk
---
# PodSecurityPolicy permitting the host paths mounted by the cadvisor
# DaemonSet. PodSecurityPolicy is a cluster-scoped resource, so it carries no
# metadata.namespace.
# NOTE(review): policy/v1beta1 PodSecurityPolicy is removed in Kubernetes 1.25;
# this object (and the ClusterRole that grants "use" on it) must be dropped or
# replaced before upgrading past 1.24.
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: cadvisor
  labels:
    app: cadvisor
spec:
  # The "/" prefix already covers every path; the narrower prefixes list the
  # paths the DaemonSet actually mounts.
  allowedHostPaths:
    - pathPrefix: /
    - pathPrefix: /var/run
    - pathPrefix: /sys
    - pathPrefix: /var/lib/docker
    - pathPrefix: /dev/disk
  fsGroup:
    rule: RunAsAny
  runAsUser:
    rule: RunAsAny
  seLinux:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  volumes:
    - '*'
---
# Service in front of the cadvisor pods; its port name "cadvisor" is what the
# ServiceMonitor endpoint refers to.
apiVersion: v1
kind: Service
metadata:
  name: cadvisor
  namespace: kube-system
  labels:
    app: cadvisor
spec:
  selector:
    app: cadvisor
  ports:
    - name: cadvisor
      protocol: TCP
      port: 8080
      targetPort: 8080
---
# ServiceMonitor scraping the cadvisor Service (label app=cadvisor) in
# kube-system through its port named "cadvisor".
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: cadvisor
  namespace: kube-system
  labels:
    app: cadvisor
spec:
  selector:
    matchLabels:
      app: cadvisor
  namespaceSelector:
    matchNames:
      - kube-system
  endpoints:
    - port: cadvisor
      # Target relabeling: record the node name, and set fixed metrics_path
      # and job label values.
      # NOTE(review): presumably chosen so the series look like the kubelet
      # cAdvisor job expected by the dashboards — confirm.
      relabelings:
        - sourceLabels:
            - __meta_kubernetes_pod_node_name
          targetLabel: node
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
          replacement: /metrics/cadvisor
        - sourceLabels:
            - job
          targetLabel: job
          replacement: kubelet
      # Metric relabeling: copy the three container_label_io_kubernetes_*
      # labels to pod / container / namespace, then drop the long originals.
      metricRelabelings:
        - sourceLabels:
            - container_label_io_kubernetes_pod_name
          targetLabel: pod
        - sourceLabels:
            - container_label_io_kubernetes_container_name
          targetLabel: container
        - sourceLabels:
            - container_label_io_kubernetes_pod_namespace
          targetLabel: namespace
        - action: labeldrop
          regex: container_label_io_kubernetes_pod_name
        - action: labeldrop
          regex: container_label_io_kubernetes_container_name
        - action: labeldrop
          regex: container_label_io_kubernetes_pod_namespace
# Fleet bundle option: namespace applied to resources that do not set one.
# NOTE(review): presumably the fleet.yaml belonging to the cAdvisor manifests
# above — confirm the file boundary.
defaultNamespace: kube-system
# Fleet bundle installing the rancher-monitoring-crd chart into
# cattle-monitoring-system.
defaultNamespace: cattle-monitoring-system
helm:
  repo: https://charts.rancher.io
  chart: rancher-monitoring-crd
  version: 102.0.1+up40.1.2
  releaseName: rancher-monitoring-crd
# Fleet bundle installing the rancher-istio chart with the ingress gateways
# and Kiali disabled; it is applied after the monitoring-crd bundle.
# NOTE(review): the source line read
# "defaultNamespace: default defaultNamespace: istio-system" (two mappings
# fused on one line, which is not valid YAML). istio-system is kept here;
# confirm that "default" was only the superseded value.
defaultNamespace: istio-system
helm:
  releaseName: rancher-istio
  repo: https://charts.rancher.io
  chart: rancher-istio
  version: 102.1.0+up1.16.3
  values:
    ingressGateways:
      enabled: false
    kiali:
      enabled: false
dependsOn:
  - name: monitoring-crd
# Fleet bundle installing the rancher-monitoring chart into
# cattle-monitoring-system, with per-component resource requests/limits.
# NOTE(review): the indentation of this file was lost in this copy; the key
# nesting must be restored from the original file before it can be parsed.
defaultNamespace: cattle-monitoring-system
helm:
releaseName: rancher-monitoring
repo: https://charts.rancher.io
chart: rancher-monitoring
version: 102.0.1+up40.1.2
values:
alertmanager:
alertmanagerSpec:
# Alertmanager reads its configuration from an already existing secret.
configSecret: alertmanager-rancher-monitoring-alertmanager
useExistingSecret: true
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
cpu: 50m
memory: 250Mi
grafana:
persistence:
enabled: true
# Same claim name as the grafana default in the storage chart's values.
existingClaim: grafana-pvc
resources:
requests:
cpu: 10m
memory: 150Mi
limits:
cpu: 100m
memory: 250Mi
ingressNginx:
enabled: false
# Workaround for broken Grafana dashboards: the kubelet cAdvisor ServiceMonitor
# is switched off here and cAdvisor is deployed manually instead
# (see https://github.com/rancher/rancher/issues/38934#issuecomment-1294585708).
kubelet:
serviceMonitor:
cAdvisor: false
kube-state-metrics:
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
cpu: 50m
memory: 200Mi
prometheus:
prometheusSpec:
evaluationInterval: 1m
# Size-based retention is set below the 50Gi volume requested in storageSpec.
retentionSize: 45GiB
scrapeInterval: 1m
storageSpec:
volumeClaimTemplate:
spec:
resources:
requests:
storage: 50Gi
storageClassName: csi-cinder-sc-retain
# NOTE(review): presumably the Prometheus container resources (a sibling of
# storageSpec), not part of the volume claim — confirm against the original.
resources:
requests:
memory: 2000Mi
cpu: 300m
limits:
memory: 4000Mi
cpu: 1000m
prometheusOperator:
resources:
requests:
cpu: 10m
memory: 150Mi
limits:
cpu: 50m
memory: 500Mi
# RKE component metric collectors are all enabled.
rkeControllerManager:
enabled: true
rkeEtcd:
enabled: true
rkeProxy:
enabled: true
rkeScheduler:
enabled: true
prometheus-node-exporter:
resources:
requests:
cpu: 10m
memory: 20Mi
limits:
cpu: 50m
memory: 50Mi
# Per-cluster overrides: for clusters labelled name=jupyterjsc-staging, the
# components listed below get nodeSelector monitoring="true" plus a toleration
# for the monitoring=true:NoExecute taint.
# NOTE(review): the indentation of this file was lost in this copy; the key
# nesting must be restored from the original file before it can be parsed.
targetCustomizations:
- name: jupyterjsc-staging
clusterSelector:
matchLabels:
name: jupyterjsc-staging
helm:
values:
alertmanager:
alertmanagerSpec:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
grafana:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
# Grafana SMTP settings used for outgoing mail.
grafana.ini:
smtp:
enabled: true
from_address: jupyter.jsc@fz-juelich.de
from_name: Jupyter-JSC Monitoring
host: mail.fz-juelich.de:25
kube-state-metrics:
metricLabelsAllowlist:
# Allowlisted labels, used to select jupyterhub/singleuser-server metrics.
- nodes=[nodepool]
- pods=[app,component,hub.jupyter.org/username,hub.jupyter.org/vo,hub.jupyter.org/name,hub.jupyter.org/service,hub.jupyter.org/system,hub.jupyter.org/account,hub.jupyter.org/project,hub.jupyter.org/partition,hub.jupyter.org/nodes,hub.jupyter.org/gpus,hub.jupyter.org/runtime,hub.jupyter.org/reservation]
- services=[app,component,hub.jupyter.org/username,hub.jupyter.org/vo,hub.jupyter.org/name,hub.jupyter.org/service,hub.jupyter.org/system,hub.jupyter.org/account,hub.jupyter.org/project,hub.jupyter.org/partition,hub.jupyter.org/nodes,hub.jupyter.org/gpus,hub.jupyter.org/runtime,hub.jupyter.org/reservation]
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
prometheus:
prometheusSpec:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
prometheus-adapter:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
prometheusOperator:
admissionWebhooks:
patch:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
# NOTE(review): this second nodeSelector/tolerations pair presumably applies to
# prometheusOperator itself rather than to admissionWebhooks.patch — confirm.
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
rkeControllerManager:
proxy:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
rkeEtcd:
proxy:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
# Unlike the other rke* entries, this one configures "clients", not "proxy".
rkeIngressNginx:
clients:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
rkeProxy:
proxy:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
rkeScheduler:
proxy:
nodeSelector:
monitoring: "true"
tolerations:
- key: "monitoring"
value: "true"
effect: "NoExecute"
# Bundles this bundle depends on.
dependsOn:
  - name: ingress-nginx
  - name: monitoring-cadvisor
  - name: monitoring-crd
  - name: monitoring-storage
# Drift-detection exceptions: the /webhooks field of both
# rancher-monitoring-admission webhook configurations is left out of the
# comparison.
diff:
  comparePatches:
    - apiVersion: admissionregistration.k8s.io/v1
      kind: MutatingWebhookConfiguration
      name: rancher-monitoring-admission
      operations:
        - op: "remove"
          path: "/webhooks"
    - apiVersion: admissionregistration.k8s.io/v1
      kind: ValidatingWebhookConfiguration
      name: rancher-monitoring-admission
      jsonPointers:
        - "/webhooks"
# NOTE(review): presumably the .helmignore of the chart whose Chart.yaml
# follows — confirm the file boundary.
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# Chart metadata for the "storage" Helm chart.
apiVersion: v2
name: storage
description: A Helm chart for Kubernetes

# Chart type. An 'application' chart bundles templates into a versioned,
# deployable archive. A 'library' chart only supplies utilities and functions
# to other charts' rendering pipelines; it defines no templates and cannot be
# deployed on its own.
type: application

# Chart version, following Semantic Versioning (https://semver.org/).
# Increment it on every change to the chart or its templates, including
# changes of the app version.
version: 0.1.0

# Version of the application being deployed. Increment it on every change to
# the application; it need not follow Semantic Versioning and should mirror
# the version the application itself uses. Keeping it quoted is recommended.
appVersion: "1.16.0"
# Fleet bundle options for the storage chart: resources default to the
# cattle-monitoring-system namespace and the bundle depends on cinder-csi.
defaultNamespace: cattle-monitoring-system
dependsOn:
  - name: cinder-csi
# PersistentVolumeClaim for Grafana. Name, storage class and size come from
# .Values.grafana; each value is piped through `quote` so it always renders as
# a YAML string, even when it is empty or looks like a number.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.grafana.name | quote }}
spec:
  storageClassName: {{ .Values.grafana.storageClassName | quote }}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .Values.grafana.size | quote }}
# PersistentVolumeClaim for Prometheus. Name, storage class and size come from
# .Values.prometheus; each value is piped through `quote` so it always renders
# as a YAML string, even when it is empty or looks like a number.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Values.prometheus.name | quote }}
  labels:
    app: prometheus
    prometheus: prometheus-operator-prometheus
spec:
  storageClassName: {{ .Values.prometheus.storageClassName | quote }}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .Values.prometheus.size | quote }}
# Default values for the storage chart (YAML).
# The PVC templates read name, size and storageClassName from each section.
grafana:
  name: "grafana-pvc"
  size: "40Gi"
  storageClassName: "csi-cinder-sc-retain"
prometheus:
  name: "prometheus-rancher-monitoring-prometheus-db-prometheus-rancher-monitoring-prometheus-0"
  size: "50Gi"
  storageClassName: "csi-cinder-sc-retain"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment