Cast commit

This commit is contained in:
ClusterForge 2025-10-06 09:34:03 +00:00
commit 033556c928
722 changed files with 287465 additions and 0 deletions

View File

@ -0,0 +1,14 @@
---
# cert-manager Certificate covering the AIRM RabbitMQ service name and its
# per-node wildcard name. It is signed by the namespaced Issuer
# airm-selfsigned-issuer and written to the Secret airm-tls-secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: airm-tls-secret
  namespace: airm
spec:
  secretName: airm-tls-secret
  issuerRef:
    kind: Issuer
    name: airm-selfsigned-issuer
  dnsNames:
    - airm-rabbitmq.airm.svc.cluster.local
    # Quoted: a leading `*` would otherwise be read as a YAML alias.
    - '*.airm-rabbitmq-nodes.airm.svc.cluster.local'

View File

@ -0,0 +1,36 @@
---
# Kyverno policy: when a Namespace is created with a non-empty
# airm.silogen.ai/project-id label, generate a RoleBinding inside it that
# binds the airm-project-member ClusterRole to the group named
# "oidc<namespace name>".
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: airm-project-namespace-rolebinding
spec:
  background: false
  rules:
    - name: generate-project-namespace-rolebinding
      match:
        any:
          - resources:
              kinds: [Namespace]
              operations: [CREATE]
      preconditions:
        any:
          # A missing label falls back to '' and therefore skips the rule.
          - key: '{{request.object.metadata.labels."airm.silogen.ai/project-id" || '''' }}'
            operator: NotEquals
            value: ""
      generate:
        apiVersion: rbac.authorization.k8s.io/v1
        kind: RoleBinding
        name: '{{request.object.metadata.name}}-member-role-binding'
        namespace: '{{request.object.metadata.name}}'
        synchronize: true
        data:
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: ClusterRole
            name: airm-project-member
          subjects:
            - apiGroup: rbac.authorization.k8s.io
              kind: Group
              name: 'oidc{{request.object.metadata.name}}'

View File

@ -0,0 +1,92 @@
---
# Kyverno policy for namespaces that carry the airm.silogen.ai/project-id
# label: point each workload at the queue named after its namespace.
# Every rule is skipped once the queue value already equals the namespace.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: airm-quota-enforcement-for-workloads
spec:
  background: false
  rules:
    # Deployments, StatefulSets and Pods: only the queue-name label is set.
    - name: set-queue-name-from-namespace-default
      match:
        resources:
          kinds: [Deployment, StatefulSet, Pod]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.metadata.labels."kueue.x-k8s.io/queue-name" || '''' }}'
            operator: NotEquals
            value: '{{request.namespace }}'
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              kueue.x-k8s.io/queue-name: '{{request.namespace }}'

    # Jobs: set the label and also mark the Job as suspended.
    - name: set-queue-name-from-namespace-jobs
      match:
        resources:
          kinds: [Job]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.metadata.labels."kueue.x-k8s.io/queue-name" || '''' }}'
            operator: NotEquals
            value: '{{request.namespace }}'
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              kueue.x-k8s.io/queue-name: '{{request.namespace }}'
          spec:
            suspend: true

    # CronJobs: the label and the suspend flag go on the job template, so
    # they apply to the Jobs the CronJob spawns.
    - name: set-queue-name-from-namespace-cronjobs
      match:
        resources:
          kinds: [CronJob]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.spec.jobTemplate.metadata.labels."kueue.x-k8s.io/queue-name" || '''' }}'
            operator: NotEquals
            value: '{{request.namespace }}'
      mutate:
        patchStrategicMerge:
          spec:
            jobTemplate:
              metadata:
                labels:
                  kueue.x-k8s.io/queue-name: '{{request.namespace }}'
              spec:
                suspend: true

    # Kaiwo custom resources carry the queue in spec.clusterQueue.
    - name: set-queue-name-from-namespace-kaiwo
      match:
        resources:
          kinds: [KaiwoJob, KaiwoService]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.spec.clusterQueue || '''' }}'
            operator: NotEquals
            value: '{{request.namespace }}'
      mutate:
        patchStrategicMerge:
          spec:
            clusterQueue: '{{request.namespace }}'

View File

@ -0,0 +1,170 @@
---
# Kyverno policy that stamps tracking metadata on workloads created in
# namespaces labelled airm.silogen.ai/project-id: discovery annotations,
# the project-id label copied from the namespace, and workload/component
# id labels.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: airm-workload-tracking-policy
spec:
  background: false
  rules:
    # Objects with no workload-id label, no component-id label and no
    # auto-discovered annotation get the three discovery annotations.
    - name: add-discovery-annotations-for-supported-types
      match:
        resources:
          kinds: [Job, Deployment, StatefulSet, DaemonSet, CronJob, KaiwoJob, KaiwoService, Pod]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || '''' }}'
            operator: Equals
            value: ""
          - key: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || '''' }}'
            operator: Equals
            value: ""
          - key: '{{request.object.metadata.annotations."airm.silogen.ai/auto-discovered" || '''' }}'
            operator: Equals
            value: ""
      mutate:
        patchStrategicMerge:
          metadata:
            annotations:
              airm.silogen.ai/auto-discovered: "true"
              airm.silogen.ai/discovered-component-type: '{{request.object.kind }}'
              airm.silogen.ai/submitter: '{{request.userInfo.username }}'

    # An object marked auto-discovered whose recorded component type differs
    # from its own kind has the flag reset to "false".
    - name: remove-auto-discovered-annotations-inherited-from-parent
      match:
        resources:
          kinds: [Job, Deployment, StatefulSet, DaemonSet, CronJob, KaiwoJob, KaiwoService, Pod]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.metadata.annotations."airm.silogen.ai/auto-discovered" || '''' }}'
            operator: Equals
            value: "true"
          - key: '{{request.object.metadata.annotations."airm.silogen.ai/discovered-component-type" || '''' }}'
            operator: NotEquals
            value: '{{request.object.kind }}'
      mutate:
        patchStrategicMerge:
          metadata:
            annotations:
              airm.silogen.ai/auto-discovered: "false"

    # Copy the namespace's project-id label onto the object when it differs.
    # The namespace labels are fetched through an API call into ns_labels.
    - name: set-project-id-from-namespace-label
      context:
        - name: ns_labels
          apiCall:
            urlPath: '/api/v1/namespaces/{{request.namespace }}'
            jmesPath: metadata.labels
      match:
        resources:
          kinds: [Job, Deployment, StatefulSet, DaemonSet, CronJob, KaiwoJob, KaiwoService, Pod]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      preconditions:
        all:
          - key: '{{request.object.metadata.labels."airm.silogen.ai/project-id" || '''' }}'
            operator: NotEquals
            value: '{{ns_labels."airm.silogen.ai/project-id" }}'
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              airm.silogen.ai/project-id: '{{ns_labels."airm.silogen.ai/project-id" }}'

    # In the three rules below each id resolves to the label already on the
    # object, else the label on the old object, else the request UID.
    - name: add-workload-and-component-id-default
      match:
        resources:
          kinds: [Pod, KaiwoJob, KaiwoService]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              airm.silogen.ai/component-id: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || request.oldObject.metadata.labels."airm.silogen.ai/component-id" || request.uid }}'
              airm.silogen.ai/workload-id: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || request.oldObject.metadata.labels."airm.silogen.ai/workload-id" || request.uid }}'

    # Controllers with a pod template: label the object and its template.
    - name: add-workload-and-component-id-to-objects-with-template
      match:
        resources:
          kinds: [Job, Deployment, StatefulSet, DaemonSet]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              airm.silogen.ai/component-id: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || request.oldObject.metadata.labels."airm.silogen.ai/component-id" || request.uid }}'
              airm.silogen.ai/workload-id: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || request.oldObject.metadata.labels."airm.silogen.ai/workload-id" || request.uid }}'
          spec:
            template:
              metadata:
                labels:
                  airm.silogen.ai/component-id: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || request.oldObject.metadata.labels."airm.silogen.ai/component-id" || request.uid }}'
                  airm.silogen.ai/workload-id: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || request.oldObject.metadata.labels."airm.silogen.ai/workload-id" || request.uid }}'

    # CronJobs: label the CronJob, its job template and the pod template.
    - name: add-workload-and-component-id-cronjobs
      match:
        resources:
          kinds: [CronJob]
          namespaceSelector:
            matchExpressions:
              - key: airm.silogen.ai/project-id
                operator: Exists
      mutate:
        patchStrategicMerge:
          metadata:
            labels:
              airm.silogen.ai/component-id: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || request.oldObject.metadata.labels."airm.silogen.ai/component-id" || request.uid }}'
              airm.silogen.ai/workload-id: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || request.oldObject.metadata.labels."airm.silogen.ai/workload-id" || request.uid }}'
          spec:
            jobTemplate:
              metadata:
                labels:
                  airm.silogen.ai/component-id: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || request.oldObject.metadata.labels."airm.silogen.ai/component-id" || request.uid }}'
                  airm.silogen.ai/workload-id: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || request.oldObject.metadata.labels."airm.silogen.ai/workload-id" || request.uid }}'
              spec:
                template:
                  metadata:
                    labels:
                      airm.silogen.ai/component-id: '{{request.object.metadata.labels."airm.silogen.ai/component-id" || request.oldObject.metadata.labels."airm.silogen.ai/component-id" || request.uid }}'
                      airm.silogen.ai/workload-id: '{{request.object.metadata.labels."airm.silogen.ai/workload-id" || request.oldObject.metadata.labels."airm.silogen.ai/workload-id" || request.uid }}'

View File

@ -0,0 +1,13 @@
---
# Grants the airm-configure-role ClusterRole to the airm-configure-sa
# ServiceAccount in the airm namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: airm-configure-binding
subjects:
  - kind: ServiceAccount
    name: airm-configure-sa
    namespace: airm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: airm-configure-role

View File

@ -0,0 +1,13 @@
---
# Grants the airm-dispatcher-cluster-access-role ClusterRole to the
# airm-dispatcher-sa ServiceAccount in the airm namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: airm-dispatcher-cluster-access-binding
subjects:
  - kind: ServiceAccount
    name: airm-dispatcher-sa
    namespace: airm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: airm-dispatcher-cluster-access-role

View File

@ -0,0 +1,13 @@
---
# Grants the airm-platform-admin ClusterRole cluster-wide to the group
# "oidcairm-role:Platform Administrator".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: airm-platform-admin-role-binding
subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    # Quoted because the group name contains a colon and a space.
    name: 'oidcairm-role:Platform Administrator'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: airm-platform-admin

View File

@ -0,0 +1,16 @@
---
# Helm pre-install hook (weight -1): grants the airm-secret-generator-role
# ClusterRole to the airm-secret-generator-sa ServiceAccount in airm.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: airm-secret-generator-binding
  annotations:
    helm.sh/hook: pre-install
    # Must stay a string; annotation values cannot be integers.
    helm.sh/hook-weight: "-1"
subjects:
  - kind: ServiceAccount
    name: airm-secret-generator-sa
    namespace: airm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: airm-secret-generator-role

View File

@ -0,0 +1,22 @@
---
# Permissions for the AIRM configure job's role: manage Secrets
# (get/list/create/delete) and read Pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: airm-configure-role
rules:
  - apiGroups: [""]
    resources: [secrets]
    verbs: [get, list, create, delete]
  - apiGroups: [""]
    resources: [pods]
    verbs: [get, list]

View File

@ -0,0 +1,113 @@
---
# Cluster-wide permissions for the AIRM dispatcher role. Workload-type
# resources get full read/write access; Nodes and CRDs are read-only.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: airm-dispatcher-cluster-access-role
rules:
  - apiGroups: [""]
    resources: [services, namespaces, configmaps, pods]
    verbs: [get, list, watch, create, delete, update, patch]
  # Nodes are read-only.
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, watch]
  - apiGroups: [apps]
    resources: [deployments, statefulsets, daemonsets]
    verbs: [get, list, watch, create, delete, update, patch]
  - apiGroups: [batch]
    resources: [jobs, cronjobs]
    verbs: [get, list, watch, create, delete, update, patch]
  - apiGroups: [kaiwo.silogen.ai]
    resources: [kaiwojobs, kaiwoservices, kaiwoqueueconfigs]
    verbs: [get, list, watch, create, delete, update, patch]
  - apiGroups: [gateway.networking.k8s.io]
    resources: [httproutes]
    verbs: [get, list, watch, create, delete, update, patch]
  - apiGroups: [networking.k8s.io]
    resources: [ingresses]
    verbs: [get, list, watch, create, delete, update, patch]
  - apiGroups: [external-secrets.io]
    resources: [externalsecrets]
    verbs: [get, list, watch, create, delete, update, patch]
  # CRDs may only be looked up, not watched or changed.
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [get, list]

View File

@ -0,0 +1,153 @@
---
# Permissions for AIRM platform administrators: read/delete access to the
# common workload resources, full access to pod subresources and Kaiwo
# resources, and read access to cluster infrastructure objects.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: airm-platform-admin
rules:
  - apiGroups: [""]
    resources:
      - pods
      - events
      - services
      - configmaps
      - persistentvolumes
      - persistentvolumeclaims
      - namespaces
      - serviceaccounts
    verbs: [get, list, watch, delete]
  # Interactive pod access (logs, exec, attach, port-forward).
  - apiGroups: [""]
    resources: [pods/log, pods/exec, pods/attach, pods/portforward]
    verbs: ['*']
  - apiGroups: [apps]
    resources: [deployments, replicasets, statefulsets, daemonsets]
    verbs: [get, list, watch, delete]
  - apiGroups: [batch]
    resources: [jobs, cronjobs]
    verbs: [get, list, watch, delete]
  - apiGroups: [networking.k8s.io]
    resources: [ingresses, networkpolicies]
    verbs: [get, list, watch, delete]
  # HTTPRoute lives in the Gateway API group, not networking.k8s.io; listed
  # under the wrong group the grant matched no resource at all.
  - apiGroups: [gateway.networking.k8s.io]
    resources: [httproutes]
    verbs: [get, list, watch, delete]
  - apiGroups: [config.kaiwo.silogen.ai]
    resources: [kaiwoconfigs]
    verbs: ['*']
  # Full access; this also covers the get/list/watch/delete grant on
  # kaiwojobs and kaiwoservices, so no separate narrower rule is needed.
  - apiGroups: [kaiwo.silogen.ai]
    resources: [kaiwojobs, kaiwoservices, kaiwoqueueconfigs]
    verbs: ['*']
  - apiGroups: [""]
    resources: [secrets]
    verbs: [get, list, watch, create, delete]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, watch]
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [roles, clusterroles, rolebindings, clusterrolebindings]
    verbs: [get, list, watch]
  - apiGroups: [external-secrets.io]
    resources: [externalsecrets]
    verbs: [get, list, watch, create, delete]
  - apiGroups: [external-secrets.io]
    resources: [clustersecretstores]
    verbs: [get, list, watch]
  - apiGroups: [storage.k8s.io]
    resources: [storageclasses]
    verbs: [get, list, watch]
  - apiGroups: [kueue.x-k8s.io]
    resources: [clusterqueues, resourceflavors, localqueues]
    verbs: [get, list, watch]

View File

@ -0,0 +1,96 @@
---
# Permissions for members of an AIRM project: full control over workload
# resources, limited Secret access, and read access to supporting objects.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: airm-project-member
rules:
  - apiGroups: [""]
    resources:
      - pods
      - pods/log
      - pods/exec
      - pods/attach
      - pods/portforward
      - events
      - services
      - configmaps
      - persistentvolumes
      - persistentvolumeclaims
    verbs: ['*']
  - apiGroups: [apps]
    resources: [deployments, replicasets, statefulsets, daemonsets]
    verbs: ['*']
  - apiGroups: [batch]
    resources: [jobs, cronjobs]
    verbs: ['*']
  - apiGroups: [networking.k8s.io]
    resources: [ingresses, networkpolicies]
    verbs: ['*']
  # HTTPRoute lives in the Gateway API group, not networking.k8s.io; listed
  # under the wrong group the grant matched no resource at all.
  - apiGroups: [gateway.networking.k8s.io]
    resources: [httproutes]
    verbs: ['*']
  - apiGroups: [kaiwo.silogen.ai]
    resources: [kaiwojobs, kaiwoservices]
    verbs: ['*']
  # NOTE(review): `get` is not granted here, although `list`/`watch` already
  # return full Secret objects - confirm this is intentional.
  - apiGroups: [""]
    resources: [secrets]
    verbs: [list, watch, create]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, watch]
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [roles, clusterroles, rolebindings, clusterrolebindings]
    verbs: [get, list, watch]
  - apiGroups: [external-secrets.io]
    resources: [clustersecretstores, externalsecrets]
    verbs: [get, list, watch]
  - apiGroups: [storage.k8s.io]
    resources: [storageclasses]
    verbs: [get, list, watch]

View File

@ -0,0 +1,19 @@
---
# Helm pre-install hook (weight -1): allows creating, updating and reading
# ClusterSecretStore objects. No delete permission is granted.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: airm-secret-generator-role
  annotations:
    helm.sh/hook: pre-install
    helm.sh/hook-weight: "-1"
rules:
  - apiGroups: [external-secrets.io]
    resources: [clustersecretstores]
    verbs: [create, update, patch, get, list]

View File

@ -0,0 +1,23 @@
---
# Extra RBAC aggregated into Kyverno's background controller through the
# rbac.kyverno.io/aggregate-to-background-controller label: read access to
# ClusterRoles and RoleBindings, plus `bind` on ClusterRoles.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kyverno:airm-policy-roles
  labels:
    rbac.kyverno.io/aggregate-to-background-controller: "true"
rules:
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [clusterroles, rolebindings]
    verbs: [get, list, watch]
  # `bind` permits creating bindings that reference a ClusterRole.
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [clusterroles]
    verbs: [bind]

View File

@ -0,0 +1,17 @@
---
# Extra RBAC aggregated into Kyverno's reports controller through the
# rbac.kyverno.io/aggregate-to-reports-controller label: read access to
# the Kaiwo job and service custom resources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kyverno:airm-reports-policy-roles
  labels:
    rbac.kyverno.io/aggregate-to-reports-controller: "true"
rules:
  - apiGroups: [kaiwo.silogen.ai]
    resources: [kaiwojobs, kaiwoservices]
    verbs: [get, list, watch]

View File

@ -0,0 +1,50 @@
---
# CloudNativePG cluster backing AIRM: a single PostgreSQL 17 instance with
# database `airm` owned by `airm_user`, and separate 50Gi volumes for data
# and WAL.
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: airm-cnpg
  namespace: airm
spec:
  imageName: ghcr.io/cloudnative-pg/postgresql:17
  instances: 1
  primaryUpdateStrategy: unsupervised
  startDelay: 300
  stopDelay: 300

  affinity:
    enablePodAntiAffinity: true
    topologyKey: topology.kubernetes.io/zone

  bootstrap:
    initdb:
      database: airm
      owner: airm_user
      # Credentials for the owner come from this Secret.
      secret:
        name: airm-cnpg-user
      postInitSQL:
        - GRANT CREATE ON SCHEMA public TO airm_user

  superuserSecret:
    name: airm-cnpg-superuser

  nodeMaintenanceWindow:
    inProgress: false
    reusePVC: true

  postgresql:
    parameters:
      auto_explain.log_min_duration: 10s
      # Quoted so the value stays a string rather than an integer.
      pg_stat_statements.max: "10000"
      pg_stat_statements.track: all
      shared_buffers: 256MB
      wal_compression: pglz
    pg_hba:
      - host all all 10.244.0.0/16 md5

  resources:
    requests:
      cpu: "1"
      memory: 512Mi
    limits:
      cpu: "2"
      memory: 1Gi

  storage:
    size: 50Gi
    storageClass: default
  walStorage:
    size: 50Gi
    storageClass: default

View File

@ -0,0 +1,57 @@
---
# Helm pre-install hook (weight -1): ships the script that seeds the
# airm-secret-store ClusterSecretStore (fake provider) with randomly
# generated credentials.
apiVersion: v1
kind: ConfigMap
metadata:
  name: airm-secret-generator-script
  namespace: airm
  annotations:
    helm.sh/hook: pre-install
    helm.sh/hook-weight: "-1"
data:
  generate-secrets.sh: |
    # Print 32 upper-case hexadecimal characters of random data.
    generate_secret() {
      openssl rand -hex 16 | tr 'a-f' 'A-F' | head -c 32
    }

    # An existing store is left untouched so previously generated values
    # are kept.
    if kubectl get clustersecretstores.external-secrets.io airm-secret-store > /dev/null 2>&1; then
      echo "ClusterSecretStore 'airm-secret-store' already exists, skipping creation."
      exit 0
    fi

    AIRM_SUPERUSER_USERNAME=$(generate_secret)
    AIRM_SUPERUSER_PASSWORD=$(generate_secret)
    AIRM_USER_PASSWORD=$(generate_secret)
    AIRM_RABBITMQ_USERNAME=$(generate_secret)
    AIRM_RABBITMQ_PASSWORD=$(generate_secret)
    AIRM_UI_AUTH_SECRET=$(generate_secret)

    # Generated values are double-quoted in the manifest: a hex string made
    # only of digits (or digits around a single E) would otherwise be parsed
    # as a YAML number and rejected for the string-typed `value` field.
    cat > /tmp/final-secret-store.yaml << EOF
    apiVersion: external-secrets.io/v1beta1
    kind: ClusterSecretStore
    metadata:
      name: airm-secret-store
    spec:
      provider:
        fake:
          data:
            - key: airm-cnpg-superuser-username
              value: "${AIRM_SUPERUSER_USERNAME}"
            - key: airm-cnpg-superuser-password
              value: "${AIRM_SUPERUSER_PASSWORD}"
            - key: airm-cnpg-user-username
              value: airm_user
            - key: airm-cnpg-user-password
              value: "${AIRM_USER_PASSWORD}"
            - key: airm-rabbitmq-user-username
              value: "${AIRM_RABBITMQ_USERNAME}"
            - key: airm-rabbitmq-user-password
              value: "${AIRM_RABBITMQ_PASSWORD}"
            - key: airm-ui-auth-nextauth-secret
              value: "${AIRM_UI_AUTH_SECRET}"
    EOF

    # Report failure instead of claiming success when the apply is rejected.
    if ! kubectl apply -f /tmp/final-secret-store.yaml; then
      echo "Failed to create ClusterSecretStore 'airm-secret-store'." >&2
      exit 1
    fi
    echo "ClusterSecretStore created successfully!"
    echo "Generated secrets with alphanumeric values only"
    exit 0

View File

@ -0,0 +1,29 @@
---
# Hourly CronJob that POSTs to the dispatcher's /v1/clusters/nodes endpoint.
# A run still in progress is replaced by the next one.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: airm-cluster-nodes-dispatcher
  namespace: airm
spec:
  # Minute 0 of every hour.
  schedule: '0 * * * *'
  concurrencyPolicy: Replace
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: airm-cluster-nodes-cron
              image: ghcr.io/silogen/airm-dispatcher:v2025.09.001
              imagePullPolicy: IfNotPresent
              command:
                - curl
                - -X
                - POST
                - http://airm-dispatcher:80/v1/clusters/nodes
              resources:
                requests:
                  cpu: 50m
                  memory: 100Mi
                limits:
                  memory: 100Mi

View File

@ -0,0 +1,62 @@
---
# Every-minute CronJob that POSTs to the dispatcher's /v1/heartbeats
# endpoint. An init container first waits until the dispatcher Service
# accepts TCP connections on port 80.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: airm-heartbeat-dispatcher
  namespace: airm
spec:
  schedule: '*/1 * * * *'
  suspend: false
  concurrencyPolicy: Replace
  jobTemplate:
    spec:
      template:
        metadata:
          annotations:
            sidecar.istio.io/inject: "false"
        spec:
          restartPolicy: OnFailure
          initContainers:
            - name: check-dispatcher-is-ready
              image: ubuntu@sha256:09506232a8004baa32c47d68f1e5c307d648fdd59f5e7eaa42aaf87914100db3 # Original tag: 22.04
              imagePullPolicy: IfNotPresent
              # Installs ncat, then polls the endpoint every 3 seconds.
              # NOTE(review): if either apt-get step fails, the `&&` chain
              # skips the wait loop and the script still exits 0.
              command:
                - /bin/bash
                - -c
                - apt-get update -y 1> /dev/null 2>&1 && apt-get install ncat -y 1> /dev/null 2>&1 && while ! nc -z "$ENDPOINT_URL_TO_CHECK" "$ENDPOINT_PORT_TO_CHECK"; do echo "Waiting for Airm dispatcher at ${ENDPOINT_URL_TO_CHECK}:${ENDPOINT_PORT_TO_CHECK}..."; sleep 3; done; echo "Airm dispatcher is accepting connections at ${ENDPOINT_URL_TO_CHECK}:${ENDPOINT_PORT_TO_CHECK}."; sleep 12; exit 0
              env:
                - name: ENDPOINT_URL_TO_CHECK
                  value: airm-dispatcher.airm.svc.cluster.local
                - name: ENDPOINT_PORT_TO_CHECK
                  value: "80"
              # Runs as root with a reduced capability set.
              securityContext:
                runAsUser: 0
                runAsNonRoot: false
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                    - ALL
                  add:
                    - SETUID
                    - SETGID
                    - CHOWN
                    - DAC_OVERRIDE
                    - FOWNER
                    - FSETID
                seccompProfile:
                  type: RuntimeDefault
          containers:
            - name: airm-heartbeat-cron
              image: ghcr.io/silogen/airm-dispatcher:v2025.09.001
              imagePullPolicy: IfNotPresent
              command:
                - curl
                - -X
                - POST
                - http://airm-dispatcher:80/v1/heartbeats
              resources:
                requests:
                  cpu: 50m
                  memory: 100Mi
                limits:
                  memory: 100Mi

View File

@ -0,0 +1,257 @@
---
# AIRM API Deployment. Init containers run in order: wait for PostgreSQL,
# copy migration scripts into a shared volume, apply them with Liquibase,
# run the chart registration module, and wait for the RabbitMQ management
# port. The main container then serves on port 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: airm-api
  namespace: airm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: airm-api
  template:
    metadata:
      labels:
        app: airm-api
        auth-required: "true"
    spec:
      volumes:
        # Scratch space that carries migrations from the API image to Liquibase.
        - name: airm-migration-volume
          emptyDir: {}
      initContainers:
        - name: wait-for-db
          image: postgres@sha256:5d14c08a257610d8e27c52ce0f10de5d9cce4c232e1277d44d7d6fb628b3d1a7 # Original tag: 17-alpine
          command:
            - sh
            - -c
            - |
              until pg_isready -h "airm-cnpg-rw.airm.svc.cluster.local" -p 5432 -U postgres; do
                echo "Waiting for database..."
                sleep 2
              done
              echo "Database is ready!"

        - name: init-migration-scripts
          image: ghcr.io/silogen/airm-api:v2025.09.001
          imagePullPolicy: IfNotPresent
          command:
            - sh
            - -c
            - cp /code/migrations/* /mnt/code/migrations/
          volumeMounts:
            - name: airm-migration-volume
              mountPath: /mnt/code/migrations
          securityContext:
            runAsUser: 1000
            runAsNonRoot: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault

        - name: liquibase-migrate
          image: docker.io/liquibase/liquibase@sha256:dc2e5237941efb92cc6ae0cffd40a5b6f476559d5ed20fd7ca711df4895997a3 # Original tag: 4.31
          imagePullPolicy: IfNotPresent
          # $(VAR) references are expanded by Kubernetes from `env` below.
          command:
            - liquibase
            - --url=jdbc:postgresql://airm-cnpg-rw.airm.svc.cluster.local:5432/airm
            - --username=$(DATABASE_USER)
            - --password=$(DATABASE_PASSWORD)
            - --logLevel=INFO
            - --changeLogFile=changelog/changelog.xml
            - update
          env:
            - name: DATABASE_USER
              valueFrom:
                secretKeyRef:
                  name: airm-cnpg-user
                  key: username
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: airm-cnpg-user
                  key: password
          volumeMounts:
            - name: airm-migration-volume
              mountPath: /liquibase/changelog
              readOnly: true

        - name: charts-registration
          image: ghcr.io/silogen/airm-api:v2025.09.001
          imagePullPolicy: IfNotPresent
          command:
            - uv
            - run
            - -m
            - app.charts.registration
          env:
            - name: DATABASE_HOST
              value: airm-cnpg-rw.airm.svc.cluster.local
            - name: DATABASE_PORT
              value: "5432"
            - name: DATABASE_USER
              valueFrom:
                secretKeyRef:
                  name: airm-cnpg-user
                  key: username
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: airm-cnpg-user
                  key: password
            - name: RABBITMQ_HOST
              value: airm-rabbitmq.airm.svc.cluster.local
            - name: RABBITMQ_PORT
              value: "5672"
            - name: RABBITMQ_MANAGEMENT_URL
              value: http://airm-rabbitmq.airm.svc.cluster.local:15672/api
            - name: RABBITMQ_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: airm-rabbitmq-admin
                  key: username
            - name: RABBITMQ_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: airm-rabbitmq-admin
                  key: password
          securityContext:
            runAsUser: 1000
            runAsNonRoot: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault

        - name: check-rabbitmq-is-ready
          image: ubuntu@sha256:09506232a8004baa32c47d68f1e5c307d648fdd59f5e7eaa42aaf87914100db3 # Original tag: 22.04
          imagePullPolicy: IfNotPresent
          # Installs ncat, then polls the endpoint every 3 seconds.
          command:
            - /bin/bash
            - -c
            - apt-get update -y 1> /dev/null 2>&1 && apt-get install ncat -y 1> /dev/null 2>&1 && while ! nc -z "$ENDPOINT_URL_TO_CHECK" "$ENDPOINT_PORT_TO_CHECK"; do echo "Waiting for Airm rabbitmq at ${ENDPOINT_URL_TO_CHECK}:${ENDPOINT_PORT_TO_CHECK}..."; sleep 3; done; echo "Airm rabbitmq is accepting connections at ${ENDPOINT_URL_TO_CHECK}:${ENDPOINT_PORT_TO_CHECK}."; sleep 3; exit 0
          env:
            - name: ENDPOINT_URL_TO_CHECK
              value: airm-rabbitmq.airm.svc.cluster.local
            - name: ENDPOINT_PORT_TO_CHECK
              value: "15672"
          # Runs as root with a reduced capability set.
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
              add:
                - SETUID
                - SETGID
                - CHOWN
                - DAC_OVERRIDE
                - FOWNER
                - FSETID
            seccompProfile:
              type: RuntimeDefault

      containers:
        - name: airm
          image: ghcr.io/silogen/airm-api:v2025.09.001
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8080
            - containerPort: 9009
          env:
            # OpenID / registration
            - name: OPENID_CLIENT_ID
              value: 354a0fa1-35ac-4a6d-9c4d-d661129c2cd0
            - name: OPENID_CONFIGURATION_URL
              value: http://kc.not-a-domain/realms/airm/.well-known/openid-configuration
            - name: POST_REGISTRATION_REDIRECT_URL
              value: https://airmui.not-a-domain/
            # PostgreSQL
            - name: DATABASE_HOST
              value: airm-cnpg-rw.airm.svc.cluster.local
            - name: DATABASE_PORT
              value: "5432"
            - name: DATABASE_USER
              valueFrom:
                secretKeyRef:
                  name: airm-cnpg-user
                  key: username
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: airm-cnpg-user
                  key: password
            # RabbitMQ
            - name: RABBITMQ_HOST
              value: airm-rabbitmq.airm.svc.cluster.local
            - name: RABBITMQ_PORT
              value: "5672"
            - name: RABBITMQ_MANAGEMENT_URL
              value: http://airm-rabbitmq.airm.svc.cluster.local:15672/api
            - name: RABBITMQ_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: airm-rabbitmq-admin
                  key: username
            - name: RABBITMQ_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: airm-rabbitmq-admin
                  key: password
            # Keycloak admin API
            - name: KEYCLOAK_ADMIN_SERVER_URL
              value: http://keycloak.keycloak.svc.cluster.local:8080
            - name: KEYCLOAK_REALM
              value: airm
            - name: KEYCLOAK_ADMIN_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: airm-keycloak-admin-client
                  key: client-id
            - name: KEYCLOAK_ADMIN_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: airm-keycloak-admin-client
                  key: client-secret
            # MinIO object storage
            - name: MINIO_URL
              value: http://minio.minio-tenant-default.svc.cluster.local:80
            - name: MINIO_BUCKET
              value: default-bucket
            - name: MINIO_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: airm-api-minio-credentials
                  key: minio-access-key
            - name: MINIO_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: airm-api-minio-credentials
                  key: minio-secret-key
            # Metrics
            - name: PROMETHEUS_URL
              value: http://lgtm-stack.otel-lgtm-stack.svc.cluster.local:9090
          livenessProbe:
            httpGet:
              path: /v1/health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 2
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /v1/health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 2
            successThreshold: 1
            failureThreshold: 3
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              memory: 1Gi
          securityContext:
            runAsUser: 1000
            runAsNonRoot: true
            allowPrivilegeEscalation: false
            seccompProfile:
              type: RuntimeDefault

View File

@ -0,0 +1,101 @@
---
# AIRM dispatcher Deployment. An init container blocks start-up until the
# airm-api Service accepts TCP connections on port 80; the dispatcher then
# serves on port 8080 under the airm-dispatcher-sa ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: airm-dispatcher
  namespace: airm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: airm-dispatcher
  template:
    metadata:
      labels:
        app: airm-dispatcher
    spec:
      serviceAccountName: airm-dispatcher-sa
      initContainers:
        - name: check-airm-api-is-ready
          image: ubuntu@sha256:09506232a8004baa32c47d68f1e5c307d648fdd59f5e7eaa42aaf87914100db3 # Original tag: 22.04
          imagePullPolicy: IfNotPresent
          # Installs ncat, then polls the endpoint every 3 seconds. The log
          # text names the API because that is the endpoint being probed.
          command:
            - /bin/bash
            - -c
            - apt-get update -y 1> /dev/null 2>&1 && apt-get install ncat -y 1> /dev/null 2>&1 && while ! nc -z "$ENDPOINT_URL_TO_CHECK" "$ENDPOINT_PORT_TO_CHECK"; do echo "Waiting for Airm API at ${ENDPOINT_URL_TO_CHECK}:${ENDPOINT_PORT_TO_CHECK}..."; sleep 3; done; echo "Airm API is accepting connections at ${ENDPOINT_URL_TO_CHECK}:${ENDPOINT_PORT_TO_CHECK}."; sleep 12; exit 0
          env:
            - name: ENDPOINT_URL_TO_CHECK
              value: airm-api.airm.svc.cluster.local
            - name: ENDPOINT_PORT_TO_CHECK
              value: "80"
          # Runs as root with a reduced capability set.
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
              add:
                - SETUID
                - SETGID
                - CHOWN
                - DAC_OVERRIDE
                - FOWNER
                - FSETID
            seccompProfile:
              type: RuntimeDefault
      containers:
        - name: airm-dispatcher
          image: ghcr.io/silogen/airm-dispatcher:v2025.09.001
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8080
          env:
            - name: KUBE_CLUSTER_NAME
              value: demo-cluster
            - name: ORG_NAME
              value: demo
            - name: RABBITMQ_HOST
              value: airm-rabbitmq.airm.svc.cluster.local
            - name: RABBITMQ_PORT
              value: "5672"
            - name: RABBITMQ_AIRM_COMMON_VHOST
              value: vh_airm_common
            - name: RABBITMQ_AIRM_COMMON_QUEUE
              value: airm_common
            - name: RABBITMQ_USER
              valueFrom:
                secretKeyRef:
                  name: airm-rabbitmq-common-vhost-user
                  key: username
            - name: RABBITMQ_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: airm-rabbitmq-common-vhost-user
                  key: password
          livenessProbe:
            httpGet:
              path: /v1/health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 2
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /v1/health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 2
            successThreshold: 1
            failureThreshold: 3
          resources:
            requests:
              cpu: 500m
              memory: 1Gi
            limits:
              memory: 1Gi
          # NOTE(review): the main container runs as root - confirm required.
          securityContext:
            runAsUser: 0

View File

@ -0,0 +1,65 @@
---
# AIRM web UI Deployment: one replica serving on port 8000, configured to
# authenticate through Keycloak and to call the AIRM API Service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: airm-ui
  namespace: airm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: airm-ui
  template:
    metadata:
      labels:
        app: airm-ui
    spec:
      serviceAccountName: airm-ui-sa
      restartPolicy: Always
      containers:
        - name: airm-ui
          image: ghcr.io/silogen/airm-ui:v2025.09.001
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8000
          env:
            - name: NEXTAUTH_URL
              value: https://airmui.not-a-domain
            - name: KEYCLOAK_ISSUER
              value: https://kc.not-a-domain/realms/airm
            - name: KEYCLOAK_ID
              value: 354a0fa1-35ac-4a6d-9c4d-d661129c2cd0
            - name: KEYCLOAK_ISSUER_INTERNAL_URL
              value: http://keycloak.keycloak:8080/realms/airm/
            - name: AIRM_API_SERVICE_URL
              value: http://airm-api.airm
            - name: NEXTAUTH_SECRET
              valueFrom:
                secretKeyRef:
                  name: airm-secrets-airm
                  key: NEXTAUTH_SECRET
            - name: KEYCLOAK_SECRET
              valueFrom:
                secretKeyRef:
                  name: airm-keycloak-ui-creds
                  key: KEYCLOAK_SECRET
          livenessProbe:
            httpGet:
              path: /api/health
              port: 8000
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 2
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /api/health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            requests:
              cpu: 500m
              memory: 4Gi
            limits:
              memory: 4Gi

View File

@ -0,0 +1,21 @@
---
# Builds the airm-api-minio-credentials Secret from the API_ACCESS_KEY and
# API_SECRET_KEY properties of the `default-user` entry in k8s-secret-store.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-api-minio-credentials
  namespace: airm
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: k8s-secret-store
  target:
    name: airm-api-minio-credentials
  data:
    - secretKey: minio-access-key
      remoteRef:
        key: default-user
        property: API_ACCESS_KEY
    - secretKey: minio-secret-key
      remoteRef:
        key: default-user
        property: API_SECRET_KEY

View File

@ -0,0 +1,21 @@
---
# Builds the Opaque Secret airm-cnpg-superuser (username/password) from the
# airm-secret-store ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-cnpg-superuser
  namespace: airm
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: airm-secret-store
  target:
    name: airm-cnpg-superuser
    template:
      type: Opaque
  data:
    - secretKey: username
      remoteRef:
        key: airm-cnpg-superuser-username
    - secretKey: password
      remoteRef:
        key: airm-cnpg-superuser-password

View File

@ -0,0 +1,21 @@
---
# Builds the Opaque Secret airm-cnpg-user (username/password) from the
# airm-secret-store ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-cnpg-user
  namespace: airm
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: airm-secret-store
  target:
    name: airm-cnpg-user
    template:
      type: Opaque
  data:
    - secretKey: username
      remoteRef:
        key: airm-cnpg-user-username
    - secretKey: password
      remoteRef:
        key: airm-cnpg-user-password

View File

@ -0,0 +1,21 @@
---
# Builds the Opaque Secret airm-keycloak-admin-client (client-id and
# client-secret) from the keycloak-secret-store ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-keycloak-admin-client
  namespace: airm
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: keycloak-secret-store
  target:
    name: airm-keycloak-admin-client
    template:
      type: Opaque
  data:
    - secretKey: client-id
      remoteRef:
        key: airm-keycloak-admin-client-id
    - secretKey: client-secret
      remoteRef:
        key: airm-keycloak-admin-client-secret

View File

@ -0,0 +1,18 @@
---
# Builds the Opaque Secret airm-keycloak-ui-creds (key KEYCLOAK_SECRET) from
# the keycloak-secret-store ClusterSecretStore. The target Secret name
# differs from this ExternalSecret's own name.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-keycloak-ui-client
  namespace: airm
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: keycloak-secret-store
  target:
    name: airm-keycloak-ui-creds
    template:
      type: Opaque
  data:
    - secretKey: KEYCLOAK_SECRET
      remoteRef:
        key: airm-ui-keycloak-secret

View File

@ -0,0 +1,27 @@
---
# Builds the Opaque Secret airm-rabbitmq-admin from airm-secret-store. The
# template exposes the credentials both as plain username/password keys
# and as a rendered default_user.conf file.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-rabbitmq-admin
  namespace: airm
spec:
  secretStoreRef:
    kind: ClusterSecretStore
    name: airm-secret-store
  data:
    - secretKey: username
      remoteRef:
        key: airm-rabbitmq-user-username
    - secretKey: password
      remoteRef:
        key: airm-rabbitmq-user-password
  target:
    name: airm-rabbitmq-admin
    template:
      type: Opaque
      data:
        username: '{{ .username }}'
        password: '{{ .password }}'
        # The {{ .x }} placeholders refer to the secretKey names above.
        default_user.conf: |
          default_user = {{ .username }}
          default_pass = {{ .password }}

View File

@ -0,0 +1,17 @@
---
# Builds the Secret airm-secrets-airm (key NEXTAUTH_SECRET) from the
# airm-secret-store ClusterSecretStore, re-syncing every 15 seconds.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: airm-secrets-airm
  namespace: airm
spec:
  refreshInterval: 15s
  secretStoreRef:
    kind: ClusterSecretStore
    name: airm-secret-store
  target:
    name: airm-secrets-airm
  data:
    - secretKey: NEXTAUTH_SECRET
      remoteRef:
        key: airm-ui-auth-nextauth-secret

View File

@ -0,0 +1,30 @@
---
# Attaches to Gateway kgateway-system/https and forwards every path ("/"
# prefix) to Service airm-api:80 when the Host header matches the regular
# expression airmapi\..*
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: airmapi-route
  namespace: airm
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: https
      namespace: kgateway-system
  rules:
    - matches:
        - path:
            type: PathPrefix
            value: /
          headers:
            - name: Host
              type: RegularExpression
              value: airmapi\..*
      backendRefs:
        - group: ""
          kind: Service
          name: airm-api
          port: 80
          weight: 1
      timeouts:
        # Overall request budget is 90s; a single backend attempt gets 20s.
        request: 90s
        backendRequest: 20s

View File

@ -0,0 +1,30 @@
---
# Attaches to Gateway kgateway-system/https and forwards every path ("/"
# prefix) to Service airm-ui:80 when the Host header matches the regular
# expression airmui\..*
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: airmui-route
  namespace: airm
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: https
      namespace: kgateway-system
  rules:
    - matches:
        - path:
            type: PathPrefix
            value: /
          headers:
            - name: Host
              type: RegularExpression
              value: airmui\..*
      backendRefs:
        - group: ""
          kind: Service
          name: airm-ui
          port: 80
          weight: 1
      timeouts:
        # Overall request budget is 90s; a single backend attempt gets 20s.
        request: 90s
        backendRequest: 20s

View File

@ -0,0 +1,8 @@
---
# Namespaced self-signed cert-manager Issuer for the airm namespace.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: airm-selfsigned-issuer
  namespace: airm
spec:
  selfSigned: {}

View File

@ -0,0 +1,120 @@
---
# One-shot Job that runs the airm-configure image once its dependencies
# answer. The init container blocks until Keycloak (realm "airm"), the AIRM
# API health endpoint and the RabbitMQ management listener respond; the main
# container then runs with the environment below.
#
# Both containers run as UID 0 with all capabilities dropped except the six
# listed under capabilities.add; the init container installs curl with apt.
apiVersion: batch/v1
kind: Job
metadata:
  name: airm-configure
  namespace: airm
spec:
  backoffLimit: 3
  template:
    metadata:
      labels:
        app: airm-configure
    spec:
      containers:
        - env:
            - name: DEBIAN_FRONTEND
              value: noninteractive
            - name: ORG_NAME
              value: demo
            - name: NEW_DOMAIN_NAME
              value: not-a-domain
            - name: KEYCLOAK_CLIENT_ID
              value: 354a0fa1-35ac-4a6d-9c4d-d661129c2cd0
            - name: KEYCLOAK_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  key: KEYCLOAK_SECRET
                  name: airm-keycloak-ui-creds
            - name: USER_EMAIL
              value: devuser@not-a-domain
            - name: KEYCLOAK_URL
              value: http://keycloak.keycloak.svc.cluster.local:8080
            - name: AIRM_API_URL
              value: http://airm-api.airm.svc.cluster.local
            - name: CLUSTER_BASE_URL
              value: https://workspaces.not-a-domain/
          image: ghcr.io/silogen/airm-configure:v2025.09.001
          imagePullPolicy: IfNotPresent
          name: configure
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              add:
                - SETUID
                - SETGID
                - CHOWN
                - DAC_OVERRIDE
                - FOWNER
                - FSETID
              drop:
                - ALL
            runAsNonRoot: false
            runAsUser: 0
            seccompProfile:
              type: RuntimeDefault
      initContainers:
        # Dependency gate. Two fixes compared to the previous script:
        #  * curl is verified after installation, so a failed apt-get makes
        #    the pod fail visibly instead of looping forever on a missing
        #    binary;
        #  * the Keycloak and AIRM API probes use --fail, so an HTTP error
        #    status (e.g. 404 while the realm does not exist yet, or 5xx) is
        #    no longer mistaken for "ready".
        - args:
            - |
              apt-get update -y 1> /dev/null 2>&1
              apt-get install curl -y 1> /dev/null 2>&1
              if ! command -v curl > /dev/null 2>&1; then
                echo "ERROR: curl could not be installed; cannot probe dependencies." >&2
                exit 1
              fi
              echo "Checking dependencies..."
              echo "Checking keycloak..."
              while true; do
                if curl -s --fail --max-time 5 http://keycloak.keycloak.svc.cluster.local:8080/realms/airm/.well-known/openid-configuration > /dev/null 2>&1; then
                  echo "Keycloak is ready!"
                  break
                else
                  echo "Waiting for Keycloak..."
                  sleep 10
                fi
              done
              echo "Checking airm-api..."
              while true; do
                if curl -s --fail --max-time 5 http://airm-api.airm.svc.cluster.local/v1/health > /dev/null 2>&1; then
                  echo "AIRM API is ready!"
                  break
                else
                  echo "Waiting for AIRM API..."
                  sleep 10
                fi
              done
              echo "Checking rabbitmq..."
              while true; do
                # No --fail here: the request carries no credentials, so an
                # HTTP error status (presumably 401) is expected; any HTTP
                # answer is taken to mean the management listener is up.
                if curl -s --max-time 5 http://airm-rabbitmq.airm.svc.cluster.local:15672/api/overview > /dev/null 2>&1; then
                  echo "RabbitMQ is ready!"
                  break
                else
                  echo "Waiting for RabbitMQ..."
                  sleep 10
                fi
              done
              echo "All dependencies are ready!"
          command:
            - /bin/bash
            - -c
          image: ubuntu@sha256:09506232a8004baa32c47d68f1e5c307d648fdd59f5e7eaa42aaf87914100db3  # Original tag: 22.04
          imagePullPolicy: IfNotPresent
          name: wait-for-dependencies
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              add:
                - SETUID
                - SETGID
                - CHOWN
                - DAC_OVERRIDE
                - FOWNER
                - FSETID
              drop:
                - ALL
            runAsNonRoot: false
            runAsUser: 0
            seccompProfile:
              type: RuntimeDefault
      restartPolicy: Never
      serviceAccountName: airm-configure-sa

View File

@ -0,0 +1,31 @@
---
# Helm pre-install hook (weight "1") that executes
# /scripts/generate-secrets.sh, mounted from the ConfigMap
# "airm-secret-generator-script", under the ServiceAccount
# "airm-secret-generator-sa".
apiVersion: batch/v1
kind: Job
metadata:
  annotations:
    helm.sh/hook: pre-install
    helm.sh/hook-weight: "1"
  name: airm-secret-generator
  namespace: airm
spec:
  backoffLimit: 3
  template:
    spec:
      containers:
        - command:
            - /bin/bash
            - -c
            - /scripts/generate-secrets.sh
          image: ghcr.io/silogen/cluster-tool:latest
          # The image is referenced by the mutable "latest" tag. With
          # IfNotPresent a node that already cached an older "latest" would
          # never pick up a newer build, so the tag is re-resolved on every
          # pod start. Pinning a version tag or digest would be better still.
          imagePullPolicy: Always
          name: secret-generator
          volumeMounts:
            - mountPath: /scripts
              name: script-volume
      restartPolicy: OnFailure
      serviceAccountName: airm-secret-generator-sa
      volumes:
        - configMap:
            # 493 decimal == 0755 octal, so the mounted script is executable.
            defaultMode: 493
            name: airm-secret-generator-script
          name: script-volume

5
airm/Namespace_airm.yaml Normal file
View File

@ -0,0 +1,5 @@
---
# Namespace that holds the airm resources defined in this directory.
apiVersion: v1
kind: Namespace
metadata:
  name: airm

View File

@ -0,0 +1,22 @@
---
# Single-replica RabbitMQ cluster for airm. Default-user credentials come
# from the externally managed Secret "airm-rabbitmq-admin"; TLS material
# comes from the Secret "airm-tls-secret".
apiVersion: rabbitmq.com/v1beta1
kind: RabbitmqCluster
metadata:
  name: airm-rabbitmq
  namespace: airm
spec:
  replicas: 1
  persistence:
    # References a StorageClass literally named "default".
    storageClassName: default
    storage: 20Gi
  resources:
    requests:
      cpu: 250m
      memory: 256Mi
    limits:
      memory: 512Mi
  secretBackend:
    externalSecret:
      name: airm-rabbitmq-admin
  tls:
    secretName: airm-tls-secret

View File

@ -0,0 +1,6 @@
---
# ServiceAccount referenced by the airm-configure Job.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: airm-configure-sa
  namespace: airm

View File

@ -0,0 +1,6 @@
---
# ServiceAccount "airm-dispatcher-sa" in the airm namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: airm-dispatcher-sa
  namespace: airm

View File

@ -0,0 +1,9 @@
---
# ServiceAccount used by the airm-secret-generator Job. It is a pre-install
# hook with weight "-1", i.e. ordered before the Job's weight "1".
apiVersion: v1
kind: ServiceAccount
metadata:
  name: airm-secret-generator-sa
  namespace: airm
  annotations:
    helm.sh/hook: pre-install
    helm.sh/hook-weight: "-1"

View File

@ -0,0 +1,6 @@
---
# ServiceAccount "airm-ui-sa" in the airm namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: airm-ui-sa
  namespace: airm

View File

@ -0,0 +1,19 @@
---
# ClusterIP Service for pods labelled app=airm-api:
#   web     80   -> container port 8080
#   metrics 9009 -> container port 9009
apiVersion: v1
kind: Service
metadata:
  name: airm-api
  namespace: airm
  labels:
    app: airm-api
spec:
  type: ClusterIP
  selector:
    app: airm-api
  ports:
    - name: web
      port: 80
      targetPort: 8080
    - name: metrics
      port: 9009
      targetPort: 9009

View File

@ -0,0 +1,16 @@
---
# ClusterIP Service for pods labelled app=airm-dispatcher:
#   web 80 -> container port 8080
apiVersion: v1
kind: Service
metadata:
  name: airm-dispatcher
  namespace: airm
  labels:
    app: airm-dispatcher
spec:
  type: ClusterIP
  selector:
    app: airm-dispatcher
  ports:
    - name: web
      port: 80
      targetPort: 8080

16
airm/Service_airm-ui.yaml Normal file
View File

@ -0,0 +1,16 @@
---
# ClusterIP Service for pods labelled app=airm-ui:
#   web 80 -> container port 8000 (the sibling airm Services target 8080)
apiVersion: v1
kind: Service
metadata:
  name: airm-ui
  namespace: airm
  labels:
    app: airm-ui
spec:
  type: ClusterIP
  selector:
    app: airm-ui
  ports:
    - name: web
      port: 80
      targetPort: 8000

View File

@ -0,0 +1,27 @@
---
# AMD GPU operator DeviceConfig, applied to nodes labelled
# feature.node.kubernetes.io/amd-gpu=true. Driver management is switched
# off; the device plugin, node labeller and metrics exporter are configured.
apiVersion: amd.com/v1alpha1
kind: DeviceConfig
metadata:
  name: gpu-operator
  namespace: kube-amd-gpu
spec:
  selector:
    feature.node.kubernetes.io/amd-gpu: "true"
  driver:
    enable: false
    blacklist: false
    # NOTE(review): "docker.io/username/repo" looks like a placeholder;
    # driver.enable is false here, so verify before ever enabling it.
    image: docker.io/username/repo
    version: "6.3.2"
  devicePlugin:
    devicePluginImage: rocm/k8s-device-plugin:latest
    enableNodeLabeller: true
    nodeLabellerImage: rocm/k8s-device-plugin:labeller-latest
  metricsExporter:
    enable: true
    config:
      name: gpu-config
    image: docker.io/rocm/device-metrics-exporter@sha256:a82d52f44e09fe293cb49aed8a8047fcc43307ac1cf100ea50597a002d053ff9  # Original tag: v1.3.0.1
    imagePullPolicy: IfNotPresent
    serviceType: NodePort
    nodePort: 32500
    port: 5000

View File

@ -0,0 +1,18 @@
---
# Serving certificate for the KMM webhook Service, issued by the Issuer
# "amd-gpu-operator-kmm-selfsigned-issuer" and stored in the Secret
# "kmm-operator-webhook-server-cert".
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: amd-gpu-operator-kmm-serving-cert
  namespace: kube-amd-gpu
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
spec:
  secretName: kmm-operator-webhook-server-cert
  issuerRef:
    kind: Issuer
    name: amd-gpu-operator-kmm-selfsigned-issuer
  dnsNames:
    - amd-gpu-operator-kmm-webhook-service.kube-amd-gpu.svc
    - amd-gpu-operator-kmm-webhook-service.kube-amd-gpu.svc.cluster.local

View File

@ -0,0 +1,19 @@
---
# Binds the ClusterRole of the same name to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-config-manager.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-config-manager
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-config-manager
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-config-manager

View File

@ -0,0 +1,19 @@
---
# Binds the event-recorder ClusterRole to the gpu-operator controller-manager
# ServiceAccount in kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-event-recorder-clusterrolebinding
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-gpu-operator-charts-controller-manager
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-event-recorder-clusterrole

View File

@ -0,0 +1,19 @@
---
# Binds the KMM manager ClusterRole ("amd-gpu-operator-kmm-manager-role") to
# the gpu-operator controller-manager ServiceAccount in kube-amd-gpu.
# NOTE(review): the version label here is v1.0.0, whereas the other
# gpu-operator-charts objects in this file carry v1.3.0.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-manager-rolebinding-for-secrets
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.0.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-gpu-operator-charts-controller-manager
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-kmm-manager-role

View File

@ -0,0 +1,19 @@
---
# Binds the gpu-operator manager ClusterRole to the controller-manager
# ServiceAccount in kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-manager-rolebinding
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-gpu-operator-charts-controller-manager
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-manager-role

View File

@ -0,0 +1,19 @@
---
# Binds the ClusterRole of the same name to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-metrics-exporter-rbac-proxy.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-metrics-exporter-rbac-proxy
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-metrics-exporter-rbac-proxy
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-metrics-exporter-rbac-proxy

View File

@ -0,0 +1,19 @@
---
# Binds the ClusterRole of the same name to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-metrics-exporter.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-metrics-exporter
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-metrics-exporter
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-metrics-exporter

View File

@ -0,0 +1,19 @@
---
# Binds the ClusterRole of the same name to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-node-labeller.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-node-labeller
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-node-labeller
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-node-labeller

View File

@ -0,0 +1,21 @@
---
# Helm pre-delete hook (weight "1"): binds the pre-delete ClusterRole to the
# ServiceAccount of the same name in kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-pre-delete
  annotations:
    helm.sh/hook: pre-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
    helm.sh/hook-weight: "1"
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-gpu-operator-charts-pre-delete
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-pre-delete

View File

@ -0,0 +1,21 @@
---
# Helm post-delete hook (weight "1"): binds the prune ClusterRole to the
# ServiceAccount of the same name in kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-prune
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
    helm.sh/hook-weight: "1"
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-gpu-operator-charts-prune
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-prune

View File

@ -0,0 +1,19 @@
---
# Binds the ClusterRole of the same name to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-test-runner.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-test-runner
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-test-runner
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-test-runner

View File

@ -0,0 +1,19 @@
---
# Binds the ClusterRole of the same name to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-utils-container.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-gpu-operator-charts-utils-container
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-utils-container
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-gpu-operator-charts-utils-container

View File

@ -0,0 +1,19 @@
---
# Binds the KMM event-recorder ClusterRole to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-kmm-controller.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-kmm-event-recorder-clusterrolebinding
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-kmm-controller
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-kmm-event-recorder-clusterrole

View File

@ -0,0 +1,19 @@
---
# Binds the KMM manager ClusterRole to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-kmm-controller.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-kmm-manager-rolebinding
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-kmm-controller
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-kmm-manager-role

View File

@ -0,0 +1,19 @@
---
# Binds the KMM proxy ClusterRole to the ServiceAccount
# kube-amd-gpu/amd-gpu-operator-kmm-controller.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-kmm-proxy-rolebinding
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-kmm-controller
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-kmm-proxy-role

View File

@ -0,0 +1,17 @@
---
# Binds the NFD garbage-collector ClusterRole to the ServiceAccount of the
# same name in kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-node-feature-discovery-gc
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-node-feature-discovery-gc
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-node-feature-discovery-gc

View File

@ -0,0 +1,20 @@
---
# Helm post-delete hook: binds the NFD prune ClusterRole to the
# ServiceAccount of the same name in kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-node-feature-discovery-prune
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-node-feature-discovery-prune
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-node-feature-discovery-prune

View File

@ -0,0 +1,17 @@
---
# Binds the NFD ClusterRole to the ServiceAccount of the same name in
# kube-amd-gpu.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: amd-gpu-operator-node-feature-discovery
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
subjects:
  - kind: ServiceAccount
    name: amd-gpu-operator-node-feature-discovery
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: amd-gpu-operator-node-feature-discovery

View File

@ -0,0 +1,17 @@
---
# Helm pre-upgrade/pre-rollback hook (weight "1"): binds
# "pre-upgrade-check-cluster-role" to the ServiceAccount
# kube-amd-gpu/pre-upgrade-check-sa.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pre-upgrade-check-cluster-role-binding
  annotations:
    helm.sh/hook: pre-upgrade,pre-rollback
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
    helm.sh/hook-weight: "1"
subjects:
  - kind: ServiceAccount
    name: pre-upgrade-check-sa
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pre-upgrade-check-cluster-role

View File

@ -0,0 +1,17 @@
---
# Helm pre-upgrade/pre-rollback hook (weight "2"): binds
# "upgrade-crd-hook-cluster-role" to the ServiceAccount
# kube-amd-gpu/upgrade-crd-hook-sa.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: upgrade-crd-hook-cluster-role-binding
  annotations:
    helm.sh/hook: pre-upgrade,pre-rollback
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
    helm.sh/hook-weight: "2"
subjects:
  - kind: ServiceAccount
    name: upgrade-crd-hook-sa
    namespace: kube-amd-gpu
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: upgrade-crd-hook-cluster-role

View File

@ -0,0 +1,52 @@
---
# Config-manager permissions: write events, read/update nodes, and full
# lifecycle (minus patch) on daemonsets and pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-config-manager
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [""]
    resources: [events]
    verbs: [create, get, list, update]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, watch, update]
  - apiGroups: [apps]
    resources: [daemonsets]
    verbs: [get, list, watch, delete, create, update]
  - apiGroups: [""]
    resources: [pods]
    verbs: [get, list, watch, delete, create, update]

View File

@ -0,0 +1,19 @@
---
# Allows creating and patching core events.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-event-recorder-clusterrole
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [""]
    resources: [events]
    verbs: [create, patch]

View File

@ -0,0 +1,202 @@
---
# Permissions of the gpu-operator controller manager, grouped by API group:
# core objects, amd.com DeviceConfigs, CRDs, apps DaemonSets, KMM modules,
# Prometheus ServiceMonitors and nfd.openshift.io NodeFeatureDiscoveries.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-manager-role
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  # --- core API group ---
  - apiGroups: [""]
    resources: [configmaps, secrets, services]
    verbs: [create, delete, get, list, patch, watch]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, patch, watch]
  - apiGroups: [""]
    resources: [nodes/finalizers, nodes/status]
    verbs: [get, update, watch]
  - apiGroups: [""]
    resources: [pods]
    verbs: [create, delete, get, list, watch]
  - apiGroups: [""]
    resources: [pods/eviction]
    verbs: [create, delete, get, list]
  - apiGroups: [""]
    resources: [pods/finalizers, pods/status]
    verbs: [delete, get, list, watch]
  - apiGroups: [""]
    resources: [services/finalizers]
    verbs: [create, get, update, watch]
  # --- amd.com ---
  - apiGroups: [amd.com]
    resources: [deviceconfigs]
    verbs: [create, get, list, patch, update, watch]
  - apiGroups: [amd.com]
    resources: [deviceconfigs/finalizers]
    verbs: [update]
  - apiGroups: [amd.com]
    resources: [deviceconfigs/status]
    verbs: [get, patch, update]
  # --- apiextensions.k8s.io ---
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [delete, get, list, watch]
  # --- apps ---
  - apiGroups: [apps]
    resources: [daemonsets, daemonsets/status]
    verbs: [create, delete, get, list, patch, watch]
  - apiGroups: [apps]
    resources: [daemonsets/finalizers]
    verbs: [create, get, update, watch]
  # --- kmm.sigs.x-k8s.io ---
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [modules]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [modules/finalizers, nodemodulesconfigs/finalizers]
    verbs: [get, update, watch]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [modules/status]
    verbs: [get, patch, update]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [nodemodulesconfigs, nodemodulesconfigs/status]
    verbs: [get, list, watch]
  # --- monitoring.coreos.com ---
  - apiGroups: [monitoring.coreos.com]
    resources: [servicemonitors]
    verbs: [create, delete, get, list, patch, update, watch]
  # --- nfd.openshift.io ---
  - apiGroups: [nfd.openshift.io]
    resources: [nodefeaturediscoveries]
    verbs: [delete, get, list]
  - apiGroups: [nfd.openshift.io]
    resources:
      - nodefeaturediscoveries/finalizers
      - nodefeaturediscoveries/status
    verbs: [get, update]

View File

@ -0,0 +1,40 @@
---
# Allows creating TokenReviews and SubjectAccessReviews, plus node
# read/update and pod read access.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-metrics-exporter-rbac-proxy
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [authentication.k8s.io]
    resources: [tokenreviews]
    verbs: [create]
  - apiGroups: [authorization.k8s.io]
    resources: [subjectaccessreviews]
    verbs: [create]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [watch, get, list, update]
  - apiGroups: [""]
    resources: [pods]
    verbs: [get, list]

View File

@ -0,0 +1,28 @@
---
# Metrics exporter permissions: read/update nodes, read pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-metrics-exporter
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [""]
    resources: [nodes]
    verbs: [watch, get, list, update]
  - apiGroups: [""]
    resources: [pods]
    verbs: [get, list]

View File

@ -0,0 +1,21 @@
---
# Node labeller permissions: watch, read and update nodes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-node-labeller
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [""]
    resources: [nodes]
    verbs: [watch, get, list, update]

View File

@ -0,0 +1,22 @@
---
# Helm pre-delete hook (weight "0"): allows reading and deleting amd.com
# DeviceConfigs.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-pre-delete
  annotations:
    helm.sh/hook: pre-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
    helm.sh/hook-weight: "0"
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [amd.com]
    resources: [deviceconfigs]
    verbs: [get, list, delete]

View File

@ -0,0 +1,22 @@
---
# Helm post-delete hook (weight "0"): allows reading and deleting
# CustomResourceDefinitions.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-prune
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
    helm.sh/hook-weight: "0"
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [delete, get, list]

View File

@ -0,0 +1,27 @@
---
# Test runner permissions: write/read events and patch nodes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-test-runner
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [""]
    resources: [events]
    verbs: [create, get, list, update]
  - apiGroups: [""]
    resources: [nodes]
    verbs: [patch]

View File

@ -0,0 +1,20 @@
---
# Grants "use" on the security.openshift.io SecurityContextConstraints
# object named "privileged".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-gpu-operator-charts-utils-container
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
rules:
  - apiGroups: [security.openshift.io]
    resources: [securitycontextconstraints]
    resourceNames: [privileged]
    verbs: [use]

View File

@ -0,0 +1,19 @@
---
# Allows creating and patching core events (KMM component).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-kmm-event-recorder-clusterrole
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
rules:
  - apiGroups: [""]
    resources: [events]
    verbs: [create, patch]

View File

@ -0,0 +1,121 @@
---
# Permissions of the KMM manager: DaemonSets, open-cluster-management
# ClusterClaims, core objects and the kmm.sigs.x-k8s.io resources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-kmm-manager-role
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
rules:
  - apiGroups: [apps]
    resources: [daemonsets]
    verbs: [create, delete, get, list, patch, watch]
  - apiGroups: [cluster.open-cluster-management.io]
    resources: [clusterclaims]
    verbs: [create, get, list, watch]
  # Mutating verbs on ClusterClaims are limited to one named object.
  - apiGroups: [cluster.open-cluster-management.io]
    resources: [clusterclaims]
    resourceNames: [kernel-versions.kmm.node.kubernetes.io]
    verbs: [delete, patch, update]
  - apiGroups: [""]
    resources: [configmaps, secrets, serviceaccounts]
    verbs: [get, list, watch]
  - apiGroups: [""]
    resources: [namespaces, nodes]
    verbs: [get, list, patch, watch]
  - apiGroups: [""]
    resources: [pods]
    verbs: [create, delete, get, list, patch, watch]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [modules]
    verbs: [get, list, patch, update, watch]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [modules/status, preflightvalidations/status]
    verbs: [get, patch, update]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [nodemodulesconfigs]
    verbs: [create, delete, get, list, patch, watch]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [nodemodulesconfigs/status]
    verbs: [patch]
  - apiGroups: [kmm.sigs.x-k8s.io]
    resources: [preflightvalidations]
    verbs: [create, delete, get, list, patch, update, watch]

View File

@ -0,0 +1,16 @@
---
# Allows GET on the non-resource URL /metrics.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-kmm-metrics-reader
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
rules:
  - nonResourceURLs: [/metrics]
    verbs: [get]

View File

@ -0,0 +1,24 @@
---
# Allows creating TokenReviews and SubjectAccessReviews.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-kmm-proxy-role
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
rules:
  - apiGroups: [authentication.k8s.io]
    resources: [tokenreviews]
    verbs: [create]
  - apiGroups: [authorization.k8s.io]
    resources: [subjectaccessreviews]
    verbs: [create]

View File

@ -0,0 +1,37 @@
---
# NFD garbage collector permissions: list/watch nodes, GET nodes/proxy, and
# list/delete NodeResourceTopologies and NodeFeatures.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-node-feature-discovery-gc
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
rules:
  - apiGroups: [""]
    resources: [nodes]
    verbs: [list, watch]
  - apiGroups: [""]
    resources: [nodes/proxy]
    verbs: [get]
  - apiGroups: [topology.node.k8s.io]
    resources: [noderesourcetopologies]
    verbs: [delete, list]
  - apiGroups: [nfd.k8s-sigs.io]
    resources: [nodefeatures]
    verbs: [delete, list]

View File

@ -0,0 +1,23 @@
---
# Helm post-delete hook: allows reading and modifying nodes and nodes/status.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-node-feature-discovery-prune
  annotations:
    helm.sh/hook: post-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
rules:
  - apiGroups: [""]
    resources: [nodes, nodes/status]
    verbs: [get, patch, update, list]

View File

@ -0,0 +1,52 @@
---
# NFD permissions: modify nodes and their status, read the nfd.k8s-sigs.io
# custom resources, update NodeFeatureGroup status, and create/hold the
# "nfd-master.nfd.kubernetes.io" Lease.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: amd-gpu-operator-node-feature-discovery
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
rules:
  - apiGroups: [""]
    resources: [nodes, nodes/status]
    verbs: [get, patch, update, list]
  - apiGroups: [nfd.k8s-sigs.io]
    resources: [nodefeatures, nodefeaturerules, nodefeaturegroups]
    verbs: [get, list, watch]
  - apiGroups: [nfd.k8s-sigs.io]
    resources: [nodefeaturegroups/status]
    verbs: [patch, update]
  # Any Lease may be created, but only the named one may be read/updated.
  - apiGroups: [coordination.k8s.io]
    resources: [leases]
    verbs: [create]
  - apiGroups: [coordination.k8s.io]
    resources: [leases]
    resourceNames: [nfd-master.nfd.kubernetes.io]
    verbs: [get, update]

View File

@ -0,0 +1,17 @@
---
# Helm pre-upgrade/pre-rollback hook (weight "0"): read-only access to
# amd.com DeviceConfigs.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pre-upgrade-check-cluster-role
  annotations:
    helm.sh/hook: pre-upgrade,pre-rollback
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
    helm.sh/hook-weight: "0"
rules:
  - apiGroups: [amd.com]
    resources: [deviceconfigs]
    verbs: [list, get]

View File

@ -0,0 +1,21 @@
---
# Helm pre-upgrade/pre-rollback hook (weight "1"): allows creating, reading
# and modifying CustomResourceDefinitions (no delete).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: upgrade-crd-hook-cluster-role
  annotations:
    helm.sh/hook: pre-upgrade,pre-rollback
    helm.sh/hook-delete-policy: before-hook-creation, hook-succeeded
    helm.sh/hook-weight: "1"
rules:
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [create, get, list, watch, patch, update]

View File

@ -0,0 +1,19 @@
---
# ConfigMap "amd-gpu-operator-gpu-operator-charts-manager-config" in
# kube-amd-gpu. Its single key, controller_manager_config.yaml, holds an
# embedded YAML document setting the health probe address (:8081), the
# metrics address (127.0.0.1:8080) and leader-election values
# (enabled: true, resourceID: gpu.amd.com).
# NOTE(review): the embedded document's indentation is not visible in this
# view; "enabled"/"resourceID" are presumably nested under "leaderElection".
apiVersion: v1
data:
controller_manager_config.yaml: |-
healthProbeBindAddress: :8081
metricsBindAddress: 127.0.0.1:8080
leaderElection:
enabled: true
resourceID: gpu.amd.com
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: amd-gpu
app.kubernetes.io/instance: amd-gpu-operator
app.kubernetes.io/name: gpu-operator-charts
app.kubernetes.io/part-of: amd-gpu
app.kubernetes.io/version: v1.3.0
name: amd-gpu-operator-gpu-operator-charts-manager-config
namespace: kube-amd-gpu

View File

@ -0,0 +1,27 @@
---
# ConfigMap "amd-gpu-operator-kmm-manager-config" in kube-amd-gpu. Its single
# key, controller_config.yaml, holds an embedded YAML document with the
# health probe address (:8081), webhook port (9443), leader-election values,
# metrics values (bind address 0.0.0.0:8443) and worker values
# (runAsUser 0, seLinuxType spc_t, firmwareHostPath /var/lib/firmware).
# NOTE(review): the embedded document's indentation is not visible in this
# view, so the exact nesting of those values should be confirmed upstream.
apiVersion: v1
data:
controller_config.yaml: |-
healthProbeBindAddress: :8081
webhookPort: 9443
leaderElection:
enabled: true
resourceID: kmm.sigs.x-k8s.io
metrics:
enableAuthnAuthz: true
bindAddress: 0.0.0.0:8443
secureServing: true
worker:
runAsUser: 0
seLinuxType: spc_t
firmwareHostPath: /var/lib/firmware
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: kmm
app.kubernetes.io/instance: amd-gpu-operator
app.kubernetes.io/name: kmm
app.kubernetes.io/part-of: kmm
app.kubernetes.io/version: v20240618-v2.1.1
name: amd-gpu-operator-kmm-manager-config
namespace: kube-amd-gpu

View File

@ -0,0 +1,12 @@
---
# nfd-master configuration file. The content is the literal string "null",
# i.e. no explicit settings are provided.
apiVersion: v1
kind: ConfigMap
metadata:
  name: amd-gpu-operator-node-feature-discovery-master-conf
  namespace: kube-amd-gpu
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
data:
  nfd-master.conf: "null"

View File

@ -0,0 +1,12 @@
---
# Node Feature Discovery worker configuration.
# The nfd-worker.conf key is the literal string "null" (quoted, so it stays
# a string rather than a YAML null). The NFD worker DaemonSet projects this
# key as nfd-worker.conf under /etc/kubernetes/node-feature-discovery.
apiVersion: v1
data:
  nfd-worker.conf: "null"
kind: ConfigMap
metadata:
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
  name: amd-gpu-operator-node-feature-discovery-worker-conf
  namespace: kube-amd-gpu

View File

@ -0,0 +1,16 @@
---
# Provenance record for the rendered amd-gpu-operator manifests.
# source.yaml names the Helm chart (gpu-operator-charts), its repository
# URL, the chart version ("1.3.0"), the release name and namespace, and the
# rendered files listed under source-exclusions.
# NOTE(review): presumably read by ClusterForge tooling only - confirm
# nothing in-cluster depends on this ConfigMap.
apiVersion: v1
data:
  source.yaml: |
    name: amd-gpu-operator
    namespace: "kube-amd-gpu"
    helm-chart-name: "gpu-operator-charts"
    helm-name: amd-gpu-operator
    helm-url: "https://rocm.github.io/gpu-operator"
    helm-version: "1.3.0"
    source-exclusions:
    - Job_pre-upgrade-check.yaml
kind: ConfigMap
metadata:
  name: clusterforge-amd-gpu-operator-info
  namespace: kube-amd-gpu

View File

@ -0,0 +1,127 @@
---
# GPU metrics configuration, stored as a JSON document under config.json.
# GPUConfig.Fields     - metric field names to enable.
# GPUConfig.Labels     - label names to attach.
# GPUConfig.ExtraPodLabels - maps a label name to a pod label key
#                        (airm.silogen.ai/workload-id, usergroup-id,
#                        project-id).
# GPUConfig.CustomLabels   - fixed label name/value pairs.
# NOTE(review): CustomLabels hard-codes ORG_NAME "demo" and
# KUBE_CLUSTER_NAME "demo-cluster" - confirm these are overridden per
# deployment.
# NOTE(review): "PCIE_NAC_RECEIVED_COUNT" is spelled NAC while its sibling
# is "PCIE_NACK_SENT_COUNT"; verify against the consumer's accepted field
# names before changing either.
# NOTE(review): presumably consumed by the AMD device metrics exporter -
# confirm which component references the gpu-config ConfigMap.
apiVersion: v1
data:
  config.json: |
    {
      "GPUConfig": {
        "Fields": [
          "GPU_NODES_TOTAL",
          "GPU_PACKAGE_POWER",
          "GPU_AVERAGE_PACKAGE_POWER",
          "GPU_EDGE_TEMPERATURE",
          "GPU_JUNCTION_TEMPERATURE",
          "GPU_MEMORY_TEMPERATURE",
          "GPU_HBM_TEMPERATURE",
          "GPU_GFX_ACTIVITY",
          "GPU_UMC_ACTIVITY",
          "GPU_MMA_ACTIVITY",
          "GPU_VCN_ACTIVITY",
          "GPU_JPEG_ACTIVITY",
          "GPU_VOLTAGE",
          "GPU_GFX_VOLTAGE",
          "GPU_MEMORY_VOLTAGE",
          "PCIE_SPEED",
          "PCIE_MAX_SPEED",
          "PCIE_BANDWIDTH",
          "GPU_ENERGY_CONSUMED",
          "PCIE_REPLAY_COUNT",
          "PCIE_RECOVERY_COUNT",
          "PCIE_REPLAY_ROLLOVER_COUNT",
          "PCIE_NACK_SENT_COUNT",
          "PCIE_NAC_RECEIVED_COUNT",
          "GPU_CLOCK",
          "GPU_POWER_USAGE",
          "GPU_TOTAL_VRAM",
          "GPU_ECC_CORRECT_TOTAL",
          "GPU_ECC_UNCORRECT_TOTAL",
          "GPU_ECC_CORRECT_SDMA",
          "GPU_ECC_UNCORRECT_SDMA",
          "GPU_ECC_CORRECT_GFX",
          "GPU_ECC_UNCORRECT_GFX",
          "GPU_ECC_CORRECT_MMHUB",
          "GPU_ECC_UNCORRECT_MMHUB",
          "GPU_ECC_CORRECT_ATHUB",
          "GPU_ECC_UNCORRECT_ATHUB",
          "GPU_ECC_CORRECT_BIF",
          "GPU_ECC_UNCORRECT_BIF",
          "GPU_ECC_CORRECT_HDP",
          "GPU_ECC_UNCORRECT_HDP",
          "GPU_ECC_CORRECT_XGMI_WAFL",
          "GPU_ECC_UNCORRECT_XGMI_WAFL",
          "GPU_ECC_CORRECT_DF",
          "GPU_ECC_UNCORRECT_DF",
          "GPU_ECC_CORRECT_SMN",
          "GPU_ECC_UNCORRECT_SMN",
          "GPU_ECC_CORRECT_SEM",
          "GPU_ECC_UNCORRECT_SEM",
          "GPU_ECC_CORRECT_MP0",
          "GPU_ECC_UNCORRECT_MP0",
          "GPU_ECC_CORRECT_MP1",
          "GPU_ECC_UNCORRECT_MP1",
          "GPU_ECC_CORRECT_FUSE",
          "GPU_ECC_UNCORRECT_FUSE",
          "GPU_ECC_CORRECT_UMC",
          "GPU_ECC_UNCORRECT_UMC",
          "GPU_XGMI_NBR_0_NOP_TX",
          "GPU_XGMI_NBR_0_REQ_TX",
          "GPU_XGMI_NBR_0_RESP_TX",
          "GPU_XGMI_NBR_0_BEATS_TX",
          "GPU_XGMI_NBR_1_NOP_TX",
          "GPU_XGMI_NBR_1_REQ_TX",
          "GPU_XGMI_NBR_1_RESP_TX",
          "GPU_XGMI_NBR_1_BEATS_TX",
          "GPU_XGMI_NBR_0_TX_THRPUT",
          "GPU_XGMI_NBR_1_TX_THRPUT",
          "GPU_XGMI_NBR_2_TX_THRPUT",
          "GPU_XGMI_NBR_3_TX_THRPUT",
          "GPU_XGMI_NBR_4_TX_THRPUT",
          "GPU_XGMI_NBR_5_TX_THRPUT",
          "GPU_USED_VRAM",
          "GPU_FREE_VRAM",
          "GPU_TOTAL_VISIBLE_VRAM",
          "GPU_USED_VISIBLE_VRAM",
          "GPU_FREE_VISIBLE_VRAM",
          "GPU_TOTAL_GTT",
          "GPU_USED_GTT",
          "GPU_FREE_GTT",
          "GPU_ECC_CORRECT_MCA",
          "GPU_ECC_UNCORRECT_MCA",
          "GPU_ECC_CORRECT_VCN",
          "GPU_ECC_UNCORRECT_VCN",
          "GPU_ECC_CORRECT_JPEG",
          "GPU_ECC_UNCORRECT_JPEG",
          "GPU_ECC_CORRECT_IH",
          "GPU_ECC_UNCORRECT_IH",
          "GPU_ECC_CORRECT_MPIO",
          "GPU_ECC_UNCORRECT_MPIO"
        ],
        "Labels": [
          "GPU_UUID",
          "SERIAL_NUMBER",
          "GPU_ID",
          "POD",
          "NAMESPACE",
          "CONTAINER",
          "CLUSTER_NAME",
          "CARD_SERIES",
          "CARD_MODEL",
          "CARD_VENDOR",
          "DRIVER_VERSION",
          "VBIOS_VERSION",
          "HOSTNAME"
        ],
        "ExtraPodLabels" : {
          "WORKLOAD_ID" : "airm.silogen.ai/workload-id",
          "USERGROUP_ID" : "airm.silogen.ai/usergroup-id",
          "PROJECT_ID" : "airm.silogen.ai/project-id"
        },
        "CustomLabels" : {
          "ORG_NAME" : "demo",
          "KUBE_CLUSTER_NAME" : "demo-cluster"
        }
      }
    }
kind: ConfigMap
metadata:
  name: gpu-config
  namespace: kube-amd-gpu

View File

@ -0,0 +1,835 @@
---
# CustomResourceDefinition for DeviceConfig (group amd.com).
# Plural "deviceconfigs", short name "gpue", namespaced. One version,
# v1alpha1, is both served and stored and exposes the status subresource.
# NOTE(review): nesting indentation is absent in this view of the file; the
# section comments below follow the key order (spec -> properties -> ...).
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.0
labels:
app.kubernetes.io/component: amd-gpu
app.kubernetes.io/instance: amd-gpu
app.kubernetes.io/name: gpu-operator-charts
app.kubernetes.io/part-of: amd-gpu
app.kubernetes.io/version: v1.3.0
name: deviceconfigs.amd.com
spec:
group: amd.com
names:
kind: DeviceConfig
listKind: DeviceConfigList
plural: deviceconfigs
shortNames:
- gpue
singular: deviceconfig
scope: Namespaced
versions:
- name: v1alpha1
schema:
openAPIV3Schema:
properties:
apiVersion:
type: string
kind:
type: string
metadata:
type: object
spec:
properties:
# commonConfig: init container image plus the utils container
# (image, pull policy, registry secret).
commonConfig:
properties:
initContainerImage:
type: string
utilsContainer:
properties:
image:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
imagePullPolicy:
enum:
- Always
- IfNotPresent
- Never
type: string
imageRegistrySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
type: object
type: object
# configManager: config reference, tolerations, enable flag, image
# settings, selector map and upgrade policy.
configManager:
properties:
config:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
configManagerTolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
enable:
type: boolean
image:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
imagePullPolicy:
enum:
- Always
- IfNotPresent
- Never
type: string
imageRegistrySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
selector:
additionalProperties:
type: string
type: object
upgradePolicy:
properties:
maxUnavailable:
default: 1
format: int32
type: integer
upgradeStrategy:
enum:
- RollingUpdate
- OnDelete
type: string
type: object
type: object
# devicePlugin: device plugin and node labeller images, arguments,
# pull policies and tolerations; enableNodeLabeller defaults to true.
devicePlugin:
properties:
devicePluginArguments:
additionalProperties:
type: string
type: object
devicePluginImage:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
devicePluginImagePullPolicy:
enum:
- Always
- IfNotPresent
- Never
type: string
devicePluginTolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
enableNodeLabeller:
default: true
type: boolean
imageRegistrySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
nodeLabellerArguments:
items:
type: string
type: array
nodeLabellerImage:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
nodeLabellerImagePullPolicy:
enum:
- Always
- IfNotPresent
- Never
type: string
nodeLabellerTolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
upgradePolicy:
properties:
maxUnavailable:
default: 1
format: int32
type: integer
upgradeStrategy:
enum:
- RollingUpdate
- OnDelete
type: string
type: object
type: object
# driver: installer repo URL, blacklist flag, enable (default true),
# image and registry/TLS/signing settings, upgrade policy, version.
driver:
properties:
amdgpuInstallerRepoURL:
type: string
blacklist:
type: boolean
enable:
default: true
type: boolean
image:
# Unlike the other image patterns in this schema, path segments here
# also accept "$", upper-case letters and "_".
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[$a-zA-Z0-9_]+(?:[._-][$a-zA-Z0-9_]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
imageRegistrySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
imageRegistryTLS:
properties:
insecure:
type: boolean
insecureSkipTLSVerify:
type: boolean
type: object
imageSign:
properties:
certSecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
keySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
type: object
upgradePolicy:
properties:
enable:
type: boolean
maxParallelUpgrades:
default: 1
minimum: 0
type: integer
# Integer or string value (int-or-string); defaults to 25%.
maxUnavailableNodes:
anyOf:
- type: integer
- type: string
default: 25%
x-kubernetes-int-or-string: true
nodeDrainPolicy:
properties:
force:
default: false
type: boolean
gracePeriodSeconds:
default: -1
type: integer
timeoutSeconds:
default: 300
minimum: 0
type: integer
type: object
podDeletionPolicy:
properties:
force:
default: false
type: boolean
gracePeriodSeconds:
default: -1
type: integer
timeoutSeconds:
default: 300
minimum: 0
type: integer
type: object
rebootRequired:
default: true
type: boolean
type: object
version:
type: string
type: object
# metricsExporter: config reference, enable flag, image settings,
# service ports, Prometheus ServiceMonitor options, RBAC config,
# selector, service type, tolerations and upgrade policy.
metricsExporter:
properties:
config:
properties:
name:
type: string
type: object
enable:
type: boolean
image:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
imagePullPolicy:
enum:
- Always
- IfNotPresent
- Never
type: string
imageRegistrySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
# nodePort is constrained to 30000-32767; port defaults to 5000.
nodePort:
format: int32
maximum: 32767
minimum: 30000
type: integer
port:
default: 5000
format: int32
type: integer
prometheus:
properties:
serviceMonitor:
properties:
attachMetadata:
properties:
node:
type: boolean
type: object
authorization:
properties:
credentials:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type:
type: string
type: object
bearerTokenFile:
type: string
enable:
type: boolean
honorLabels:
default: true
type: boolean
honorTimestamps:
type: boolean
# interval: digits followed by one of ms, s, m, h.
interval:
pattern: ^([0-9]+)(ms|s|m|h)$
type: string
labels:
additionalProperties:
type: string
type: object
# metricRelabelings and relabelings share the same item schema; each
# action is accepted in lower-case and capitalised spelling.
metricRelabelings:
items:
properties:
action:
default: replace
enum:
- replace
- Replace
- keep
- Keep
- drop
- Drop
- hashmod
- HashMod
- labelmap
- LabelMap
- labeldrop
- LabelDrop
- labelkeep
- LabelKeep
- lowercase
- Lowercase
- uppercase
- Uppercase
- keepequal
- KeepEqual
- dropequal
- DropEqual
type: string
modulus:
format: int64
type: integer
regex:
type: string
replacement:
type: string
separator:
type: string
sourceLabels:
items:
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
type: string
type: array
targetLabel:
type: string
type: object
type: array
relabelings:
items:
properties:
action:
default: replace
enum:
- replace
- Replace
- keep
- Keep
- drop
- Drop
- hashmod
- HashMod
- labelmap
- LabelMap
- labeldrop
- LabelDrop
- labelkeep
- LabelKeep
- lowercase
- Lowercase
- uppercase
- Uppercase
- keepequal
- KeepEqual
- dropequal
- DropEqual
type: string
modulus:
format: int64
type: integer
regex:
type: string
replacement:
type: string
separator:
type: string
sourceLabels:
items:
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
type: string
type: array
targetLabel:
type: string
type: object
type: array
# tlsConfig: CA / cert given as ConfigMap or Secret key references or as
# file paths, plus key secret, TLS version bounds and server name.
tlsConfig:
properties:
ca:
properties:
configMap:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
caFile:
type: string
cert:
properties:
configMap:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
secret:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
certFile:
type: string
insecureSkipVerify:
type: boolean
keyFile:
type: string
keySecret:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
maxVersion:
enum:
- TLS10
- TLS11
- TLS12
- TLS13
type: string
minVersion:
enum:
- TLS10
- TLS11
- TLS12
- TLS13
type: string
serverName:
type: string
type: object
type: object
type: object
# rbacConfig: client CA ConfigMap, HTTPS toggle, enable flag, image,
# secret reference and static authorization settings.
rbacConfig:
properties:
clientCAConfigMap:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
disableHttps:
type: boolean
enable:
type: boolean
image:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
secret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
staticAuthorization:
properties:
clientName:
type: string
enable:
type: boolean
type: object
type: object
selector:
additionalProperties:
type: string
type: object
# serviceType is ClusterIP (default) or NodePort.
serviceType:
default: ClusterIP
enum:
- ClusterIP
- NodePort
type: string
tolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
upgradePolicy:
properties:
maxUnavailable:
default: 1
format: int32
type: integer
upgradeStrategy:
enum:
- RollingUpdate
- OnDelete
type: string
type: object
type: object
# selector: free-form string-to-string map.
selector:
additionalProperties:
type: string
type: object
# testRunner: config reference, enable flag, image settings, log
# locations (host and mount paths default to /var/log/amd-test-runner),
# selector, tolerations and upgrade policy.
testRunner:
properties:
config:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
enable:
type: boolean
image:
pattern: ^([a-z0-9]+(?:[._-][a-z0-9]+)*(:[0-9]+)?)(/[a-z0-9]+(?:[._-][a-z0-9]+)*)*(?::[a-z0-9._-]+)?(?:@[a-zA-Z0-9]+:[a-f0-9]+)?$
type: string
imagePullPolicy:
enum:
- Always
- IfNotPresent
- Never
type: string
imageRegistrySecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
logsLocation:
properties:
hostPath:
default: /var/log/amd-test-runner
type: string
logsExportSecrets:
items:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
type: array
mountPath:
default: /var/log/amd-test-runner
type: string
type: object
selector:
additionalProperties:
type: string
type: object
tolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
upgradePolicy:
properties:
maxUnavailable:
default: 1
format: int32
type: integer
upgradeStrategy:
enum:
- RollingUpdate
- OnDelete
type: string
type: object
type: object
type: object
# status: condition list, per-component counters (available / desired /
# nodes matching selector), per-node module status map and the observed
# generation.
status:
properties:
conditions:
items:
properties:
lastTransitionTime:
format: date-time
type: string
message:
maxLength: 32768
type: string
observedGeneration:
format: int64
minimum: 0
type: integer
reason:
maxLength: 1024
minLength: 1
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
type: string
status:
enum:
- "True"
- "False"
- Unknown
type: string
type:
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
required:
- lastTransitionTime
- message
- reason
- status
- type
type: object
type: array
configManager:
properties:
availableNumber:
format: int32
type: integer
desiredNumber:
format: int32
type: integer
nodesMatchingSelectorNumber:
format: int32
type: integer
type: object
devicePlugin:
properties:
availableNumber:
format: int32
type: integer
desiredNumber:
format: int32
type: integer
nodesMatchingSelectorNumber:
format: int32
type: integer
type: object
driver:
properties:
availableNumber:
format: int32
type: integer
desiredNumber:
format: int32
type: integer
nodesMatchingSelectorNumber:
format: int32
type: integer
type: object
metricsExporter:
properties:
availableNumber:
format: int32
type: integer
desiredNumber:
format: int32
type: integer
nodesMatchingSelectorNumber:
format: int32
type: integer
type: object
nodeModuleStatus:
additionalProperties:
properties:
bootId:
type: string
containerImage:
type: string
kernelVersion:
type: string
lastTransitionTime:
type: string
status:
type: string
upgradeStartTime:
type: string
type: object
type: object
observedGeneration:
format: int64
type: integer
type: object
type: object
served: true
storage: true
subresources:
status: {}
# NOTE(review): the stanza below is the CRD object's own status with empty
# values; presumably emitted by the manifest serializer and populated by the
# API server - confirm whether it should be kept in the committed manifest.
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,181 @@
---
# CustomResourceDefinition for NodeFeatureGroup (group nfd.k8s-sigs.io).
# Plural "nodefeaturegroups", short name "nfg", namespaced. One version,
# v1alpha1, is served and stored and exposes the status subresource.
# spec.featureGroupRules is required; each rule requires a name and may
# carry matchFeatures and/or matchAny (a list of matchFeatures groups).
# NOTE(review): nesting indentation is absent in this view of the file.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.14.0
name: nodefeaturegroups.nfd.k8s-sigs.io
spec:
group: nfd.k8s-sigs.io
names:
kind: NodeFeatureGroup
listKind: NodeFeatureGroupList
plural: nodefeaturegroups
shortNames:
- nfg
singular: nodefeaturegroup
scope: Namespaced
versions:
- name: v1alpha1
schema:
openAPIV3Schema:
properties:
apiVersion:
type: string
kind:
type: string
metadata:
type: object
spec:
properties:
featureGroupRules:
items:
properties:
# matchAny: list of alternatives, each requiring its own matchFeatures.
matchAny:
items:
properties:
matchFeatures:
items:
properties:
feature:
type: string
# matchExpressions: map of expression objects; each requires "op" and
# may list string values.
matchExpressions:
additionalProperties:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
type: object
# matchName: a single expression with the same op/value shape.
matchName:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
required:
- feature
type: object
type: array
required:
- matchFeatures
type: object
type: array
# matchFeatures (rule level): same item schema as inside matchAny.
matchFeatures:
items:
properties:
feature:
type: string
matchExpressions:
additionalProperties:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
type: object
matchName:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
required:
- feature
type: object
type: array
name:
type: string
required:
- name
type: object
type: array
required:
- featureGroupRules
type: object
# status.nodes: list treated as a map keyed by "name".
status:
properties:
nodes:
items:
properties:
name:
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
type: object
required:
- spec
type: object
served: true
storage: true
subresources:
status: {}

View File

@ -0,0 +1,201 @@
---
# CustomResourceDefinition for NodeFeatureRule (group nfd.k8s-sigs.io).
# Plural "nodefeaturerules", short name "nfr", cluster-scoped. One version,
# v1alpha1, is served and stored; no subresources are declared.
# spec.rules is required; each rule requires a name and may set
# annotations, extendedResources, labels, labelsTemplate, taints, vars and
# varsTemplate, gated by matchFeatures and/or matchAny.
# NOTE(review): nesting indentation is absent in this view of the file.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.14.0
name: nodefeaturerules.nfd.k8s-sigs.io
spec:
group: nfd.k8s-sigs.io
names:
kind: NodeFeatureRule
listKind: NodeFeatureRuleList
plural: nodefeaturerules
shortNames:
- nfr
singular: nodefeaturerule
scope: Cluster
versions:
- name: v1alpha1
schema:
openAPIV3Schema:
properties:
apiVersion:
type: string
kind:
type: string
metadata:
type: object
spec:
properties:
rules:
items:
properties:
annotations:
additionalProperties:
type: string
type: object
extendedResources:
additionalProperties:
type: string
type: object
labels:
additionalProperties:
type: string
type: object
labelsTemplate:
type: string
# matchAny: list of alternatives, each requiring its own matchFeatures.
matchAny:
items:
properties:
matchFeatures:
items:
properties:
feature:
type: string
# matchExpressions: map of expression objects; each requires "op" and
# may list string values.
matchExpressions:
additionalProperties:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
type: object
matchName:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
required:
- feature
type: object
type: array
required:
- matchFeatures
type: object
type: array
# matchFeatures (rule level): same item schema as inside matchAny.
matchFeatures:
items:
properties:
feature:
type: string
matchExpressions:
additionalProperties:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
type: object
matchName:
properties:
op:
enum:
- In
- NotIn
- InRegexp
- Exists
- DoesNotExist
- Gt
- Lt
- GtLt
- IsTrue
- IsFalse
type: string
value:
items:
type: string
type: array
required:
- op
type: object
required:
- feature
type: object
type: array
name:
type: string
# taints: each entry requires effect and key; timeAdded is a date-time.
taints:
items:
properties:
effect:
type: string
key:
type: string
timeAdded:
format: date-time
type: string
value:
type: string
required:
- effect
- key
type: object
type: array
vars:
additionalProperties:
type: string
type: object
varsTemplate:
type: string
required:
- name
type: object
type: array
required:
- rules
type: object
required:
- spec
type: object
served: true
storage: true

View File

@ -0,0 +1,81 @@
---
# CustomResourceDefinition for NodeFeature (group nfd.k8s-sigs.io).
# Plural "nodefeatures", namespaced. One version, v1alpha1, is served and
# stored; no subresources are declared.
# spec (required) holds:
#   features.attributes - map of objects whose "elements" is a string map
#   features.flags      - map of objects whose "elements" is a map of
#                         objects
#   features.instances  - map of objects whose "elements" is a list of
#                         items carrying an "attributes" string map
#   labels              - string-to-string map
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.14.0
  name: nodefeatures.nfd.k8s-sigs.io
spec:
  group: nfd.k8s-sigs.io
  names:
    kind: NodeFeature
    listKind: NodeFeatureList
    plural: nodefeatures
    singular: nodefeature
  scope: Namespaced
  versions:
    - name: v1alpha1
      schema:
        openAPIV3Schema:
          properties:
            apiVersion:
              type: string
            kind:
              type: string
            metadata:
              type: object
            spec:
              properties:
                features:
                  properties:
                    attributes:
                      additionalProperties:
                        properties:
                          elements:
                            additionalProperties:
                              type: string
                            type: object
                        required:
                          - elements
                        type: object
                      type: object
                    flags:
                      additionalProperties:
                        properties:
                          elements:
                            additionalProperties:
                              type: object
                            type: object
                        required:
                          - elements
                        type: object
                      type: object
                    instances:
                      additionalProperties:
                        properties:
                          elements:
                            items:
                              properties:
                                attributes:
                                  additionalProperties:
                                    type: string
                                  type: object
                              required:
                                - attributes
                              type: object
                            type: array
                        required:
                          - elements
                        type: object
                      type: object
                  type: object
                labels:
                  additionalProperties:
                    type: string
                  type: object
              type: object
          required:
            - spec
          type: object
      served: true
      storage: true

View File

@ -0,0 +1,269 @@
---
# CustomResourceDefinition for NodeModulesConfig (group kmm.sigs.x-k8s.io).
# Plural "nodemodulesconfigs", short name "nmc", cluster-scoped. One
# version, v1beta1, is served and stored and exposes the status subresource.
# spec.modules and status.modules are lists that share the same per-module
# "config" schema (container image, kernel version, modprobe settings,
# tolerations); status entries add bootId and lastTransitionTime.
# NOTE(review): nesting indentation is absent in this view of the file.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.1
labels:
app.kubernetes.io/component: kmm
app.kubernetes.io/instance: amd-gpu
app.kubernetes.io/name: kmm
app.kubernetes.io/part-of: kmm
app.kubernetes.io/version: v20240618-v2.1.1
name: nodemodulesconfigs.kmm.sigs.x-k8s.io
spec:
group: kmm.sigs.x-k8s.io
names:
kind: NodeModulesConfig
listKind: NodeModulesConfigList
plural: nodemodulesconfigs
shortNames:
- nmc
singular: nodemodulesconfig
scope: Cluster
versions:
- name: v1beta1
schema:
openAPIV3Schema:
properties:
apiVersion:
type: string
kind:
type: string
metadata:
type: object
# spec.modules: each entry requires config, name, namespace and
# serviceAccountName.
spec:
properties:
modules:
items:
properties:
config:
properties:
containerImage:
type: string
imagePullPolicy:
default: IfNotPresent
type: string
inTreeModuleToRemove:
type: string
inTreeModulesToRemove:
items:
type: string
type: array
insecurePull:
type: boolean
kernelVersion:
type: string
# modprobe: load/unload argument lists need at least one item when
# present; dirName defaults to /opt.
modprobe:
properties:
args:
properties:
load:
items:
type: string
minItems: 1
type: array
unload:
items:
type: string
minItems: 1
type: array
type: object
dirName:
default: /opt
type: string
firmwarePath:
type: string
moduleName:
type: string
modulesLoadingOrder:
items:
type: string
type: array
parameters:
items:
type: string
type: array
rawArgs:
properties:
load:
items:
type: string
minItems: 1
type: array
unload:
items:
type: string
minItems: 1
type: array
type: object
type: object
tolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
required:
- containerImage
- imagePullPolicy
- insecurePull
- kernelVersion
- modprobe
type: object
imageRepoSecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
name:
type: string
namespace:
type: string
serviceAccountName:
type: string
required:
- config
- name
- namespace
- serviceAccountName
type: object
type: array
type: object
# status.modules: same config schema as spec; entries require name,
# namespace and serviceAccountName (config is not required here).
status:
properties:
modules:
items:
properties:
bootId:
type: string
config:
properties:
containerImage:
type: string
imagePullPolicy:
default: IfNotPresent
type: string
inTreeModuleToRemove:
type: string
inTreeModulesToRemove:
items:
type: string
type: array
insecurePull:
type: boolean
kernelVersion:
type: string
modprobe:
properties:
args:
properties:
load:
items:
type: string
minItems: 1
type: array
unload:
items:
type: string
minItems: 1
type: array
type: object
dirName:
default: /opt
type: string
firmwarePath:
type: string
moduleName:
type: string
modulesLoadingOrder:
items:
type: string
type: array
parameters:
items:
type: string
type: array
rawArgs:
properties:
load:
items:
type: string
minItems: 1
type: array
unload:
items:
type: string
minItems: 1
type: array
type: object
type: object
tolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
required:
- containerImage
- imagePullPolicy
- insecurePull
- kernelVersion
- modprobe
type: object
imageRepoSecret:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
lastTransitionTime:
format: date-time
type: string
name:
type: string
namespace:
type: string
serviceAccountName:
type: string
required:
- name
- namespace
- serviceAccountName
type: object
type: array
type: object
type: object
served: true
storage: true
subresources:
status: {}
# NOTE(review): the stanza below is the CRD object's own status with empty
# values; presumably emitted by the manifest serializer and populated by the
# API server - confirm whether it should be kept in the committed manifest.
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

View File

@ -0,0 +1,165 @@
---
# CustomResourceDefinition for PreflightValidation (group
# kmm.sigs.x-k8s.io). Plural "preflightvalidations", short name "pfv",
# cluster-scoped.
# Two versions are served: v1beta1 (deprecated, not stored) and v1beta2
# (stored). Conversion between them goes through a webhook at /convert on
# the amd-gpu-operator-kmm-webhook-service Service in kube-amd-gpu; the
# cert-manager annotation injects the CA from the
# kube-amd-gpu/amd-gpu-operator-kmm-serving-cert Certificate.
# Both versions share the same spec (kernelVersion required,
# pushBuiltImage optional). Status differs: v1beta1 uses a crStatuses map,
# v1beta2 uses a modules list keyed by (namespace, name).
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    cert-manager.io/inject-ca-from: kube-amd-gpu/amd-gpu-operator-kmm-serving-cert
    controller-gen.kubebuilder.io/version: v0.16.1
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
  name: preflightvalidations.kmm.sigs.x-k8s.io
spec:
  conversion:
    strategy: Webhook
    webhook:
      clientConfig:
        service:
          name: amd-gpu-operator-kmm-webhook-service
          namespace: kube-amd-gpu
          path: /convert
      conversionReviewVersions:
        - v1beta2
        - v1beta1
  group: kmm.sigs.x-k8s.io
  names:
    kind: PreflightValidation
    listKind: PreflightValidationList
    plural: preflightvalidations
    shortNames:
      - pfv
    singular: preflightvalidation
  scope: Cluster
  versions:
    # Deprecated version: still served, no longer the storage version.
    - deprecated: true
      name: v1beta1
      schema:
        openAPIV3Schema:
          properties:
            apiVersion:
              type: string
            kind:
              type: string
            metadata:
              type: object
            spec:
              properties:
                kernelVersion:
                  type: string
                pushBuiltImage:
                  type: boolean
              required:
                - kernelVersion
              type: object
            status:
              properties:
                crStatuses:
                  additionalProperties:
                    properties:
                      lastTransitionTime:
                        format: date-time
                        type: string
                      statusReason:
                        type: string
                      verificationStage:
                        enum:
                          - Image
                          - Build
                          - Sign
                          - Requeued
                          - Done
                        type: string
                      verificationStatus:
                        enum:
                          - "True"
                          - "False"
                        type: string
                    required:
                      - lastTransitionTime
                      - verificationStage
                      - verificationStatus
                    type: object
                  type: object
              type: object
          required:
            - spec
          type: object
      served: true
      storage: false
      subresources:
        status: {}
    # Storage version.
    - name: v1beta2
      schema:
        openAPIV3Schema:
          properties:
            apiVersion:
              type: string
            kind:
              type: string
            metadata:
              type: object
            spec:
              properties:
                kernelVersion:
                  type: string
                pushBuiltImage:
                  type: boolean
              required:
                - kernelVersion
              type: object
            status:
              properties:
                modules:
                  items:
                    properties:
                      lastTransitionTime:
                        format: date-time
                        type: string
                      name:
                        type: string
                      namespace:
                        type: string
                      statusReason:
                        type: string
                      verificationStage:
                        enum:
                          - Image
                          - Build
                          - Sign
                          - Requeued
                          - Done
                        type: string
                      verificationStatus:
                        enum:
                          - "True"
                          - "False"
                        type: string
                    required:
                      - lastTransitionTime
                      - name
                      - namespace
                      - verificationStage
                      - verificationStatus
                    type: object
                  type: array
                  x-kubernetes-list-map-keys:
                    - namespace
                    - name
                  x-kubernetes-list-type: map
              type: object
          required:
            - spec
          type: object
      served: true
      storage: true
      subresources:
        status: {}
# NOTE(review): the stanza below is the CRD object's own status with empty
# values; presumably emitted by the manifest serializer and populated by the
# API server - confirm whether it should be kept in the committed manifest.
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []

View File

@ -0,0 +1,141 @@
---
# DaemonSet running the Node Feature Discovery worker (nfd-worker).
# - Args enable the NodeFeatureAPI feature gate, disable
#   NodeFeatureGroupAPI, and serve metrics on 8081.
# - Liveness and readiness use gRPC probes on port 8082.
# - The container runs non-root with a read-only root filesystem, no
#   privilege escalation and all capabilities dropped.
# - Host paths (/boot, /etc/os-release, /sys, /usr/lib, /lib, /proc/swaps
#   and the NFD source.d / features.d directories) are mounted read-only.
# - nfd-worker.conf comes from the
#   amd-gpu-operator-node-feature-discovery-worker-conf ConfigMap.
# - Pods tolerate the NoExecute taint amd-dcm=up.
# NOTE(review): only a memory limit is set (no CPU limit) - presumably
# intentional; confirm.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: node-feature-discovery
    app.kubernetes.io/version: v0.16.1
    role: worker
  name: amd-gpu-operator-node-feature-discovery-worker
  namespace: kube-amd-gpu
spec:
  selector:
    matchLabels:
      app.kubernetes.io/instance: amd-gpu-operator
      app.kubernetes.io/name: node-feature-discovery
      role: worker
  template:
    metadata:
      labels:
        app.kubernetes.io/instance: amd-gpu-operator
        app.kubernetes.io/name: node-feature-discovery
        role: worker
    spec:
      containers:
        - args:
            - -feature-gates=NodeFeatureAPI=true
            - -feature-gates=NodeFeatureGroupAPI=false
            - -metrics=8081
          command:
            - nfd-worker
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_UID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
          image: registry.k8s.io/nfd/node-feature-discovery@sha256:0bd331dc6acf082d3353a7ddad6b5725a13cd141b60b31e97a1cd4cf90da0d99  # Original tag: v0.16.1
          imagePullPolicy: IfNotPresent
          livenessProbe:
            grpc:
              port: 8082
            initialDelaySeconds: 10
            periodSeconds: 10
          name: worker
          ports:
            - containerPort: 8081
              name: metrics
          readinessProbe:
            failureThreshold: 10
            grpc:
              port: 8082
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            limits:
              memory: 512Mi
            requests:
              cpu: 5m
              memory: 64Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
          volumeMounts:
            - mountPath: /host-boot
              name: host-boot
              readOnly: true
            - mountPath: /host-etc/os-release
              name: host-os-release
              readOnly: true
            - mountPath: /host-sys
              name: host-sys
              readOnly: true
            - mountPath: /host-usr/lib
              name: host-usr-lib
              readOnly: true
            - mountPath: /host-lib
              name: host-lib
              readOnly: true
            - mountPath: /host-proc/swaps
              name: host-proc-swaps
              readOnly: true
            - mountPath: /etc/kubernetes/node-feature-discovery/source.d/
              name: source-d
              readOnly: true
            - mountPath: /etc/kubernetes/node-feature-discovery/features.d/
              name: features-d
              readOnly: true
            - mountPath: /etc/kubernetes/node-feature-discovery
              name: nfd-worker-conf
              readOnly: true
      dnsPolicy: ClusterFirstWithHostNet
      securityContext: {}
      serviceAccountName: amd-gpu-operator-node-feature-discovery-worker
      tolerations:
        - effect: NoExecute
          key: amd-dcm
          operator: Equal
          value: up
      volumes:
        - hostPath:
            path: /boot
          name: host-boot
        - hostPath:
            path: /etc/os-release
          name: host-os-release
        - hostPath:
            path: /sys
          name: host-sys
        - hostPath:
            path: /usr/lib
          name: host-usr-lib
        - hostPath:
            path: /lib
          name: host-lib
        - hostPath:
            path: /proc/swaps
          name: host-proc-swaps
        - hostPath:
            path: /etc/kubernetes/node-feature-discovery/source.d/
          name: source-d
        - hostPath:
            path: /etc/kubernetes/node-feature-discovery/features.d/
          name: features-d
        - configMap:
            items:
              - key: nfd-worker.conf
                path: nfd-worker.conf
            name: amd-gpu-operator-node-feature-discovery-worker-conf
          name: nfd-worker-conf

View File

@ -0,0 +1,99 @@
---
# Deployment of the AMD GPU operator controller manager (single replica).
# - The "manager" container is started with
#   --config=controller_manager_config.yaml; that file is mounted at
#   /controller_manager_config.yaml (subPath) from the
#   amd-gpu-operator-gpu-operator-charts-manager-config ConfigMap.
# - Liveness (/healthz) and readiness (/readyz) are HTTP probes on 8081.
# - Scheduling prefers control-plane nodes (preferred affinity, weight 1)
#   and tolerates the master / control-plane NoSchedule taints.
# - The pod must run as non-root; the container disallows privilege
#   escalation.
# - The image is pinned by digest; the trailing comment records the tag it
#   was resolved from.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: amd-gpu
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: gpu-operator-charts
    app.kubernetes.io/part-of: amd-gpu
    app.kubernetes.io/version: v1.3.0
    control-plane: controller-manager
  name: amd-gpu-operator-gpu-operator-charts-controller-manager
  namespace: kube-amd-gpu
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: amd-gpu
      app.kubernetes.io/instance: amd-gpu-operator
      app.kubernetes.io/name: gpu-operator-charts
      app.kubernetes.io/part-of: amd-gpu
      control-plane: controller-manager
  template:
    metadata:
      annotations:
        kubectl.kubernetes.io/default-container: manager
      labels:
        app.kubernetes.io/component: amd-gpu
        app.kubernetes.io/instance: amd-gpu-operator
        app.kubernetes.io/name: gpu-operator-charts
        app.kubernetes.io/part-of: amd-gpu
        control-plane: controller-manager
    spec:
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - preference:
                matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
              weight: 1
      containers:
        - args:
            - --config=controller_manager_config.yaml
          env:
            - name: OPERATOR_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: KUBERNETES_CLUSTER_DOMAIN
              value: cluster.local
            - name: SIM_ENABLE
              value: "false"
          image: docker.io/rocm/gpu-operator@sha256:58ac66493b185891c228ccf7c88c88973ace8ec5b59716cc6ddffd3a428c3cef  # Original tag: v1.3.0
          imagePullPolicy: Always
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8081
            initialDelaySeconds: 15
            periodSeconds: 20
          name: manager
          readinessProbe:
            httpGet:
              path: /readyz
              port: 8081
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            limits:
              cpu: 1000m
              memory: 1Gi
            requests:
              cpu: 100m
              memory: 256Mi
          securityContext:
            allowPrivilegeEscalation: false
          volumeMounts:
            - mountPath: /controller_manager_config.yaml
              name: manager-config
              subPath: controller_manager_config.yaml
      nodeSelector: {}
      securityContext:
        runAsNonRoot: true
      serviceAccountName: amd-gpu-operator-gpu-operator-charts-controller-manager
      terminationGracePeriodSeconds: 10
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
          operator: Equal
          value: ""
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
          operator: Equal
          value: ""
      volumes:
        - configMap:
            name: amd-gpu-operator-gpu-operator-charts-manager-config
          name: manager-config

View File

@ -0,0 +1,107 @@
---
# Deployment of the Kernel Module Management (KMM) controller shipped with the
# amd-gpu-operator release. It runs one replica of a single "manager"
# container from the digest-pinned kernel-module-management-operator image.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
    control-plane: controller
  name: amd-gpu-operator-kmm-controller
  namespace: kube-amd-gpu
spec:
  replicas: 1
  selector:
    # Every label here must also appear in spec.template.metadata.labels,
    # otherwise the Deployment is rejected by the API server.
    matchLabels:
      app.kubernetes.io/component: kmm
      app.kubernetes.io/instance: amd-gpu-operator
      app.kubernetes.io/name: kmm
      app.kubernetes.io/part-of: kmm
      control-plane: controller
  template:
    metadata:
      annotations:
        # Makes `kubectl logs` / `kubectl exec` pick the manager container
        # when no -c flag is given.
        kubectl.kubernetes.io/default-container: manager
      labels:
        app.kubernetes.io/component: kmm
        app.kubernetes.io/instance: amd-gpu-operator
        app.kubernetes.io/name: kmm
        app.kubernetes.io/part-of: kmm
        control-plane: controller
    spec:
      affinity:
        nodeAffinity:
          # Soft preference (weight 1) for control-plane nodes; the pod can
          # still be scheduled elsewhere. Paired with the tolerations below.
          preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
              - key: node-role.kubernetes.io/control-plane
                operator: Exists
            weight: 1
      containers:
      - args:
        # Relative path; the file is mounted at /controller_config.yaml below.
        # NOTE(review): presumably the image's working directory is "/" -
        # confirm against the image.
        - --config=controller_config.yaml
        env:
        # NOTE(review): the three RELATED_IMAGE_* references are pinned by
        # mutable tag, unlike the manager image below which is pinned by
        # digest. Consider pinning them by digest as well.
        - name: RELATED_IMAGE_WORKER
          value: docker.io/rocm/kernel-module-management-worker:v1.3.0
        # Downward API: exposes the namespace this pod runs in.
        - name: OPERATOR_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: RELATED_IMAGE_BUILD
          value: gcr.io/kaniko-project/executor:v1.23.2
        - name: RELATED_IMAGE_SIGN
          value: docker.io/rocm/kernel-module-management-signimage:v1.3.0
        - name: KUBERNETES_CLUSTER_DOMAIN
          value: cluster.local
        image: docker.io/rocm/kernel-module-management-operator@sha256:f23a83159aa037a7511d2f8d881a96bfdda8c3b3efb121254a764319ebe94089 # Original tag: v1.3.0
        imagePullPolicy: Always
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8081
          initialDelaySeconds: 15
          periodSeconds: 20
        name: manager
        ports:
        - containerPort: 8443
          name: metrics
          protocol: TCP
        readinessProbe:
          httpGet:
            path: /readyz
            port: 8081
          initialDelaySeconds: 5
          periodSeconds: 10
        resources:
          limits:
            cpu: 500m
            memory: 384Mi
          requests:
            cpu: 10m
            memory: 64Mi
        securityContext:
          allowPrivilegeEscalation: false
          # Drop every Linux capability: the container runs as non-root and
          # only uses unprivileged ports (8081, 8443), so none are needed.
          # Required by the Pod Security Standards "restricted" profile.
          capabilities:
            drop:
            - ALL
        volumeMounts:
        # subPath mounts only this one key of the ConfigMap as a single file.
        - mountPath: /controller_config.yaml
          name: manager-config
          subPath: controller_config.yaml
      nodeSelector: {}
      securityContext:
        runAsNonRoot: true
        # Apply the container runtime's default seccomp filter instead of
        # running unconfined ("restricted" Pod Security Standard).
        seccompProfile:
          type: RuntimeDefault
      serviceAccountName: amd-gpu-operator-kmm-controller
      terminationGracePeriodSeconds: 10
      tolerations:
      # Tolerate both the legacy "master" and the current "control-plane"
      # NoSchedule taints (with empty value) so the preferred node affinity
      # above can actually be satisfied.
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
        operator: Equal
        value: ""
      - effect: NoSchedule
        key: node-role.kubernetes.io/control-plane
        operator: Equal
        value: ""
      volumes:
      - configMap:
          name: amd-gpu-operator-kmm-manager-config
        name: manager-config

View File

@ -0,0 +1,107 @@
---
# Deployment of the Kernel Module Management (KMM) webhook server shipped with
# the amd-gpu-operator release. It runs one replica of a single
# "webhook-server" container from the digest-pinned
# kernel-module-management-webhook-server image, serving on port 9443 with
# certificates mounted from a Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: kmm
    app.kubernetes.io/instance: amd-gpu-operator
    app.kubernetes.io/name: kmm
    app.kubernetes.io/part-of: kmm
    app.kubernetes.io/version: v20240618-v2.1.1
    control-plane: webhook-server
  name: amd-gpu-operator-kmm-webhook-server
  namespace: kube-amd-gpu
spec:
  replicas: 1
  selector:
    # Every label here must also appear in spec.template.metadata.labels,
    # otherwise the Deployment is rejected by the API server.
    matchLabels:
      app.kubernetes.io/component: kmm
      app.kubernetes.io/instance: amd-gpu-operator
      app.kubernetes.io/name: kmm
      app.kubernetes.io/part-of: kmm
      control-plane: webhook-server
  template:
    metadata:
      annotations:
        # Makes `kubectl logs` / `kubectl exec` pick the webhook-server
        # container when no -c flag is given.
        kubectl.kubernetes.io/default-container: webhook-server
      labels:
        app.kubernetes.io/component: kmm
        app.kubernetes.io/instance: amd-gpu-operator
        app.kubernetes.io/name: kmm
        app.kubernetes.io/part-of: kmm
        control-plane: webhook-server
    spec:
      affinity:
        nodeAffinity:
          # Soft preference (weight 1) for control-plane nodes; the pod can
          # still be scheduled elsewhere. Paired with the tolerations below.
          preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
              - key: node-role.kubernetes.io/control-plane
                operator: Exists
            weight: 1
      containers:
      - args:
        # Relative path; the file is mounted at /controller_config.yaml below.
        # NOTE(review): presumably the image's working directory is "/" -
        # confirm against the image.
        - --config=controller_config.yaml
        # NOTE(review): the --enable-* flags presumably switch on the
        # individual admission webhooks - confirm against the KMM docs.
        - --enable-module
        - --enable-namespace
        - --enable-preflightvalidation
        env:
        - name: KUBERNETES_CLUSTER_DOMAIN
          value: cluster.local
        image: docker.io/rocm/kernel-module-management-webhook-server@sha256:8b31959473da74e9d94c81c2e529cef74b1c9b19006430f038cfd51bf6bc839e # Original tag: v1.3.0
        imagePullPolicy: Always
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8081
          initialDelaySeconds: 15
          periodSeconds: 20
        name: webhook-server
        ports:
        - containerPort: 9443
          name: webhook-server
          protocol: TCP
        readinessProbe:
          httpGet:
            path: /readyz
            port: 8081
          initialDelaySeconds: 5
          periodSeconds: 10
        resources:
          limits:
            cpu: 500m
            memory: 384Mi
          requests:
            cpu: 10m
            memory: 64Mi
        securityContext:
          allowPrivilegeEscalation: false
          # Drop every Linux capability: the container runs as non-root and
          # only uses unprivileged ports (8081, 9443), so none are needed.
          # Required by the Pod Security Standards "restricted" profile.
          capabilities:
            drop:
            - ALL
        volumeMounts:
        # TLS serving certificates from the "cert" Secret volume, read-only.
        - mountPath: /tmp/k8s-webhook-server/serving-certs
          name: cert
          readOnly: true
        # subPath mounts only this one key of the ConfigMap as a single file.
        - mountPath: /controller_config.yaml
          name: manager-config
          subPath: controller_config.yaml
      nodeSelector: {}
      securityContext:
        runAsNonRoot: true
        # Apply the container runtime's default seccomp filter instead of
        # running unconfined ("restricted" Pod Security Standard).
        seccompProfile:
          type: RuntimeDefault
      # NOTE(review): the ServiceAccount name refers to the KMM controller,
      # not the webhook server - confirm the shared account is intended.
      serviceAccountName: amd-gpu-operator-kmm-controller
      terminationGracePeriodSeconds: 10
      tolerations:
      # Tolerate both the legacy "master" and the current "control-plane"
      # NoSchedule taints (with empty value) so the preferred node affinity
      # above can actually be satisfied.
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
        operator: Equal
        value: ""
      - effect: NoSchedule
        key: node-role.kubernetes.io/control-plane
        operator: Equal
        value: ""
      volumes:
      - name: cert
        secret:
          # 420 decimal == 0644 octal (owner rw, group/other read-only).
          defaultMode: 420
          secretName: kmm-operator-webhook-server-cert
      - configMap:
          name: amd-gpu-operator-kmm-manager-config
        name: manager-config

Some files were not shown because too many files have changed in this diff Show More