766 lines
		
	
	
		
			48 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			766 lines
		
	
	
		
			48 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
| ---
 | |
| apiVersion: apiextensions.k8s.io/v1
 | |
| kind: CustomResourceDefinition
 | |
| metadata:
 | |
|   annotations:
 | |
|     controller-gen.kubebuilder.io/version: v0.17.1
 | |
|   name: kaiwoqueueconfigs.kaiwo.silogen.ai
 | |
| spec:
 | |
|   group: kaiwo.silogen.ai
 | |
|   names:
 | |
|     kind: KaiwoQueueConfig
 | |
|     listKind: KaiwoQueueConfigList
 | |
|     plural: kaiwoqueueconfigs
 | |
|     singular: kaiwoqueueconfig
 | |
|   scope: Cluster
 | |
|   versions:
 | |
|     - additionalPrinterColumns:
 | |
|         - jsonPath: .status.status
 | |
|           name: WorkloadStatus
 | |
|           type: string
 | |
|       name: v1alpha1
 | |
|       schema:
 | |
|         openAPIV3Schema:
 | |
|           description: |-
 | |
|             KaiwoQueueConfig manages Kueue resources like ClusterQueues, ResourceFlavors, and WorkloadPriorityClasses based on its spec. It acts as a central configuration point for Kaiwo's integration with Kueue. Typically, only one cluster-scoped resource named 'kaiwo' should exist. The controller ensures that the specified Kueue resources are created, updated, or deleted to match the desired state defined here.
 | |
|             KaiwoQueueConfig manages Kueue resources.            
 | |
|           properties:
 | |
|             apiVersion:
 | |
|               description: |-
 | |
|                 APIVersion defines the versioned schema of this representation of an object.
 | |
|                 Servers should convert recognized schemas to the latest internal value, and
 | |
|                 may reject unrecognized values.
 | |
|                 More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources                
 | |
|               type: string
 | |
|             kind:
 | |
|               description: |-
 | |
|                 Kind is a string value representing the REST resource this object represents.
 | |
|                 Servers may infer this from the endpoint the client submits requests to.
 | |
|                 Cannot be updated.
 | |
|                 In CamelCase.
 | |
|                 More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds                
 | |
|               type: string
 | |
|             metadata:
 | |
|               type: object
 | |
|             spec:
 | |
|               description: Spec defines the desired state for Kueue resources managed by Kaiwo.
 | |
|               properties:
 | |
|                 clusterQueues:
 | |
|                   description: ClusterQueues defines a list of Kueue ClusterQueues that Kaiwo should manage. Kaiwo ensures these ClusterQueues exist and match the provided specs.
 | |
|                   items:
 | |
|                     description: ClusterQueue defines the configuration for a Kueue ClusterQueue managed by Kaiwo.
 | |
|                     properties:
 | |
|                       name:
 | |
|                         description: Name specifies the name of the Kueue ClusterQueue resource.
 | |
|                         type: string
 | |
|                       namespaces:
 | |
|                         description: |-
 | |
|                           Namespaces optionally lists Kubernetes namespaces where Kaiwo should automatically create a Kueue `LocalQueue` resource pointing to this ClusterQueue.
 | |
|                           If one or more namespaces are provided, the KaiwoQueueConfig controller takes over managing the LocalQueues for this ClusterQueue.
 | |
|                           Leave this empty if you want to be able to create your own LocalQueues for this ClusterQueue.                          
 | |
|                         items:
 | |
|                           type: string
 | |
|                         type: array
 | |
|                       spec:
 | |
|                         description: Spec contains the desired Kueue `ClusterQueueSpec`. Kaiwo ensures the corresponding ClusterQueue resource matches this spec. See Kueue documentation for `ClusterQueueSpec` fields like `resourceGroups`, `cohort`, `preemption`, etc.
 | |
|                         properties:
 | |
|                           admissionChecks:
 | |
|                             description: |-
 | |
|                               admissionChecks lists the AdmissionChecks required by this ClusterQueue.
 | |
|                               Cannot be used along with AdmissionCheckStrategy.                              
 | |
|                             items:
 | |
|                               description: AdmissionCheckReference is the name of an AdmissionCheck.
 | |
|                               maxLength: 316
 | |
|                               type: string
 | |
|                             type: array
 | |
|                           admissionChecksStrategy:
 | |
|                             description: |-
 | |
|                               admissionCheckStrategy defines a list of strategies to determine which ResourceFlavors require AdmissionChecks.
 | |
|                               This property cannot be used in conjunction with the 'admissionChecks' property.                              
 | |
|                             properties:
 | |
|                               admissionChecks:
 | |
|                                 description: admissionChecks is a list of strategies for AdmissionChecks
 | |
|                                 items:
 | |
|                                   description: AdmissionCheckStrategyRule defines rules for a single AdmissionCheck
 | |
|                                   properties:
 | |
|                                     name:
 | |
|                                       description: name is an AdmissionCheck's name.
 | |
|                                       maxLength: 316
 | |
|                                       type: string
 | |
|                                     onFlavors:
 | |
|                                       description: |-
 | |
|                                         onFlavors is a list of ResourceFlavors' names that this AdmissionCheck should run for.
 | |
|                                         If empty, the AdmissionCheck will run for all workloads submitted to the ClusterQueue.                                        
 | |
|                                       items:
 | |
|                                         description: ResourceFlavorReference is the name of the ResourceFlavor.
 | |
|                                         maxLength: 253
 | |
|                                         pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
 | |
|                                         type: string
 | |
|                                       type: array
 | |
|                                   required:
 | |
|                                     - name
 | |
|                                   type: object
 | |
|                                 type: array
 | |
|                             type: object
 | |
|                           cohort:
 | |
|                             description: |-
 | |
|                               cohort that this ClusterQueue belongs to. CQs that belong to the
 | |
|                               same cohort can borrow unused resources from each other.
 | |
| 
 | |
|                               A CQ can be a member of a single borrowing cohort. A workload submitted
 | |
|                               to a queue referencing this CQ can borrow quota from any CQ in the cohort.
 | |
|                               Only quota for the [resource, flavor] pairs listed in the CQ can be
 | |
|                               borrowed.
 | |
|                               If empty, this ClusterQueue cannot borrow from any other ClusterQueue and
 | |
|                               vice versa.                              
 | |
|                             maxLength: 253
 | |
|                             pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
 | |
|                             type: string
 | |
|                           fairSharing:
 | |
|                             description: |-
 | |
|                               fairSharing defines the properties of the ClusterQueue when
 | |
|                               participating in FairSharing.  The values are only relevant
 | |
|                               if FairSharing is enabled in the Kueue configuration.                              
 | |
|                             properties:
 | |
|                               weight:
 | |
|                                 anyOf:
 | |
|                                   - type: integer
 | |
|                                   - type: string
 | |
|                                 default: 1
 | |
|                                 description: |-
 | |
|                                   weight gives a comparative advantage to this ClusterQueue
 | |
|                                   or Cohort when competing for unused resources in the
 | |
|                                   Cohort.  The share is based on the dominant resource usage
 | |
|                                   above nominal quotas for each resource, divided by the
 | |
|                                   weight.  Admission prioritizes scheduling workloads from
 | |
|                                   ClusterQueues and Cohorts with the lowest share and
 | |
|                                   preempting workloads from the ClusterQueues and Cohorts
 | |
|                                   with the highest share.                                  
 | |
|                                 pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
 | |
|                                 x-kubernetes-int-or-string: true
 | |
|                             type: object
 | |
|                           flavorFungibility:
 | |
|                             default: {}
 | |
|                             description: |-
 | |
|                               flavorFungibility defines whether a workload should try the next flavor
 | |
|                               before borrowing or preempting in the flavor being evaluated.                              
 | |
|                             properties:
 | |
|                               whenCanBorrow:
 | |
|                                 default: Borrow
 | |
|                                 description: |-
 | |
|                                   whenCanBorrow determines whether a workload should try the next flavor
 | |
|                                   before borrowing in current flavor. The possible values are:
 | |
| 
 | |
|                                   - `Borrow` (default): allocate in current flavor if borrowing
 | |
|                                     is possible.
 | |
|                                   - `TryNextFlavor`: try next flavor even if the current
 | |
|                                     flavor has enough resources to borrow.                                  
 | |
|                                 enum:
 | |
|                                   - Borrow
 | |
|                                   - TryNextFlavor
 | |
|                                 type: string
 | |
|                               whenCanPreempt:
 | |
|                                 default: TryNextFlavor
 | |
|                                 description: |-
 | |
|                                   whenCanPreempt determines whether a workload should try the next flavor
 | |
|                                   before preempting in current flavor. The possible values are:
 | |
| 
 | |
|                                   - `Preempt`: allocate in current flavor if it's possible to preempt some workloads.
 | |
|                                   - `TryNextFlavor` (default): try next flavor even if there are enough
 | |
|                                     candidates for preemption in the current flavor.                                  
 | |
|                                 enum:
 | |
|                                   - Preempt
 | |
|                                   - TryNextFlavor
 | |
|                                 type: string
 | |
|                             type: object
 | |
|                           namespaceSelector:
 | |
|                             description: |-
 | |
|                               namespaceSelector defines which namespaces are allowed to submit workloads to
 | |
|                               this clusterQueue. Beyond this basic support for policy, a policy agent like
 | |
|                               Gatekeeper should be used to enforce more advanced policies.
 | |
|                               Defaults to null which is a nothing selector (no namespaces eligible).
 | |
|                               If set to an empty selector `{}`, then all namespaces are eligible.                              
 | |
|                             properties:
 | |
|                               matchExpressions:
 | |
|                                 description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
 | |
|                                 items:
 | |
|                                   description: |-
 | |
|                                     A label selector requirement is a selector that contains values, a key, and an operator that
 | |
|                                     relates the key and values.                                    
 | |
|                                   properties:
 | |
|                                     key:
 | |
|                                       description: key is the label key that the selector applies to.
 | |
|                                       type: string
 | |
|                                     operator:
 | |
|                                       description: |-
 | |
|                                         operator represents a key's relationship to a set of values.
 | |
|                                         Valid operators are In, NotIn, Exists and DoesNotExist.                                        
 | |
|                                       type: string
 | |
|                                     values:
 | |
|                                       description: |-
 | |
|                                         values is an array of string values. If the operator is In or NotIn,
 | |
|                                         the values array must be non-empty. If the operator is Exists or DoesNotExist,
 | |
|                                         the values array must be empty. This array is replaced during a strategic
 | |
|                                         merge patch.                                        
 | |
|                                       items:
 | |
|                                         type: string
 | |
|                                       type: array
 | |
|                                       x-kubernetes-list-type: atomic
 | |
|                                   required:
 | |
|                                     - key
 | |
|                                     - operator
 | |
|                                   type: object
 | |
|                                 type: array
 | |
|                                 x-kubernetes-list-type: atomic
 | |
|                               matchLabels:
 | |
|                                 additionalProperties:
 | |
|                                   type: string
 | |
|                                 description: |-
 | |
|                                   matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
 | |
|                                   map is equivalent to an element of matchExpressions, whose key field is "key", the
 | |
|                                   operator is "In", and the values array contains only "value". The requirements are ANDed.                                  
 | |
|                                 type: object
 | |
|                             type: object
 | |
|                             x-kubernetes-map-type: atomic
 | |
|                           preemption:
 | |
|                             default: {}
 | |
|                             description: |-
 | |
|                               ClusterQueuePreemption contains policies to preempt Workloads from this
 | |
|                               ClusterQueue or the ClusterQueue's cohort.
 | |
| 
 | |
|                               Preemption may be configured to work in the following scenarios:
 | |
| 
 | |
|                                 - When a Workload fits within the nominal quota of the ClusterQueue, but
 | |
|                                   the quota is currently borrowed by other ClusterQueues in the cohort.
 | |
|                                   We preempt workloads in other ClusterQueues to allow this ClusterQueue to
 | |
|                                   reclaim its nominal quota. Configured using reclaimWithinCohort.                              
 | |
|                             properties:
 | |
|                               borrowWithinCohort:
 | |
|                                 default: {}
 | |
|                                 description: |-
 | |
|                                   BorrowWithinCohort contains configuration which allows preempting workloads
 | |
|                                   within cohort while borrowing. It only works with Classical Preemption,
 | |
|                                   __not__ with Fair Sharing.                                  
 | |
|                                 properties:
 | |
|                                   maxPriorityThreshold:
 | |
|                                     description: |-
 | |
|                                       maxPriorityThreshold allows restricting the set of workloads which
 | |
|                                       might be preempted by a borrowing workload, to only workloads with
 | |
|                                       priority less than or equal to the specified threshold priority.
 | |
|                                       When the threshold is not specified, then any workload satisfying the
 | |
|                                       policy can be preempted by the borrowing workload.                                      
 | |
|                                     format: int32
 | |
|                                     type: integer
 | |
|                                   policy:
 | |
|                                     default: Never
 | |
|                                     description: |-
 | |
|                                       policy determines the policy for preemption to reclaim quota within cohort while borrowing.
 | |
|                                       Possible values are:
 | |
|                                       - `Never` (default): do not allow for preemption, in other
 | |
|                                          ClusterQueues within the cohort, for a borrowing workload.
 | |
|                                       - `LowerPriority`: allow preemption, in other ClusterQueues
 | |
|                                          within the cohort, for a borrowing workload, but only if
 | |
|                                          the preempted workloads are of lower priority.                                      
 | |
|                                     enum:
 | |
|                                       - Never
 | |
|                                       - LowerPriority
 | |
|                                     type: string
 | |
|                                 type: object
 | |
|                               reclaimWithinCohort:
 | |
|                                 default: Never
 | |
|                                 description: |-
 | |
|                                   reclaimWithinCohort determines whether a pending Workload can preempt
 | |
|                                   Workloads from other ClusterQueues in the cohort that are using more than
 | |
|                                   their nominal quota. The possible values are:
 | |
| 
 | |
|                                   - `Never` (default): do not preempt Workloads in the cohort.
 | |
|                                   - `LowerPriority`: **Classic Preemption** if the pending Workload
 | |
|                                     fits within the nominal quota of its ClusterQueue, only preempt
 | |
|                                     Workloads in the cohort that have lower priority than the pending
 | |
|                                     Workload.                                  
 | |
|                                 enum:
 | |
|                                   - Never
 | |
|                                   - LowerPriority
 | |
|                                   - Any
 | |
|                                 type: string
 | |
|                               withinClusterQueue:
 | |
|                                 default: Never
 | |
|                                 description: |-
 | |
|                                   withinClusterQueue determines whether a pending Workload that doesn't fit
 | |
|                                   within the nominal quota for its ClusterQueue, can preempt active Workloads in
 | |
|                                   the ClusterQueue. The possible values are:
 | |
| 
 | |
|                                   - `Never` (default): do not preempt Workloads in the ClusterQueue.
 | |
|                                   - `LowerPriority`: only preempt Workloads in the ClusterQueue that have
 | |
|                                     lower priority than the pending Workload.                                  
 | |
|                                 enum:
 | |
|                                   - Never
 | |
|                                   - LowerPriority
 | |
|                                   - LowerOrNewerEqualPriority
 | |
|                                 type: string
 | |
|                             type: object
 | |
|                             x-kubernetes-validations:
 | |
|                               - message: reclaimWithinCohort=Never and borrowWithinCohort.Policy!=Never
 | |
|                                 rule: '!(self.reclaimWithinCohort == ''Never'' && has(self.borrowWithinCohort) &&  self.borrowWithinCohort.policy != ''Never'')'
 | |
|                           queueingStrategy:
 | |
|                             default: BestEffortFIFO
 | |
|                             description: |-
 | |
|                               QueueingStrategy indicates the queueing strategy of the workloads
 | |
|                               across the queues in this ClusterQueue.
 | |
|                               Current Supported Strategies:
 | |
| 
 | |
|                               - StrictFIFO: workloads are ordered strictly by creation time.
 | |
|                               Older workloads that can't be admitted will block admitting newer
 | |
|                               workloads even if they fit available quota.
 | |
|                               - BestEffortFIFO: workloads are ordered by creation time,
 | |
|                               however older workloads that can't be admitted will not block
 | |
|                               admitting newer workloads that fit existing quota.                              
 | |
|                             enum:
 | |
|                               - StrictFIFO
 | |
|                               - BestEffortFIFO
 | |
|                             type: string
 | |
|                           resourceGroups:
 | |
|                             description: |-
 | |
|                               resourceGroups describes groups of resources.
 | |
|                               Each resource group defines the list of resources and a list of flavors
 | |
|                               that provide quotas for these resources.
 | |
|                               Each resource and each flavor can only form part of one resource group.
 | |
|                               resourceGroups can be up to 16.                              
 | |
|                             items:
 | |
|                               properties:
 | |
|                                 coveredResources:
 | |
|                                   description: |-
 | |
|                                     coveredResources is the list of resources covered by the flavors in this
 | |
|                                     group.
 | |
|                                     Examples: cpu, memory, vendor.com/gpu.
 | |
|                                     The list cannot be empty and it can contain up to 16 resources.                                    
 | |
|                                   items:
 | |
|                                     description: ResourceName is the name identifying various resources in a ResourceList.
 | |
|                                     type: string
 | |
|                                   maxItems: 16
 | |
|                                   minItems: 1
 | |
|                                   type: array
 | |
|                                 flavors:
 | |
|                                   description: |-
 | |
|                                     flavors is the list of flavors that provide the resources of this group.
 | |
|                                     Typically, different flavors represent different hardware models
 | |
|                                     (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot
 | |
|                                     cpus).
 | |
|                                     Each flavor MUST list all the resources listed for this group in the same
 | |
|                                     order as the .coveredResources field.
 | |
|                                     The list cannot be empty and it can contain up to 16 flavors.                                    
 | |
|                                   items:
 | |
|                                     properties:
 | |
|                                       name:
 | |
|                                         description: |-
 | |
|                                           name of this flavor. The name should match the .metadata.name of a
 | |
|                                           ResourceFlavor. If a matching ResourceFlavor does not exist, the
 | |
|                                           ClusterQueue will have an Active condition set to False.                                          
 | |
|                                         maxLength: 253
 | |
|                                         pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
 | |
|                                         type: string
 | |
|                                       resources:
 | |
|                                         description: |-
 | |
|                                           resources is the list of quotas for this flavor per resource.
 | |
|                                           There could be up to 16 resources.                                          
 | |
|                                         items:
 | |
|                                           properties:
 | |
|                                             borrowingLimit:
 | |
|                                               anyOf:
 | |
|                                                 - type: integer
 | |
|                                                 - type: string
 | |
|                                               description: |-
 | |
|                                                 borrowingLimit is the maximum amount of quota for the [flavor, resource]
 | |
|                                                 combination that this ClusterQueue is allowed to borrow from the unused
 | |
|                                                 quota of other ClusterQueues in the same cohort.
 | |
|                                                 In total, at a given time, Workloads in a ClusterQueue can consume a
 | |
|                                                 quantity of quota equal to nominalQuota+borrowingLimit, assuming the other
 | |
|                                                 ClusterQueues in the cohort have enough unused quota.
 | |
|                                                 If null, it means that there is no borrowing limit.
 | |
|                                                 If not null, it must be non-negative.                                                
 | |
|                                               pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
 | |
|                                               x-kubernetes-int-or-string: true
 | |
|                                             lendingLimit:
 | |
|                                               anyOf:
 | |
|                                                 - type: integer
 | |
|                                                 - type: string
 | |
|                                               description: |-
 | |
|                                                 lendingLimit is the maximum amount of unused quota for the [flavor, resource]
 | |
|                                                 combination that this ClusterQueue can lend to other ClusterQueues in the same cohort.
 | |
|                                                 In total, at a given time, ClusterQueue reserves for its exclusive use
 | |
|                                                 a quantity of quota equal to nominalQuota - lendingLimit.
 | |
|                                                 If null, it means that there is no lending limit, meaning that
 | |
|                                                 all the nominalQuota can be borrowed by other clusterQueues in the cohort.
 | |
|                                                 If not null, it must be non-negative.
 | |
|                                                 lendingLimit must be null if spec.cohort is empty.
 | |
|                                               pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
 | |
|                                               x-kubernetes-int-or-string: true
 | |
|                                             name:
 | |
|                                               description: name of this resource.
 | |
|                                               type: string
 | |
|                                             nominalQuota:
 | |
|                                               anyOf:
 | |
|                                                 - type: integer
 | |
|                                                 - type: string
 | |
|                                               description: |-
 | |
|                                                 nominalQuota is the quantity of this resource that is available for
 | |
|                                                 Workloads admitted by this ClusterQueue at a point in time.
 | |
|                                                 The nominalQuota must be non-negative.
 | |
|                                                 nominalQuota should represent the resources in the cluster available for
 | |
|                                                 running jobs (after discounting resources consumed by system components
 | |
|                                                 and pods not managed by kueue). In an autoscaled cluster, nominalQuota
 | |
|                                                 should account for resources that can be provided by a component such as
 | |
|                                                 Kubernetes cluster-autoscaler.                                                
 | |
|                                               pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
 | |
|                                               x-kubernetes-int-or-string: true
 | |
|                                           required:
 | |
|                                             - name
 | |
|                                             - nominalQuota
 | |
|                                           type: object
 | |
|                                         maxItems: 16
 | |
|                                         minItems: 1
 | |
|                                         type: array
 | |
|                                         x-kubernetes-list-map-keys:
 | |
|                                           - name
 | |
|                                         x-kubernetes-list-type: map
 | |
|                                     required:
 | |
|                                       - name
 | |
|                                       - resources
 | |
|                                     type: object
 | |
|                                   maxItems: 16
 | |
|                                   minItems: 1
 | |
|                                   type: array
 | |
|                                   x-kubernetes-list-map-keys:
 | |
|                                     - name
 | |
|                                   x-kubernetes-list-type: map
 | |
|                               required:
 | |
|                                 - coveredResources
 | |
|                                 - flavors
 | |
|                               type: object
 | |
|                               x-kubernetes-validations:
 | |
|                                 - message: flavors must have the same number of resources as the coveredResources
 | |
|                                   rule: self.flavors.all(x, size(x.resources) == size(self.coveredResources))
 | |
|                             maxItems: 16
 | |
|                             type: array
 | |
|                             x-kubernetes-list-type: atomic
 | |
|                           stopPolicy:
 | |
|                             default: None
 | |
|                             description: |-
 | |
|                               stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive and no new reservations are
 | |
|                               made.
 | |
| 
 | |
|                               Depending on its value, its associated workloads will:
 | |
| 
 | |
|                               - None - Workloads are admitted
 | |
|                               - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation.
 | |
|                               - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation.                              
 | |
|                             enum:
 | |
|                               - None
 | |
|                               - Hold
 | |
|                               - HoldAndDrain
 | |
|                             type: string
 | |
|                         type: object
 | |
|                     required:
 | |
|                       - name
 | |
|                     type: object
 | |
|                   maxItems: 1000
 | |
|                   type: array
 | |
|                 resourceFlavors:
 | |
|                   description: ResourceFlavors defines a list of Kueue ResourceFlavors that Kaiwo should manage. Kaiwo ensures these ResourceFlavors exist and match the provided specs. If omitted or empty, Kaiwo attempts to automatically discover node pools and create default flavors based on node labels.
 | |
|                   items:
 | |
|                     description: ResourceFlavorSpec defines the configuration for a Kueue ResourceFlavor managed by Kaiwo.
 | |
|                     properties:
 | |
|                       name:
 | |
|                         description: Name specifies the name of the Kueue ResourceFlavor resource (e.g., "amd-mi300-8gpu").
 | |
|                         type: string
 | |
|                       nodeLabels:
 | |
|                         additionalProperties:
 | |
|                           type: string
 | |
|                         description: 'NodeLabels specifies the labels that pods requesting this flavor must match on nodes. This is used by Kueue for scheduling decisions. Keys and values should correspond to actual node labels. Example: `{"kaiwo/nodepool": "amd-gpu-nodes"}`'
 | |
|                         maxProperties: 10
 | |
|                         type: object
 | |
|                       taints:
 | |
|                         description: Taints specifies a list of taints associated with this flavor.
 | |
|                         items:
 | |
|                           description: |-
 | |
|                             The node this Taint is attached to has the "effect" on
 | |
|                             any pod that does not tolerate the Taint.                            
 | |
|                           properties:
 | |
|                             effect:
 | |
|                               description: |-
 | |
|                                 Required. The effect of the taint on pods
 | |
|                                 that do not tolerate the taint.
 | |
|                                 Valid effects are NoSchedule, PreferNoSchedule and NoExecute.                                
 | |
|                               type: string
 | |
|                             key:
 | |
|                               description: Required. The taint key to be applied to a node.
 | |
|                               type: string
 | |
|                             timeAdded:
 | |
|                               description: |-
 | |
|                                 TimeAdded represents the time at which the taint was added.
 | |
|                                 It is only written for NoExecute taints.                                
 | |
|                               format: date-time
 | |
|                               type: string
 | |
|                             value:
 | |
|                               description: The taint value corresponding to the taint key.
 | |
|                               type: string
 | |
|                           required:
 | |
|                             - effect
 | |
|                             - key
 | |
|                           type: object
 | |
|                         maxItems: 5
 | |
|                         type: array
 | |
|                       tolerations:
 | |
|                         description: Tolerations specifies a list of tolerations associated with this flavor. This is less common than using Taints; Kueue primarily uses Taints to derive Tolerations.
 | |
|                         items:
 | |
|                           description: |-
 | |
|                             The pod this Toleration is attached to tolerates any taint that matches
 | |
|                             the triple <key,value,effect> using the matching operator <operator>.                            
 | |
|                           properties:
 | |
|                             effect:
 | |
|                               description: |-
 | |
|                                 Effect indicates the taint effect to match. Empty means match all taint effects.
 | |
|                                 When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.                                
 | |
|                               type: string
 | |
|                             key:
 | |
|                               description: |-
 | |
|                                 Key is the taint key that the toleration applies to. Empty means match all taint keys.
 | |
|                                 If the key is empty, operator must be Exists; this combination means to match all values and all keys.                                
 | |
|                               type: string
 | |
|                             operator:
 | |
|                               description: |-
 | |
|                                 Operator represents a key's relationship to the value.
 | |
|                                 Valid operators are Exists and Equal. Defaults to Equal.
 | |
|                                 Exists is equivalent to wildcard for value, so that a pod can
 | |
|                                 tolerate all taints of a particular category.                                
 | |
|                               type: string
 | |
|                             tolerationSeconds:
 | |
|                               description: |-
 | |
|                                 TolerationSeconds represents the period of time the toleration (which must be
 | |
|                                 of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
 | |
|                                 it is not set, which means tolerate the taint forever (do not evict). Zero and
 | |
|                                 negative values will be treated as 0 (evict immediately) by the system.                                
 | |
|                               format: int64
 | |
|                               type: integer
 | |
|                             value:
 | |
|                               description: |-
 | |
|                                 Value is the taint value the toleration matches to.
 | |
|                                 If the operator is Exists, the value should be empty, otherwise just a regular string.                                
 | |
|                               type: string
 | |
|                           type: object
 | |
|                         maxItems: 5
 | |
|                         type: array
 | |
|                       topologyName:
 | |
|                         description: |-
 | |
|                           TopologyName specifies the name of the Kueue Topology that this flavor belongs to. If specified, it must match one of the Topologies defined in the KaiwoQueueConfig.
 | |
|                           This is used to group flavors by topology for scheduling purposes.                          
 | |
|                         type: string
 | |
|                     required:
 | |
|                       - name
 | |
|                     type: object
 | |
|                   maxItems: 20
 | |
|                   type: array
 | |
|                 topologies:
 | |
|                   description: Topologies defines a list of Kueue Topologies that Kaiwo should manage. Kaiwo ensures these Topologies exist with the specified values. See Kueue documentation for `Topology`.
 | |
|                   items:
 | |
|                     description: Topology is the Schema for the topology API
 | |
|                     properties:
 | |
|                       apiVersion:
 | |
|                         description: |-
 | |
|                           APIVersion defines the versioned schema of this representation of an object.
 | |
|                           Servers should convert recognized schemas to the latest internal value, and
 | |
|                           may reject unrecognized values.
 | |
|                           More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources                          
 | |
|                         type: string
 | |
|                       kind:
 | |
|                         description: |-
 | |
|                           Kind is a string value representing the REST resource this object represents.
 | |
|                           Servers may infer this from the endpoint the client submits requests to.
 | |
|                           Cannot be updated.
 | |
|                           In CamelCase.
 | |
|                           More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds                          
 | |
|                         type: string
 | |
|                       metadata:
 | |
|                         properties:
 | |
|                           annotations:
 | |
|                             additionalProperties:
 | |
|                               type: string
 | |
|                             type: object
 | |
|                           finalizers:
 | |
|                             items:
 | |
|                               type: string
 | |
|                             type: array
 | |
|                           labels:
 | |
|                             additionalProperties:
 | |
|                               type: string
 | |
|                             type: object
 | |
|                           name:
 | |
|                             type: string
 | |
|                           namespace:
 | |
|                             type: string
 | |
|                         type: object
 | |
|                       spec:
 | |
|                         properties:
 | |
|                           levels:
 | |
|                             description: levels define the levels of topology.
 | |
|                             items:
 | |
|                               description: TopologyLevel defines the desired state of TopologyLevel
 | |
|                               properties:
 | |
|                                 nodeLabel:
 | |
|                                   description: |-
 | |
|                                     nodeLabel indicates the name of the node label for a specific topology
 | |
|                                     level.
 | |
| 
 | |
|                                     Examples:
 | |
|                                     - cloud.provider.com/topology-block
 | |
|                                     - cloud.provider.com/topology-rack                                    
 | |
|                                   maxLength: 316
 | |
|                                   minLength: 1
 | |
|                                   pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
 | |
|                                   type: string
 | |
|                               required:
 | |
|                                 - nodeLabel
 | |
|                               type: object
 | |
|                             maxItems: 8
 | |
|                             minItems: 1
 | |
|                             type: array
 | |
|                             x-kubernetes-list-type: atomic
 | |
|                             x-kubernetes-validations:
 | |
|                               - message: must be unique
 | |
|                                 rule: size(self.filter(i, size(self.filter(j, j == i)) > 1)) == 0
 | |
|                               - message: the kubernetes.io/hostname label can only be used at the lowest level of topology
 | |
|                                 rule: size(self.filter(i, i.nodeLabel == 'kubernetes.io/hostname')) == 0 || self[size(self) - 1].nodeLabel == 'kubernetes.io/hostname'
 | |
|                         required:
 | |
|                           - levels
 | |
|                         type: object
 | |
|                     required:
 | |
|                       - spec
 | |
|                     type: object
 | |
|                   maxItems: 10
 | |
|                   type: array
 | |
|                 workloadPriorityClasses:
 | |
|                   description: WorkloadPriorityClasses defines a list of Kueue WorkloadPriorityClasses that Kaiwo should manage. Kaiwo ensures these priority classes exist with the specified values. See Kueue documentation for `WorkloadPriorityClass`.
 | |
|                   items:
 | |
|                     description: WorkloadPriorityClass is the Schema for the workloadPriorityClass API
 | |
|                     properties:
 | |
|                       apiVersion:
 | |
|                         description: |-
 | |
|                           APIVersion defines the versioned schema of this representation of an object.
 | |
|                           Servers should convert recognized schemas to the latest internal value, and
 | |
|                           may reject unrecognized values.
 | |
|                           More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources                          
 | |
|                         type: string
 | |
|                       description:
 | |
|                         description: |-
 | |
|                           description is an arbitrary string that usually provides guidelines on
 | |
|                           when this workloadPriorityClass should be used.                          
 | |
|                         type: string
 | |
|                       kind:
 | |
|                         description: |-
 | |
|                           Kind is a string value representing the REST resource this object represents.
 | |
|                           Servers may infer this from the endpoint the client submits requests to.
 | |
|                           Cannot be updated.
 | |
|                           In CamelCase.
 | |
|                           More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds                          
 | |
|                         type: string
 | |
|                       metadata:
 | |
|                         properties:
 | |
|                           annotations:
 | |
|                             additionalProperties:
 | |
|                               type: string
 | |
|                             type: object
 | |
|                           finalizers:
 | |
|                             items:
 | |
|                               type: string
 | |
|                             type: array
 | |
|                           labels:
 | |
|                             additionalProperties:
 | |
|                               type: string
 | |
|                             type: object
 | |
|                           name:
 | |
|                             type: string
 | |
|                           namespace:
 | |
|                             type: string
 | |
|                         type: object
 | |
|                       value:
 | |
|                         description: |-
 | |
|                           value represents the integer value of this workloadPriorityClass. This is the actual priority that workloads
 | |
|                           receive when jobs have the name of this class in their workloadPriorityClass label.
 | |
|                           Changing the value of workloadPriorityClass doesn't affect the priority of workloads that were already created.                          
 | |
|                         format: int32
 | |
|                         type: integer
 | |
|                     required:
 | |
|                       - value
 | |
|                     type: object
 | |
|                   maxItems: 20
 | |
|                   type: array
 | |
|               type: object
 | |
|             status:
 | |
|               description: Status reflects the most recently observed state of the Kueue resource synchronization.
 | |
|               properties:
 | |
|                 conditions:
 | |
|                   description: Conditions lists the observed conditions of the KaiwoQueueConfig resource, such as whether the managed Kueue resources are synchronized and ready.
 | |
|                   items:
 | |
|                     description: Condition contains details for one aspect of the current state of this API Resource.
 | |
|                     properties:
 | |
|                       lastTransitionTime:
 | |
|                         description: |-
 | |
|                           lastTransitionTime is the last time the condition transitioned from one status to another.
 | |
|                           This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.                          
 | |
|                         format: date-time
 | |
|                         type: string
 | |
|                       message:
 | |
|                         description: |-
 | |
|                           message is a human readable message indicating details about the transition.
 | |
|                           This may be an empty string.                          
 | |
|                         maxLength: 32768
 | |
|                         type: string
 | |
|                       observedGeneration:
 | |
|                         description: |-
 | |
|                           observedGeneration represents the .metadata.generation that the condition was set based upon.
 | |
|                           For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
 | |
|                           with respect to the current state of the instance.                          
 | |
|                         format: int64
 | |
|                         minimum: 0
 | |
|                         type: integer
 | |
|                       reason:
 | |
|                         description: |-
 | |
|                           reason contains a programmatic identifier indicating the reason for the condition's last transition.
 | |
|                           Producers of specific condition types may define expected values and meanings for this field,
 | |
|                           and whether the values are considered a guaranteed API.
 | |
|                           The value should be a CamelCase string.
 | |
|                           This field may not be empty.                          
 | |
|                         maxLength: 1024
 | |
|                         minLength: 1
 | |
|                         pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
 | |
|                         type: string
 | |
|                       status:
 | |
|                         description: status of the condition, one of True, False, Unknown.
 | |
|                         enum:
 | |
|                           - "True"
 | |
|                           - "False"
 | |
|                           - Unknown
 | |
|                         type: string
 | |
|                       type:
 | |
|                         description: type of condition in CamelCase or in foo.example.com/CamelCase.
 | |
|                         maxLength: 316
 | |
|                         pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
 | |
|                         type: string
 | |
|                     required:
 | |
|                       - lastTransitionTime
 | |
|                       - message
 | |
|                       - reason
 | |
|                       - status
 | |
|                       - type
 | |
|                     type: object
 | |
|                   type: array
 | |
|                 status:
 | |
|                   description: Status reflects the overall status of the Kueue resource synchronization managed by this config (e.g., READY, FAILED).
 | |
|                   type: string
 | |
|               type: object
 | |
|           type: object
 | |
|       served: true
 | |
|       storage: true
 | |
|       subresources:
 | |
|         status: {}
 | 
