Run Commands
Read the HELM series >

PachD HCVs

Configure the core settings.

Values #

pachd:
  enabled: true
  preflightChecks:
    # if enabled runs kube validation preflight checks.
    enabled: true
  affinity: {}
  annotations: {}
  # clusterDeploymentID sets the Pachyderm cluster ID.
  clusterDeploymentID: ""
  configJob:
    annotations: {}
  # goMaxProcs is passed as GOMAXPROCS to the pachd container.  pachd can automatically pick an
  # optimal GOMAXPROCS from the configured CPU limit, but this overrides it.
  goMaxProcs: 0
  # goMemLimit is passed as GOMEMLIMIT to the pachd container. pachd can automatically pick an
  # optimal GOMEMLIMIT from the configured memory request or limit, but this overrides it.  This is a string
  # because it can be something like '256MiB'.
  goMemLimit: ""
  # gcPercent sets the initial garbage collection target percentage.
  gcPercent: 0
  image:
    repository: "pachyderm/pachd"
    pullPolicy: "IfNotPresent"
    # tag defaults to the chart’s specified appVersion.
    # This sets the worker image tag as well (they should be kept in lock step)
    tag: ""
  logLevel: "info"
  disableLogSampling: false
  developmentLogger: false
  # If true, log every SQL query at debug level.  This can potentially log sensitive information
  # without redaction, so it's not recommended for production.
  sqlQueryLogs: false
  # If lokiDeploy is true, a Pachyderm-specific instance of Loki will
  # be deployed.
  lokiDeploy: true
  # lokiLogging enables Loki logging if set.
  lokiLogging: true
  # lokiHost and lokiPort should only be set when using an external Loki instance. lokiDeploy should be false.
  # lokiHost should be the hostname of the Loki instance to use.
  lokiHost: ""
  # lokiPort should be the port of the Loki instance to use.
  lokiPort: 0
  metrics:
    # enabled sets the METRICS environment variable if set.
    enabled: true
    # endpoint should be the URL of the metrics endpoint.
    endpoint: ""
  priorityClassName: ""
  nodeSelector: {}
  # podLabels specifies labels to add to the pachd pod.
  podLabels: {}
  # resources specifies the resource requests and limits
  # replicas sets the number of pachd running pods
  replicas: 1
  resources:
    {}
    #limits:
    #  cpu: "1"
    #  memory: "2G"
    #requests:
    #  cpu: "1"
    #  memory: "2G"
  # requireCriticalServersOnly only requires the critical pachd
  # servers to startup and run without errors.  It is analogous to the
  # --require-critical-servers-only argument to pachctl deploy.
  requireCriticalServersOnly: false
  # If enabled, External service creates a service which is safe to
  # be exposed externally
  externalService:
    enabled: false
    # (Optional) specify the existing IP Address of the load balancer
    loadBalancerIP: ""
    apiGRPCPort: 30650
    s3GatewayPort: 30600
    annotations: {}
  service:
    # labels specifies labels to add to the pachd service.
    labels: {}
    # type specifies the Kubernetes type of the pachd service.
    type: "ClusterIP"
    annotations: {}
    apiGRPCPort: 30650
    prometheusPort: 30656
    oidcPort: 30657
    identityPort: 30658
    s3GatewayPort: 30600
    #apiGrpcPort:
    #  expose: true
    #  port: 30650
  # DEPRECATED: activateEnterprise is no longer used.
  activateEnterprise: false
  ## if pachd.activateEnterpriseMember is set, enterprise will be activated and connected to an existing enterprise server.
  ## if pachd.enterpriseLicenseKey is set, enterprise will be activated.
  activateEnterpriseMember: false
  ## if pachd.activateAuth is set, auth will be bootstrapped by the config-job.
  activateAuth: true
  ## the license key used to activate enterprise features
  enterpriseLicenseKey: ""
  # enterpriseLicenseKeySecretName is used to pass the enterprise license key value via an existing k8s secret.
  # The value is pulled from the key, "enterprise-license-key".
  enterpriseLicenseKeySecretName: ""
  # if a token is not provided, a secret will be autogenerated on install and stored in the k8s secret 'pachyderm-bootstrap-config.rootToken'
  rootToken: ""
  # rootTokenSecretName is used to pass the rootToken value via an existing k8s secret
  # The value is pulled from the key, "root-token".
  rootTokenSecretName: ""
  # if a secret is not provided, a secret will be autogenerated on install and stored in the k8s secret 'pachyderm-bootstrap-config.enterpriseSecret'
  enterpriseSecret: ""
  # enterpriseSecretSecretName is used to pass the enterprise secret value via an existing k8s secret.
  # The value is pulled from the key, "enterprise-secret".
  enterpriseSecretSecretName: ""
  # if a secret is not provided, a secret will be autogenerated on install and stored in the k8s secret 'pachyderm-bootstrap-config.authConfig.clientSecret'
  oauthClientID: pachd
  oauthClientSecret: ""
  # oauthClientSecretSecretName is used to set the OAuth Client Secret via an existing k8s secret.
  # The value is pulled from the key, "pachd-oauth-client-secret".
  oauthClientSecretSecretName: ""
  oauthRedirectURI: ""
  # DEPRECATED: enterpriseRootToken is deprecated, in favor of enterpriseServerToken
  # NOTE only used if pachd.activateEnterpriseMember == true
  enterpriseRootToken: ""
  # DEPRECATED: enterpriseRootTokenSecretName is deprecated in favor of enterpriseServerTokenSecretName
  # enterpriseRootTokenSecretName is used to pass the enterpriseRootToken value via an existing k8s secret.
  # The value is pulled from the key, "enterprise-root-token".
  enterpriseRootTokenSecretName: ""
  # enterpriseServerToken represents a token that can authenticate to a separate pachyderm enterprise server,
  # and is used to complete the enterprise member registration process for this pachyderm cluster.
  # The user backing this token should have either the licenseAdmin & identityAdmin roles assigned, or
  # the clusterAdmin role.
  # NOTE: only used if pachd.activateEnterpriseMember == true
  enterpriseServerToken: ""
  # enterpriseServerTokenSecretName is used to pass the enterpriseServerToken value via an existing k8s secret.
  # The value is pulled from the key, "enterprise-server-token".
  enterpriseServerTokenSecretName: ""
  # only used if pachd.activateEnterpriseMember == true
  enterpriseServerAddress: ""
  enterpriseCallbackAddress: ""
  # Indicates to pachd whether dex is embedded in its process.
  localhostIssuer: "" # "true", "false", or "" (used string as bool doesn't support empty value)
  # set the initial pachyderm cluster role bindings, mapping a user to their list of roles
  # ex.
  # pachAuthClusterRoleBindings:
  #   robot:wallie:
  #   - repoReader
  #   robot:eve:
  #   - repoWriter
  pachAuthClusterRoleBindings: {}
  # additionalTrustedPeers is used to configure the identity service to recognize additional OIDC clients as trusted peers of pachd.
  # For example, see the following example or the dex docs (https://dexidp.io/docs/custom-scopes-claims-clients/#cross-client-trust-and-authorized-party).
  # additionalTrustedPeers:
  #   - example-app
  additionalTrustedPeers: []
  serviceAccount:
    create: true
    additionalAnnotations: {}
    name: "pachyderm" #TODO Set default in helpers / Wire up in templates
  storage:
    # backend configures the storage backend to use.  It must be one
    # of GOOGLE, AMAZON, MINIO, MICROSOFT or LOCAL. This is set automatically
    # if deployTarget is GOOGLE, AMAZON, MICROSOFT, or LOCAL
    backend: ""
    # If 'gocdkEnabled' is true, 'storageURL' is used to configure the backend for object storage.
    # The 'backend' configuration options should be used in conjunction with 'storageURL' to
    # pass in secrets and identity information.
    # When using gocdkEnabled, 'backend' cannot be 'MINIO'.
    # Users who desire to use minio as their storage backend should set the 'backend' to 'AMAZON' instead
    # and set 'storageURL' to the s3 compatible URL representation for their minio endpoint.
    gocdkEnabled: false
    # storageURL is a URL endpoint for object storage such as "s3://myamazonbucket" or "gcs://mygooglebucket.
    # Depending on the storage provider, some configuration options may also be passed via URL query parameters.
    storageURL: ""
    amazon:
      # bucket sets the S3 bucket to use.
      bucket: ""
      # cloudFrontDistribution sets the CloudFront distribution in the
      # storage secrets.  It is analogous to the
      # --cloudfront-distribution argument to pachctl deploy.
      cloudFrontDistribution: ""
      customEndpoint: ""
      # disableSSL disables SSL.  It is analogous to the --disable-ssl
      # argument to pachctl deploy.
      disableSSL: false
      # id sets the Amazon access key ID to use.  Together with secret
      # and token, it implements the functionality of the
      # --credentials argument to pachctl deploy.
      id: ""
      # logOptions sets various log options in Pachyderm’s internal S3
      # client.  Comma-separated list containing zero or more of:
      # 'Debug', 'Signing', 'HTTPBody', 'RequestRetries',
      # 'RequestErrors', 'EventStreamBody', or 'all'
      # (case-insensitive).  See 'AWS SDK for Go' docs for details.
      # logOptions is analogous to the --obj-log-options argument to
      # pachctl deploy.
      logOptions: ""
      # maxUploadParts sets the maximum number of upload parts.  It is
      # analogous to the --max-upload-parts argument to pachctl
      # deploy.
      maxUploadParts: 10000
      # verifySSL performs SSL certificate verification.  It is the
      # inverse of the --no-verify-ssl argument to pachctl deploy.
      verifySSL: true
      # partSize sets the part size for object storage uploads.  It is
      # analogous to the --part-size argument to pachctl deploy.  It
      # has to be a string due to Helm and YAML parsing integers as
      # floats.  Cf. https://github.com/helm/helm/issues/1707
      partSize: "5242880"
      # region sets the AWS region to use.
      region: ""
      # retries sets the number of retries for object storage
      # requests.  It is analogous to the --retries argument to
      # pachctl deploy.
      retries: 10
      # reverse reverses object storage paths.  It is analogous to the
      # --reverse argument to pachctl deploy.
      reverse: true
      # secret sets the Amazon secret access key to use.  Together with id
      # and token, it implements the functionality of the
      # --credentials argument to pachctl deploy.
      secret: ""
      # timeout sets the timeout for object storage requests.  It is
      # analogous to the --timeout argument to pachctl deploy.
      timeout: "5m"
      # token optionally sets the Amazon token to use.  Together with
      # id and secret, it implements the functionality of the
      # --credentials argument to pachctl deploy.
      token: ""
      # uploadACL sets the upload ACL for object storage uploads.  It
      # is analogous to the --upload-acl argument to pachctl deploy.
      uploadACL: "bucket-owner-full-control"
    google:
      bucket: ""
      # cred is a string containing a GCP service account private key,
      # in object (JSON or YAML) form.  A simple way to pass this on
      # the command line is with the set-file flag, e.g.:
      #
      #  helm install pachd -f my-values.yaml --set-file storage.google.cred=creds.json pachyderm/pachyderm
      cred: ""
      # Example:
      # cred: |
      #  {
      #    "type": "service_account",
      #    "project_id": "…",
      #    "private_key_id": "…",
      #    "private_key": "-----BEGIN PRIVATE KEY-----\n…\n-----END PRIVATE KEY-----\n",
      #    "client_email": "…@….iam.gserviceaccount.com",
      #    "client_id": "…",
      #    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
      #    "token_uri": "https://oauth2.googleapis.com/token",
      #    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
      #    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/…%40….iam.gserviceaccount.com"
      #  }
    local:
      # hostPath indicates the path on the host where the PFS metadata
      # will be stored.  It must end in /.  It is analogous to the
      # --host-path argument to pachctl deploy.
      hostPath: ""
      requireRoot: true #Root required for hostpath, but we run rootless in CI
    microsoft:
      container: ""
      id: ""
      secret: ""
    minio:
      # minio bucket name
      bucket: ""
      # the minio endpoint. Should only be the hostname:port, no http/https.
      endpoint: ""
      # the username/id with readwrite access to the bucket.
      id: ""
      # the secret/password of the user with readwrite access to the bucket.
      secret: ""
      # enable https for minio with "true" defaults to "false"
      secure: ""
      # Enable S3v2 support by setting signature to "1". This feature is being deprecated
      signature: ""
    # putFileConcurrencyLimit sets the maximum number of files to
    # upload or fetch from remote sources (HTTP, blob storage) using
    # PutFile concurrently.  It is analogous to the
    # --put-file-concurrency-limit argument to pachctl deploy.
    putFileConcurrencyLimit: 100
    # uploadConcurrencyLimit sets the maximum number of concurrent
    # object storage uploads per Pachd instance.  It is analogous to
    # the --upload-concurrency-limit argument to pachctl deploy.
    uploadConcurrencyLimit: 100
    # The shard size corresponds to the total size of the files in a shard.
    # The shard count corresponds to the total number of files in a shard.
    # If either criteria is met, a shard will be created.
    # values are strings
    compactionShardSizeThreshold: "0"
    compactionShardCountThreshold: "0"
    memoryThreshold: 0
    levelFactor: 0
    maxFanIn: 10
    maxOpenFileSets: 50
    # diskCacheSize and memoryCacheSize are defined in units of 8 Mb chunks. The default is 100 chunks which is 800 Mb.
    diskCacheSize: 100
    memoryCacheSize: 100
  ppsWorkerGRPCPort: 1080
  # the number of seconds between pfs's garbage collection cycles.
  # if this value is set to 0, it will default to pachyderm's internal configuration.
  # if this value is less than 0, it will turn off garbage collection.
  storageGCPeriod: 0
  # the number of seconds between chunk garbage colletion cycles.
  # if this value is set to 0, it will default to pachyderm's internal configuration.
  # if this value is less than 0, it will turn off chunk garbage collection.
  storageChunkGCPeriod: 0
  # There are three options for TLS:
  # 1. Disabled
  # 2. Enabled, existingSecret, specify secret name
  # 3. Enabled, newSecret, must specify cert, key and name
  tls:
    enabled: false
    secretName: ""
    newSecret:
      create: false
      crt: ""
      key: ""
  tolerations: []
  worker:
    image:
      repository: "pachyderm/worker"
      pullPolicy: "IfNotPresent"
      # Worker tag is set under pachd.image.tag (they should be kept in lock step)
    serviceAccount:
      create: true
      additionalAnnotations: {}
      # name sets the name of the worker service account.  Analogous to
      # the --worker-service-account argument to pachctl deploy.
      name: "pachyderm-worker" #TODO Set default in helpers / Wire up in templates
  rbac:
    # create indicates whether RBAC resources should be created.
    # Setting it to false is analogous to passing --no-rbac to pachctl
    # deploy.
    create: true
  # Set up default resources for pipelines that don't include any requests or limits.  The values
  # are k8s resource quantities, so "1Gi", "2", etc.  Set to "0" to disable setting any defaults.
  defaultPipelineCPURequest: ""
  defaultPipelineMemoryRequest: ""
  defaultPipelineStorageRequest: ""
  defaultSidecarCPURequest: ""
  defaultSidecarMemoryRequest: ""
  defaultSidecarStorageRequest: ""
  determined:
    # a determined's cluster API URL
    apiEndpoint: ""
    # the name of the kubernetes secret containing the credentials for the determined user representing pachyderm.
    # the secret is expected to contain the keys "determined-username" & "determined-password"
    credentialsSecretName: ""
  # readinessProbe, livenessProbe, startupProbe: Configure the probe settings.
  # Acceptable inputs for each probe include initialDelaySeconds, timeoutSeconds, periodSeconds, successThreshold, and failureThreshold.
  readinessProbe:
    initialDelaySeconds: 0
    timeoutSeconds: 1
    periodSeconds: 10
    successThreshold: 1
    failureThreshold: 3
  livenessProbe:
    initialDelaySeconds: 0
    failureThreshold: 10
    periodSeconds: 10
    successThreshold: 1
    timeoutSeconds: 30
  startupProbe:
    initialDelaySeconds: 0
    failureThreshold: 10
    periodSeconds: 10
    successThreshold: 1
    timeoutSeconds: 30