Preparation
Label the nodes you want to use for OCS, in my case:
for node in $(oc get nodes -o name | grep kni1-worker); do
  oc label ${node} cluster.ocs.openshift.io/openshift-storage=''
done
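To confirm the label was applied, listing nodes by that label should return only the selected workers (just a quick sanity check):
oc get nodes -l cluster.ocs.openshift.io/openshift-storage=''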
Local storage operator
Deploy the Local Storage Operator:
cat <<EOF | oc apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: local-storage
spec: {}
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  annotations:
    olm.providedAPIs: LocalVolume.v1.local.storage.openshift.io
  name: local-storage
  namespace: local-storage
spec:
  targetNamespaces:
  - local-storage
---
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: local-storage-operator
  namespace: local-storage
spec:
  channel: "4.6"
  installPlanApproval: Automatic
  name: local-storage-operator
  source: redhat-operators
  sourceNamespace: openshift-marketplace
EOF
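Before continuing, it is worth checking that the operator actually installed (the CSV should reach the Succeeded phase and the operator pod should be Running), for example:
oc get csv -n local-storage
oc get pods -n local-storage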
Identify the devices you want to use:
for node in $(oc get nodes -l cluster.ocs.openshift.io/openshift-storage='' -o name); do
  oc debug ${node} -- chroot /host lsblk
  oc debug ${node} -- chroot /host ls -l /dev/disk/by-id/
done
In my case:
# worker-0
/dev/disk/by-id/scsi-3614187704eb90500208bc515198bb342
/dev/disk/by-id/scsi-3614187704eb90500208bc5341b5caaf0
/dev/disk/by-id/scsi-3614187704eb90500208bc54c1ccbdb1a
# worker-1
/dev/disk/by-id/scsi-3614187704e9cb600208bc5e926ae396c
/dev/disk/by-id/scsi-3614187704e9cb600208bc60628727f51
/dev/disk/by-id/scsi-3614187704e9cb600208bc62029ff9651
# worker-2
/dev/disk/by-id/scsi-3614187704e9cb1002541dcad156b083e
/dev/disk/by-id/scsi-3614187704e9cb1002541dd83222a3680
/dev/disk/by-id/scsi-3614187704e9cb1002541de562ec148dd
So the LocalVolume object looks like:
cat <<EOF | oc apply -f -
apiVersion: local.storage.openshift.io/v1
kind: LocalVolume
metadata:
  name: local-block
  namespace: local-storage
  labels:
    app: ocs-storagecluster
spec:
  nodeSelector:
    nodeSelectorTerms:
    - matchExpressions:
      - key: cluster.ocs.openshift.io/openshift-storage
        operator: In
        values:
        - ""
  storageClassDevices:
  - storageClassName: localblock
    volumeMode: Block
    devicePaths:
    # worker-0
    - /dev/disk/by-id/scsi-3614187704eb90500208bc515198bb342
    - /dev/disk/by-id/scsi-3614187704eb90500208bc5341b5caaf0
    - /dev/disk/by-id/scsi-3614187704eb90500208bc54c1ccbdb1a
    # worker-1
    - /dev/disk/by-id/scsi-3614187704e9cb600208bc5e926ae396c
    - /dev/disk/by-id/scsi-3614187704e9cb600208bc60628727f51
    - /dev/disk/by-id/scsi-3614187704e9cb600208bc62029ff9651
    # worker-2
    - /dev/disk/by-id/scsi-3614187704e9cb1002541dcad156b083e
    - /dev/disk/by-id/scsi-3614187704e9cb1002541dd83222a3680
    - /dev/disk/by-id/scsi-3614187704e9cb1002541de562ec148dd
EOF
Verify the PVs, pods, etc. are healthy before continuing.
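For example, the local-storage pods should be Running and you should see one Available PV per disk in the localblock storage class (just a suggested check):
oc get pods -n local-storage
oc get pv -o custom-columns=NAME:.metadata.name,CAPACITY:.spec.capacity.storage,STORAGECLASS:.spec.storageClassName,STATUS:.status.phase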
OCS
Deploy the operator:
cat <<EOF | oc apply -f -
apiVersion: v1
kind: Namespace
metadata:
  labels:
    openshift.io/cluster-monitoring: "true"
  name: openshift-storage
spec: {}
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  name: openshift-storage-operatorgroup
  namespace: openshift-storage
spec:
  targetNamespaces:
  - openshift-storage
---
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: ocs-operator
  namespace: openshift-storage
spec:
  channel: "stable-4.5"
  installPlanApproval: Automatic
  name: ocs-operator
  source: redhat-operators
  sourceNamespace: openshift-marketplace
EOF
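Same idea as before: check the CSV and the operator pods in openshift-storage before creating the StorageCluster, for instance:
oc get csv -n openshift-storage
oc get pods -n openshift-storage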
Create the StorageCluster object. Set the count parameter so that count times replica equals the number of devices you want to use: in my case, 9 disks with replica 3 means count = 3. Also, set spec.storageDeviceSets.dataPVCTemplate.spec.resources.requests.storage to the size of the PVs you already have.
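If you are unsure about the size, the capacity reported for the localblock PVs is the value to use, for example:
oc get pv | grep localblock
With that, the StorageCluster object looks like: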
cat <<EOF | oc apply -f -
apiVersion: ocs.openshift.io/v1
kind: StorageCluster
metadata:
  name: ocs-storagecluster
  namespace: openshift-storage
spec:
  manageNodes: false
  resources:
    mds:
      limits:
        cpu: "3"
        memory: "8Gi"
      requests:
        cpu: "3"
        memory: "8Gi"
  monDataDirHostPath: /var/lib/rook
  storageDeviceSets:
  - count: 3
    dataPVCTemplate:
      spec:
        accessModes:
        - ReadWriteOnce
        resources:
          requests:
            storage: "2234Gi"
        storageClassName: localblock
        volumeMode: Block
    name: ocs-deviceset
    placement: {}
    portable: false
    replica: 3
    resources:
      limits:
        cpu: "2"
        memory: "5Gi"
      requests:
        cpu: "2"
        memory: "5Gi"
EOF
After a while, a bunch of pods are created in the openshift-storage namespace, and eventually the cephcluster reports as ready:
$ oc get cephcluster
NAME DATADIRHOSTPATH MONCOUNT AGE PHASE MESSAGE HEALTH
ocs-storagecluster-cephcluster /var/lib/rook 3 17m Ready Cluster created successfully HEALTH_OK
This is the output of oc get pods -n openshift-storage
after a successful installation in my cluster:
NAME READY STATUS RESTARTS AGE
csi-cephfsplugin-2jpxg 3/3 Running 0 27m
csi-cephfsplugin-59lpk 3/3 Running 0 27m
csi-cephfsplugin-bznnv 3/3 Running 0 27m
csi-cephfsplugin-provisioner-c74cf4556-lpkhm 5/5 Running 0 27m
csi-cephfsplugin-provisioner-c74cf4556-zvhf6 5/5 Running 0 27m
csi-cephfsplugin-x66pj 3/3 Running 0 27m
csi-rbdplugin-58gqc 3/3 Running 0 27m
csi-rbdplugin-8w545 3/3 Running 0 27m
csi-rbdplugin-provisioner-6f9fdcb766-4swn5 5/5 Running 0 27m
csi-rbdplugin-provisioner-6f9fdcb766-ptjfj 5/5 Running 0 27m
csi-rbdplugin-skzn2 3/3 Running 0 27m
csi-rbdplugin-vwdzz 3/3 Running 0 27m
noobaa-core-0 1/1 Running 0 25m
noobaa-db-0 1/1 Running 0 25m
noobaa-endpoint-7cbb46fc6b-6tjq6 1/1 Running 0 23m
noobaa-operator-5b446f8bf8-dmql7 1/1 Running 0 30m
ocs-operator-7f65d46d8-bjt9p 1/1 Running 0 30m
rook-ceph-crashcollector-2fb730682f6039a2d0b411a88d709aed-2qlmb 1/1 Running 0 26m
rook-ceph-crashcollector-69eb2d18659838e54c5b7f06c601bd82-jq4hf 1/1 Running 0 26m
rook-ceph-crashcollector-f41787eb25803823d31e48c031d54565-sxt7j 1/1 Running 0 26m
rook-ceph-drain-canary-2fb730682f6039a2d0b411a88d709aed-7bnmddt 1/1 Running 0 25m
rook-ceph-drain-canary-69eb2d18659838e54c5b7f06c601bd82-fdmk265 1/1 Running 0 25m
rook-ceph-drain-canary-f41787eb25803823d31e48c031d54565-946wtwv 1/1 Running 0 25m
rook-ceph-mds-ocs-storagecluster-cephfilesystem-a-7d78d7b4jtw7r 1/1 Running 0 24m
rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-77cdf977ntw65 1/1 Running 0 24m
rook-ceph-mgr-a-567fcd9b6-2rspl 1/1 Running 0 26m
rook-ceph-mon-a-869f79fb98-fvd75 1/1 Running 0 26m
rook-ceph-mon-b-f46f566f8-lnl2t 1/1 Running 0 26m
rook-ceph-mon-c-85cfccd99-f99jw 1/1 Running 0 26m
rook-ceph-operator-598b46794-pt8g9 1/1 Running 0 30m
rook-ceph-osd-0-5cf65d69f4-z5wwb 1/1 Running 0 25m
rook-ceph-osd-1-6c5cc7f745-vk6hg 1/1 Running 0 25m
rook-ceph-osd-2-6f9dbfd454-s2xl6 1/1 Running 0 25m
rook-ceph-osd-3-6fb59687b7-4vw7s 1/1 Running 0 25m
rook-ceph-osd-4-5f46f8b76-szs8d 1/1 Running 0 25m
rook-ceph-osd-5-b978c7889-6pbnh 1/1 Running 0 25m
rook-ceph-osd-6-6c5775f76b-79b7n 1/1 Running 0 25m
rook-ceph-osd-7-7b8c99f894-cs77x 1/1 Running 0 25m
rook-ceph-osd-8-5fdbccb647-x9252 1/1 Running 0 25m
rook-ceph-osd-prepare-ocs-deviceset-0-data-0-6xxp6-9875m 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-0-data-1-g4bcl-mjk89 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-0-data-2-5bhg7-pgds7 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-1-data-0-zsj5t-tshnb 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-1-data-1-f2x25-zxv44 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-1-data-2-xg2r8-plrx4 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-2-data-0-vt8j4-zcl2w 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-2-data-1-6vwcr-cldvb 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-2-data-2-c4rhk-lp4ds 0/1 Completed 0 25m
rook-ceph-rgw-ocs-storagecluster-cephobjectstore-a-dd6db95psjqm 1/1 Running 0 24m
rook-ceph-rgw-ocs-storagecluster-cephobjectstore-b-7499698l7b2j 1/1 Running 0 24m
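OCS should also have created its storage classes by now (typically ocs-storagecluster-ceph-rbd, ocs-storagecluster-cephfs and openshift-storage.noobaa.io, next to the localblock one); a quick way to check:
oc get sc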
- Make a storageclass the default one, in this case ocs-storagecluster-cephfs:
oc patch storageclass ocs-storagecluster-cephfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
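You can confirm the annotation took effect with something like:
oc get storageclass ocs-storagecluster-cephfs -o yaml | grep is-default-class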
Tips & tricks
- Ensure the devices are free and that there are no leftover PVs, VGs or LVs; dmsetup ls on every node helps here.
- Ensure there are no pods in an error state… I prefer to uninstall and reinstall, as it takes less time than debugging it…
- Ensure the disks are clean… sgdisk --zap-all is your friend (see the sketch below).
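A rough sketch of that cleanup using oc debug (the device path is only a placeholder, double-check which disks you are targeting before running anything destructive):
for node in $(oc get nodes -l cluster.ocs.openshift.io/openshift-storage='' -o name); do
  # list leftover device-mapper devices (LVM/Ceph remnants)
  oc debug ${node} -- chroot /host dmsetup ls
  # DESTRUCTIVE: wipe the partition table of the disk you plan to reuse
  oc debug ${node} -- chroot /host sgdisk --zap-all /dev/disk/by-id/scsi-REPLACE_ME
done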
Registry using OCS
Create a PVC to request some storage:
cat <<EOF | oc apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ocs4registry
  namespace: openshift-image-registry
spec:
  storageClassName: ocs-storagecluster-cephfs
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 300Gi
EOF
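The PVC should become Bound almost immediately; if it stays Pending, check the cephfs provisioner pods. For example:
oc get pvc ocs4registry -n openshift-image-registry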
Make the registry ‘managed’:
oc patch configs.imageregistry.operator.openshift.io cluster --type merge --patch '{"spec":{"managementState":"Managed"}}'
Modify it to use the PVC:
oc patch configs.imageregistry.operator.openshift.io cluster -p '{"spec":{"storage":{"emptyDir":null,"pvc":{"claim":"ocs4registry"}}}}' --type='merge'
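The image registry operator then redeploys the registry with the new volume; you can follow it with something like:
oc rollout status deployment/image-registry -n openshift-image-registry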
Then, check if it is already using the volume:
oc exec $(oc get po -l docker-registry=default -n openshift-image-registry -o name) -- df -h /registry
...
Filesystem Size Used Avail Use% Mounted on
172.30.43.180:6789,172.30.104.161:6789,172.30.34.67:6789:/volumes/csi/csi-vol-00ecba5e-1a07-11eb-8070-0a580a800238/7f3909f3-f806-467d-90d8-ed081a8f5dc2 300G 0 300G 0% /registry