Preparation
Label the nodes you want to use for OCS, in my case:
for node in $(oc get nodes -o name | grep kni1-worker); do
  oc label ${node} cluster.ocs.openshift.io/openshift-storage=''
done
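To confirm the label was applied, listing nodes by that label should return only the selected workers (just a quick sanity check):
oc get nodes -l cluster.ocs.openshift.io/openshift-storage=''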
Local storage operator
Deploy the Local Storage Operator:
cat <<EOF | oc apply -f -
apiVersion: v1
kind: Namespace
metadata:
  name: local-storage
spec: {}
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  annotations:
    olm.providedAPIs: LocalVolume.v1.local.storage.openshift.io
  name: local-storage
  namespace: local-storage
spec:
  targetNamespaces:
  - local-storage
---
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: local-storage-operator
  namespace: local-storage
spec:
  channel: "4.6"
  installPlanApproval: Automatic
  name: local-storage-operator
  source: redhat-operators
  sourceNamespace: openshift-marketplace
EOF
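Before continuing, it is worth checking that the operator actually installed (the CSV should reach the Succeeded phase and the operator pod should be Running), for example:
oc get csv -n local-storage
oc get pods -n local-storage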
Identify the devices you want to use:
for node in $(oc get nodes -l cluster.ocs.openshift.io/openshift-storage='' -o name); do
  oc debug ${node} -- chroot /host lsblk
  oc debug ${node} -- chroot /host ls -l /dev/disk/by-id/
done
In my case:
# worker-0
/dev/disk/by-id/scsi-3614187704eb90500208bc515198bb342
/dev/disk/by-id/scsi-3614187704eb90500208bc5341b5caaf0
/dev/disk/by-id/scsi-3614187704eb90500208bc54c1ccbdb1a
# worker-1
/dev/disk/by-id/scsi-3614187704e9cb600208bc5e926ae396c
/dev/disk/by-id/scsi-3614187704e9cb600208bc60628727f51
/dev/disk/by-id/scsi-3614187704e9cb600208bc62029ff9651
# worker-2
/dev/disk/by-id/scsi-3614187704e9cb1002541dcad156b083e
/dev/disk/by-id/scsi-3614187704e9cb1002541dd83222a3680
/dev/disk/by-id/scsi-3614187704e9cb1002541de562ec148dd
So the LocalVolume object looks like:
cat <<EOF | oc apply -f -
apiVersion: local.storage.openshift.io/v1
kind: LocalVolume
metadata:
  name: local-block
  namespace: local-storage
  labels:
    app: ocs-storagecluster
spec:
  nodeSelector:
    nodeSelectorTerms:
    - matchExpressions:
      - key: cluster.ocs.openshift.io/openshift-storage
        operator: In
        values:
        - ""
  storageClassDevices:
  - storageClassName: localblock
    volumeMode: Block
    devicePaths:
    # worker-0
    - /dev/disk/by-id/scsi-3614187704eb90500208bc515198bb342
    - /dev/disk/by-id/scsi-3614187704eb90500208bc5341b5caaf0
    - /dev/disk/by-id/scsi-3614187704eb90500208bc54c1ccbdb1a
    # worker-1
    - /dev/disk/by-id/scsi-3614187704e9cb600208bc5e926ae396c
    - /dev/disk/by-id/scsi-3614187704e9cb600208bc60628727f51
    - /dev/disk/by-id/scsi-3614187704e9cb600208bc62029ff9651
    # worker-2
    - /dev/disk/by-id/scsi-3614187704e9cb1002541dcad156b083e
    - /dev/disk/by-id/scsi-3614187704e9cb1002541dd83222a3680
    - /dev/disk/by-id/scsi-3614187704e9cb1002541de562ec148dd
EOF
Verify the PVs, pods, etc. are healthy before continuing.
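For example, the local-storage pods should be Running and you should see one Available PV per disk in the localblock storage class (just a suggested check):
oc get pods -n local-storage
oc get pv -o custom-columns=NAME:.metadata.name,CAPACITY:.spec.capacity.storage,STORAGECLASS:.spec.storageClassName,STATUS:.status.phase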
OCS
Deploy the operator:
cat <<EOF | oc apply -f -
apiVersion: v1
kind: Namespace
metadata:
  labels:
    openshift.io/cluster-monitoring: "true"
  name: openshift-storage
spec: {}
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  name: openshift-storage-operatorgroup
  namespace: openshift-storage
spec:
  targetNamespaces:
  - openshift-storage
---
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: ocs-operator
  namespace: openshift-storage
spec:
  channel: "stable-4.5"
  installPlanApproval: Automatic
  name: ocs-operator
  source: redhat-operators
  sourceNamespace: openshift-marketplace
EOF
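Same idea as before: check the CSV and the operator pods in openshift-storage before creating the StorageCluster, for instance:
oc get csv -n openshift-storage
oc get pods -n openshift-storage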
Create the StorageCluster object. Set the count parameter so that count times replica equals the number of devices you want to use: in my case, 9 disks with replica 3 means count = 3. Also, set spec.storageDeviceSets.dataPVCTemplate.spec.resources.requests.storage to the size of the PVs you already have.
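If you are unsure about the size, the capacity reported for the localblock PVs is the value to use, for example:
oc get pv | grep localblock
With that, the StorageCluster object looks like: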
cat <<EOF | oc apply -f -
apiVersion: ocs.openshift.io/v1
kind: StorageCluster
metadata:
  name: ocs-storagecluster
  namespace: openshift-storage
spec:
  manageNodes: false
  resources:
    mds:
      limits:
        cpu: "3"
        memory: "8Gi"
      requests:
        cpu: "3"
        memory: "8Gi"
  monDataDirHostPath: /var/lib/rook
  storageDeviceSets:
  - count: 3
    dataPVCTemplate:
      spec:
        accessModes:
        - ReadWriteOnce
        resources:
          requests:
            storage: "2234Gi"
        storageClassName: localblock
        volumeMode: Block
    name: ocs-deviceset
    placement: {}
    portable: false
    replica: 3
    resources:
      limits:
        cpu: "2"
        memory: "5Gi"
      requests:
        cpu: "2"
        memory: "5Gi"
EOF
After a while, a bunch of pods are created in the openshift-storage namespace, and eventually the cephcluster reports as ready:
$ oc get cephcluster
NAME DATADIRHOSTPATH MONCOUNT AGE PHASE MESSAGE HEALTH
ocs-storagecluster-cephcluster /var/lib/rook 3 17m Ready Cluster created successfully HEALTH_OK
This is the output of oc get pods -n openshift-storage
after a successful installation in my cluster:
NAME READY STATUS RESTARTS AGE
csi-cephfsplugin-2jpxg 3/3 Running 0 27m
csi-cephfsplugin-59lpk 3/3 Running 0 27m
csi-cephfsplugin-bznnv 3/3 Running 0 27m
csi-cephfsplugin-provisioner-c74cf4556-lpkhm 5/5 Running 0 27m
csi-cephfsplugin-provisioner-c74cf4556-zvhf6 5/5 Running 0 27m
csi-cephfsplugin-x66pj 3/3 Running 0 27m
csi-rbdplugin-58gqc 3/3 Running 0 27m
csi-rbdplugin-8w545 3/3 Running 0 27m
csi-rbdplugin-provisioner-6f9fdcb766-4swn5 5/5 Running 0 27m
csi-rbdplugin-provisioner-6f9fdcb766-ptjfj 5/5 Running 0 27m
csi-rbdplugin-skzn2 3/3 Running 0 27m
csi-rbdplugin-vwdzz 3/3 Running 0 27m
noobaa-core-0 1/1 Running 0 25m
noobaa-db-0 1/1 Running 0 25m
noobaa-endpoint-7cbb46fc6b-6tjq6 1/1 Running 0 23m
noobaa-operator-5b446f8bf8-dmql7 1/1 Running 0 30m
ocs-operator-7f65d46d8-bjt9p 1/1 Running 0 30m
rook-ceph-crashcollector-2fb730682f6039a2d0b411a88d709aed-2qlmb 1/1 Running 0 26m
rook-ceph-crashcollector-69eb2d18659838e54c5b7f06c601bd82-jq4hf 1/1 Running 0 26m
rook-ceph-crashcollector-f41787eb25803823d31e48c031d54565-sxt7j 1/1 Running 0 26m
rook-ceph-drain-canary-2fb730682f6039a2d0b411a88d709aed-7bnmddt 1/1 Running 0 25m
rook-ceph-drain-canary-69eb2d18659838e54c5b7f06c601bd82-fdmk265 1/1 Running 0 25m
rook-ceph-drain-canary-f41787eb25803823d31e48c031d54565-946wtwv 1/1 Running 0 25m
rook-ceph-mds-ocs-storagecluster-cephfilesystem-a-7d78d7b4jtw7r 1/1 Running 0 24m
rook-ceph-mds-ocs-storagecluster-cephfilesystem-b-77cdf977ntw65 1/1 Running 0 24m
rook-ceph-mgr-a-567fcd9b6-2rspl 1/1 Running 0 26m
rook-ceph-mon-a-869f79fb98-fvd75 1/1 Running 0 26m
rook-ceph-mon-b-f46f566f8-lnl2t 1/1 Running 0 26m
rook-ceph-mon-c-85cfccd99-f99jw 1/1 Running 0 26m
rook-ceph-operator-598b46794-pt8g9 1/1 Running 0 30m
rook-ceph-osd-0-5cf65d69f4-z5wwb 1/1 Running 0 25m
rook-ceph-osd-1-6c5cc7f745-vk6hg 1/1 Running 0 25m
rook-ceph-osd-2-6f9dbfd454-s2xl6 1/1 Running 0 25m
rook-ceph-osd-3-6fb59687b7-4vw7s 1/1 Running 0 25m
rook-ceph-osd-4-5f46f8b76-szs8d 1/1 Running 0 25m
rook-ceph-osd-5-b978c7889-6pbnh 1/1 Running 0 25m
rook-ceph-osd-6-6c5775f76b-79b7n 1/1 Running 0 25m
rook-ceph-osd-7-7b8c99f894-cs77x 1/1 Running 0 25m
rook-ceph-osd-8-5fdbccb647-x9252 1/1 Running 0 25m
rook-ceph-osd-prepare-ocs-deviceset-0-data-0-6xxp6-9875m 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-0-data-1-g4bcl-mjk89 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-0-data-2-5bhg7-pgds7 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-1-data-0-zsj5t-tshnb 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-1-data-1-f2x25-zxv44 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-1-data-2-xg2r8-plrx4 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-2-data-0-vt8j4-zcl2w 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-2-data-1-6vwcr-cldvb 0/1 Completed 0 25m
rook-ceph-osd-prepare-ocs-deviceset-2-data-2-c4rhk-lp4ds 0/1 Completed 0 25m
rook-ceph-rgw-ocs-storagecluster-cephobjectstore-a-dd6db95psjqm 1/1 Running 0 24m
rook-ceph-rgw-ocs-storagecluster-cephobjectstore-b-7499698l7b2j 1/1 Running 0 24m
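OCS should also have created its storage classes by now (typically ocs-storagecluster-ceph-rbd, ocs-storagecluster-cephfs and openshift-storage.noobaa.io, next to the localblock one); a quick way to check:
oc get sc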
- Make a storageclass the default one, in this case ocs-storagecluster-cephfs:
oc patch storageclass ocs-storagecluster-cephfs -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
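You can confirm the annotation took effect with something like:
oc get storageclass ocs-storagecluster-cephfs -o yaml | grep is-default-class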
Tips & tricks
- Ensure the devices are free and that there are no leftover PVs, VGs or LVs; dmsetup ls on every node helps here.
- Ensure there are no pods in an error state… I prefer to uninstall and reinstall, as it takes less time than debugging it…
- Ensure the disks are clean… sgdisk --zap-all is your friend (see the sketch below).
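A rough sketch of that cleanup using oc debug (the device path is only a placeholder, double-check which disks you are targeting before running anything destructive):
for node in $(oc get nodes -l cluster.ocs.openshift.io/openshift-storage='' -o name); do
  # list leftover device-mapper devices (LVM/Ceph remnants)
  oc debug ${node} -- chroot /host dmsetup ls
  # DESTRUCTIVE: wipe the partition table of the disk you plan to reuse
  oc debug ${node} -- chroot /host sgdisk --zap-all /dev/disk/by-id/scsi-REPLACE_ME
done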
Registry using OCS
Create a PVC to request some storage:
cat <<EOF | oc apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ocs4registry
  namespace: openshift-image-registry
spec:
  storageClassName: ocs-storagecluster-cephfs
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 300Gi
EOF
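The PVC should become Bound almost immediately; if it stays Pending, check the cephfs provisioner pods. For example:
oc get pvc ocs4registry -n openshift-image-registry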
Make the registry ‘managed’:
oc patch configs.imageregistry.operator.openshift.io cluster --type merge --patch '{"spec":{"managementState":"Managed"}}'
Modify it to use the PVC:
oc patch configs.imageregistry.operator.openshift.io cluster -p '{"spec":{"storage":{"emptyDir":null,"pvc":{"claim":"ocs4registry"}}}}' --type='merge'
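The image registry operator then redeploys the registry with the new volume; you can follow it with something like:
oc rollout status deployment/image-registry -n openshift-image-registry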
Then, check if it is already using the volume:
oc exec $(oc get po -l docker-registry=default -n openshift-image-registry -o name) -- df -h /registry
...
Filesystem Size Used Avail Use% Mounted on
172.30.43.180:6789,172.30.104.161:6789,172.30.34.67:6789:/volumes/csi/csi-vol-00ecba5e-1a07-11eb-8070-0a580a800238/7f3909f3-f806-467d-90d8-ed081a8f5dc2 300G 0 300G 0% /registry