From d58339d148ed5d3a6da31577cc50cf28af572163 Mon Sep 17 00:00:00 2001 From: james Date: Wed, 3 Jun 2026 12:00:09 +0800 Subject: [PATCH 1/2] support biren Signed-off-by: james --- .../biren-device/enable-biren-sharing.md | 167 ++++++++++++++++++ .../biren-device/examples/default-use.md | 23 +++ docs/userguide/device-supported.md | 1 + .../biren-device/enable-biren-sharing.md | 167 ++++++++++++++++++ .../biren-device/examples/default-use.md | 23 +++ sidebars.js | 15 ++ 6 files changed, 396 insertions(+) create mode 100644 docs/userguide/biren-device/enable-biren-sharing.md create mode 100644 docs/userguide/biren-device/examples/default-use.md create mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md create mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md diff --git a/docs/userguide/biren-device/enable-biren-sharing.md b/docs/userguide/biren-device/enable-biren-sharing.md new file mode 100644 index 00000000..b2a9a4d1 --- /dev/null +++ b/docs/userguide/biren-device/enable-biren-sharing.md @@ -0,0 +1,167 @@ +--- +title: Enable Biren Sharing +--- + +## Introduction + +HAMi now supports sharing `birentech.com/gpu` (Birentech) devices and provides the following capabilities: + +**Supports both full-card and SVI partitioning**: You can use either the full-card device or the SVI-based partitioning device. + +**Device UUID selection**: You can specify or exclude particular devices through annotations. 
+ +## Using Biren Devices + +### Enabling Biren Device Sharing + +#### Label the Node + +```bash +kubectl label node {biren-node} biren=on +``` + +#### Deploy the `biren-device-plugin` + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: biren-gpu +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: device-plugin-sa + namespace: biren-gpu +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: birentech-device-plugin +rules: +- apiGroups: [""] + resources: + - nodes + - pods + verbs: ["get", "list", "watch", "update", "patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: birentech-device-plugin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: birentech-device-plugin +subjects: +- kind: ServiceAccount + name: device-plugin-sa + namespace: biren-gpu + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: biren-device-plugin-daemonset + namespace: biren-gpu +spec: + selector: + matchLabels: + name: biren-device-plugin + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: "" + labels: + name: biren-device-plugin + app.kubernetes.io/component: exporter + app.kubernetes.io/name: gpu-exporter + spec: + nodeSelector: + birentech.com: gpu + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: birentech.com/gpu + operator: Exists + effect: NoSchedule + priorityClassName: "system-node-critical" + containers: + - name: k8s-device-plugin + image: projecthami/biren-device-plugin:latest + imagePullPolicy: Always + env: + - name: LD_LIBRARY_PATH + value: /usr/lib + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: ["/root/k8s-device-plugin"] + args: ["--pulse", "300", "--container-runtime", "runc"] + securityContext: + privileged: true + volumeMounts: + - name: dp + mountPath: /var/lib/kubelet/device-plugins + - name: sys + mountPath: /sys + - name: brml + mountPath: 
/usr/lib + - name: brml-lib + mountPath: /usr/local/birensupa/driver/biren-smi/lib + readOnly: true + - name: brsmi + mountPath: /opt/birentech/bin + - mountPath: /dev + name: device + - name: cdi-config + mountPath: /etc/cdi + serviceAccountName: device-plugin-sa + volumes: + - name: dp + hostPath: + path: /var/lib/kubelet/device-plugins + - name: sys + hostPath: + path: /sys + - name: brml + hostPath: + path: /usr/lib + - name: brsmi + hostPath: + path: /usr/bin + - name: device + hostPath: + path: /dev + - name: cdi-config + hostPath: + path: /etc/cdi + - name: brml-lib + hostPath: + path: /usr/local/birensupa/driver/biren-smi/lib +``` + +### Run Biren jobs + +```yaml +kind: Pod +metadata: + name: pod1 +spec: + containers: + - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` + +## Notes +1. When requesting Biren resources, you cannot specify the memory size. +2. SVI partitioning can only split a single card into either two or four partitions. diff --git a/docs/userguide/biren-device/examples/default-use.md b/docs/userguide/biren-device/examples/default-use.md new file mode 100644 index 00000000..002138fc --- /dev/null +++ b/docs/userguide/biren-device/examples/default-use.md @@ -0,0 +1,23 @@ +--- +title: Allocate Biren Device +--- + +This example shows how to request a single Biren device in a plain Kubernetes Pod. +The Pod runs a long-running container image provided by Birentech and requests one `birentech.com/gpu` device through the `resources.limits` section. +You can use this as a starting point and adjust the image and resource limits to fit your own workloads. 
+ +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod1 +spec: + containers: + - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` \ No newline at end of file diff --git a/docs/userguide/device-supported.md b/docs/userguide/device-supported.md index c80c4fdb..4bf1a2a0 100644 --- a/docs/userguide/device-supported.md +++ b/docs/userguide/device-supported.md @@ -16,4 +16,5 @@ The table below lists the devices supported by HAMi: | GCU | Enflame | S60 | Yes | Yes | No | | XPU | Kunlunxin | P800 | Yes | Yes | No | | GPU | Vastai | VA16 | Yes | Yes | No | +| GPU | Biren | Biren166M | Yes | Yes | No | | DPU | Teco | Checking | In progress | In progress | No | diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md new file mode 100644 index 00000000..859191fd --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md @@ -0,0 +1,167 @@ +--- +title: 启用壁仞设备共享 +--- + +## 简介 + +HAMi 现在支持共享 `birentech.com/gpu` (壁仞科技) 设备,并提供以下能力: + +**支持整卡和 SVI 切分**: 可以在 HAMi 中使用整卡和 SVI 切分出来的卡。 + +**设备 UUID 选择**: 可以通过注解指定或排除某些特定设备。 + +## 使用壁仞设备 + +### 启用壁仞设备共享 + +#### 给节点打标签 + +```bash +kubectl label node {biren-node} biren=on +``` + +#### 部署 `biren-device-plugin` + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: biren-gpu +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: device-plugin-sa + namespace: biren-gpu +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: birentech-device-plugin +rules: +- apiGroups: [""] + resources: + - nodes + - pods + verbs: ["get", "list", "watch", "update", "patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: birentech-device-plugin +roleRef: + apiGroup: 
rbac.authorization.k8s.io + kind: ClusterRole + name: birentech-device-plugin +subjects: +- kind: ServiceAccount + name: device-plugin-sa + namespace: biren-gpu + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: biren-device-plugin-daemonset + namespace: biren-gpu +spec: + selector: + matchLabels: + name: biren-device-plugin + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: "" + labels: + name: biren-device-plugin + app.kubernetes.io/component: exporter + app.kubernetes.io/name: gpu-exporter + spec: + nodeSelector: + birentech.com: gpu + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: birentech.com/gpu + operator: Exists + effect: NoSchedule + priorityClassName: "system-node-critical" + containers: + - name: k8s-device-plugin + image: projecthami/biren-device-plugin:latest + imagePullPolicy: Always + env: + - name: LD_LIBRARY_PATH + value: /usr/lib + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: ["/root/k8s-device-plugin"] + args: ["--pulse", "300", "--container-runtime", "runc"] + securityContext: + privileged: true + volumeMounts: + - name: dp + mountPath: /var/lib/kubelet/device-plugins + - name: sys + mountPath: /sys + - name: brml + mountPath: /usr/lib + - name: brml-lib + mountPath: /usr/local/birensupa/driver/biren-smi/lib + readOnly: true + - name: brsmi + mountPath: /opt/birentech/bin + - mountPath: /dev + name: device + - name: cdi-config + mountPath: /etc/cdi + serviceAccountName: device-plugin-sa + volumes: + - name: dp + hostPath: + path: /var/lib/kubelet/device-plugins + - name: sys + hostPath: + path: /sys + - name: brml + hostPath: + path: /usr/lib + - name: brsmi + hostPath: + path: /usr/bin + - name: device + hostPath: + path: /dev + - name: cdi-config + hostPath: + path: /etc/cdi + - name: brml-lib + hostPath: + path: /usr/local/birensupa/driver/biren-smi/lib +``` + +### 运行壁仞任务 + +```yaml +kind: Pod +metadata: + name: pod1 +spec: + containers: 
+ - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` + +## 注意事项 +1. 在申请壁仞资源时,**不能**指定显存大小。 +2. 使用 SVI 切分时,一张卡只能切成两份或者四份。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md new file mode 100644 index 00000000..79d56681 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md @@ -0,0 +1,23 @@ +--- +title: 申请壁仞设备 +--- + +下面的示例展示了如何在一个普通的 Kubernetes Pod 中申请一个翰博半导体的设备。 +该 Pod 以长时间运行的方式启动容器,并通过 `resources.limits` 中声明一个 `birentech.com/gpu` 设备。 +你可以在此基础上替换镜像、命令或资源配额,以适配自己的业务场景。 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod1 +spec: + containers: + - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index 32ad688e..ae5d6b23 100644 --- a/sidebars.js +++ b/sidebars.js @@ -226,6 +226,21 @@ module.exports = { } ] }, + { + "type": "category", + "label": "Managing Biren devices", + "items": [ + "userguide/biren-device/enable-biren-sharing", + { + "type": "category", + "label": "Examples", + "key": "biren-examples", + "items": [ + "userguide/biren-device/examples/default-use" + ] + } + ] + }, { "type": "category", "label": "Optimize Kunlunxin devices scheduling", From 0db201b87135d14dc87731c8774517b4a8f701bd Mon Sep 17 00:00:00 2001 From: james Date: Mon, 22 Jun 2026 10:38:39 +0800 Subject: [PATCH 2/2] fix comment Signed-off-by: james --- .../biren-device/enable-biren-sharing.md | 91 +++++++++---------- .../biren-device/examples/default-use.md | 6 +- .../biren-device/enable-biren-sharing.md | 91 +++++++++---------- .../biren-device/examples/default-use.md | 6 +- sidebars.js | 8 +- 5 files changed, 97 insertions(+), 105 deletions(-) diff --git 
a/docs/userguide/biren-device/enable-biren-sharing.md b/docs/userguide/biren-device/enable-biren-sharing.md index b2a9a4d1..3fdb35a5 100644 --- a/docs/userguide/biren-device/enable-biren-sharing.md +++ b/docs/userguide/biren-device/enable-biren-sharing.md @@ -28,24 +28,22 @@ kind: Namespace metadata: name: biren-gpu --- - apiVersion: v1 kind: ServiceAccount metadata: name: device-plugin-sa namespace: biren-gpu --- - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: birentech-device-plugin rules: -- apiGroups: [""] - resources: - - nodes - - pods - verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: + - nodes + - pods + verbs: ["get", "list", "watch", "update", "patch"] --- apiVersion: rbac.authorization.k8s.io/v1 @@ -57,9 +55,9 @@ roleRef: kind: ClusterRole name: birentech-device-plugin subjects: -- kind: ServiceAccount - name: device-plugin-sa - namespace: biren-gpu + - kind: ServiceAccount + name: device-plugin-sa + namespace: biren-gpu --- apiVersion: apps/v1 @@ -81,45 +79,45 @@ spec: app.kubernetes.io/name: gpu-exporter spec: nodeSelector: - birentech.com: gpu + biren: "on" tolerations: - - key: CriticalAddonsOnly - operator: Exists - - key: birentech.com/gpu - operator: Exists - effect: NoSchedule + - key: CriticalAddonsOnly + operator: Exists + - key: birentech.com/gpu + operator: Exists + effect: NoSchedule priorityClassName: "system-node-critical" containers: - - name: k8s-device-plugin - image: projecthami/biren-device-plugin:latest - imagePullPolicy: Always - env: - - name: LD_LIBRARY_PATH - value: /usr/lib - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - command: ["/root/k8s-device-plugin"] - args: ["--pulse", "300", "--container-runtime", "runc"] - securityContext: - privileged: true - volumeMounts: - - name: dp - mountPath: /var/lib/kubelet/device-plugins - - name: sys - mountPath: /sys - - name: brml - mountPath: /usr/lib - - name: brml-lib - mountPath: 
/usr/local/birensupa/driver/biren-smi/lib - readOnly: true - - name: brsmi - mountPath: /opt/birentech/bin - - mountPath: /dev - name: device - - name: cdi-config - mountPath: /etc/cdi + - name: k8s-device-plugin + image: projecthami/biren-device-plugin:latest + imagePullPolicy: Always + env: + - name: LD_LIBRARY_PATH + value: /usr/lib + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: ["/root/k8s-device-plugin"] + args: ["--pulse", "300", "--container-runtime", "runc"] + securityContext: + privileged: true + volumeMounts: + - name: dp + mountPath: /var/lib/kubelet/device-plugins + - name: sys + mountPath: /sys + - name: brml + mountPath: /usr/lib + - name: brml-lib + mountPath: /usr/local/birensupa/driver/biren-smi/lib + readOnly: true + - name: brsmi + mountPath: /opt/birentech/bin + - mountPath: /dev + name: device + - name: cdi-config + mountPath: /etc/cdi serviceAccountName: device-plugin-sa volumes: - name: dp @@ -163,5 +161,6 @@ spec: ``` ## Notes + 1. When requesting Biren resources, you cannot specify the memory size. 2. SVI partitioning can only split a single card into either two or four partitions. diff --git a/docs/userguide/biren-device/examples/default-use.md b/docs/userguide/biren-device/examples/default-use.md index 002138fc..40e95415 100644 --- a/docs/userguide/biren-device/examples/default-use.md +++ b/docs/userguide/biren-device/examples/default-use.md @@ -2,9 +2,7 @@ title: Allocate Biren Device --- -This example shows how to request a single Biren device in a plain Kubernetes Pod. -The Pod runs a long-running container image provided by Birentech and requests one `birentech.com/gpu` device through the `resources.limits` section. -You can use this as a starting point and adjust the image and resource limits to fit your own workloads. +This example shows how to request a single Biren device in a plain Kubernetes Pod. 
The Pod runs a long-running container (the stock `ubuntu` image with `sleep infinity`) and requests one `birentech.com/gpu` device through the `resources.limits` section. You can use this as a starting point and adjust the image and resource limits to fit your own workloads. ```yaml apiVersion: v1 @@ -20,4 +18,4 @@ spec: resources: limits: birentech.com/gpu: 1 -``` \ No newline at end of file +``` diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md index 859191fd..493e0f21 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md @@ -28,24 +28,22 @@ kind: Namespace metadata: name: biren-gpu --- - apiVersion: v1 kind: ServiceAccount metadata: name: device-plugin-sa namespace: biren-gpu --- - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: birentech-device-plugin rules: -- apiGroups: [""] - resources: - - nodes - - pods - verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: + - nodes + - pods + verbs: ["get", "list", "watch", "update", "patch"] --- apiVersion: rbac.authorization.k8s.io/v1 @@ -57,9 +55,9 @@ roleRef: kind: ClusterRole name: birentech-device-plugin subjects: -- kind: ServiceAccount - name: device-plugin-sa - namespace: biren-gpu + - kind: ServiceAccount + name: device-plugin-sa + namespace: biren-gpu --- apiVersion: apps/v1 @@ -81,45 +79,45 @@ spec: app.kubernetes.io/name: gpu-exporter spec: nodeSelector: - birentech.com: gpu + biren: "on" tolerations: - - key: CriticalAddonsOnly - operator: Exists - - key: birentech.com/gpu - operator: Exists - effect: NoSchedule + - key: CriticalAddonsOnly + operator: Exists + - key: birentech.com/gpu + operator: Exists + effect: NoSchedule priorityClassName: "system-node-critical" 
containers: - - name: k8s-device-plugin - image: projecthami/biren-device-plugin:latest - imagePullPolicy: Always - env: - - name: LD_LIBRARY_PATH - value: /usr/lib - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - command: ["/root/k8s-device-plugin"] - args: ["--pulse", "300", "--container-runtime", "runc"] - securityContext: - privileged: true - volumeMounts: - - name: dp - mountPath: /var/lib/kubelet/device-plugins - - name: sys - mountPath: /sys - - name: brml - mountPath: /usr/lib - - name: brml-lib - mountPath: /usr/local/birensupa/driver/biren-smi/lib - readOnly: true - - name: brsmi - mountPath: /opt/birentech/bin - - mountPath: /dev - name: device - - name: cdi-config - mountPath: /etc/cdi + - name: k8s-device-plugin + image: projecthami/biren-device-plugin:latest + imagePullPolicy: Always + env: + - name: LD_LIBRARY_PATH + value: /usr/lib + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: ["/root/k8s-device-plugin"] + args: ["--pulse", "300", "--container-runtime", "runc"] + securityContext: + privileged: true + volumeMounts: + - name: dp + mountPath: /var/lib/kubelet/device-plugins + - name: sys + mountPath: /sys + - name: brml + mountPath: /usr/lib + - name: brml-lib + mountPath: /usr/local/birensupa/driver/biren-smi/lib + readOnly: true + - name: brsmi + mountPath: /opt/birentech/bin + - mountPath: /dev + name: device + - name: cdi-config + mountPath: /etc/cdi serviceAccountName: device-plugin-sa volumes: - name: dp @@ -163,5 +161,6 @@ spec: ``` ## 注意事项 + 1. 在申请壁仞资源时,**不能**指定显存大小。 2. 
使用 SVI 切分时,一张卡只能切成两份或者四份。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md index 79d56681..3ad01ffd 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md @@ -2,9 +2,7 @@ title: 申请壁仞设备 --- -下面的示例展示了如何在一个普通的 Kubernetes Pod 中申请一个翰博半导体的设备。 -该 Pod 以长时间运行的方式启动容器,并通过 `resources.limits` 中声明一个 `birentech.com/gpu` 设备。 -你可以在此基础上替换镜像、命令或资源配额,以适配自己的业务场景。 +下面的示例展示了如何在一个普通的 Kubernetes Pod 中申请一个壁仞科技的设备。该 Pod 以长时间运行的方式启动容器,并在 `resources.limits` 中声明一个 `birentech.com/gpu` 设备。你可以在此基础上替换镜像、命令或资源配额,以适配自己的业务场景。 ```yaml apiVersion: v1 @@ -20,4 +18,4 @@ spec: resources: limits: birentech.com/gpu: 1 -``` \ No newline at end of file +``` diff --git a/sidebars.js b/sidebars.js index 0aad5985..c4ed97ca 100644 --- a/sidebars.js +++ b/sidebars.js @@ -229,11 +229,9 @@ module.exports = { type: "category", label: "Examples", key: "biren-examples", - items: [ - "userguide/biren-device/examples/default-use" - ] - } - ] + items: ["userguide/biren-device/examples/default-use"], + }, + ], }, { type: "category",