A memo of the commands I ran when using InfiniBand
Environment
Two hosts connected back-to-back (1:1)
master (subnet manager)
lumen
(base) testuser@lumen:~$ uname -a
Linux lumen 5.15.0-102-generic #112~20.04.1-Ubuntu SMP Thu Mar 14 14:28:24 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux
(base) testuser@lumen:~$ cat /etc/os-release
NAME="Ubuntu"
VERSION="20.04.6 LTS (Focal Fossa)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 20.04.6 LTS"
VERSION_ID="20.04"
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
VERSION_CODENAME=focal
UBUNTU_CODENAME=focal
client
ampere
root@ampere:~# uname -a
Linux ampere 6.5.0-27-generic #28~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Mar 15 10:51:06 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
root@ampere:~# cat /etc/os-release
PRETTY_NAME="Ubuntu 22.04.2 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04.2 LTS (Jammy Jellyfish)"
VERSION_CODENAME=jammy
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=jammy
Initial setup
master
apt install ibverbs-utils infiniband-diags perftest opensm
Add the modules to /etc/modules and load them
root@lumen:~# nano /etc/modules
# For InfiniBand
mlx4_ib
rdma_ucm
ib_umad
ib_uverbs
ib_ipoib
root@lumen:~# modprobe mlx4_ib
root@lumen:~# modprobe rdma_ucm
root@lumen:~# modprobe ib_umad
root@lumen:~# modprobe ib_uverbs
root@lumen:~# modprobe ib_ipoib
Start opensm
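How opensm was started is not recorded in this memo. On Ubuntu the opensm package ships a service, so one common way (an assumption, not the recorded command) is:
systemctl enable --now opensm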
client
apt install ibverbs-utils infiniband-diags perftest
Add the modules to /etc/modules and load them
root@ampere:~# nano /etc/modules
# For InfiniBand
mlx4_ib
rdma_ucm
ib_umad
ib_uverbs
ib_ipoib
root@ampere:~# modprobe mlx4_ib
root@ampere:~# modprobe rdma_ucm
root@ampere:~# modprobe ib_umad
root@ampere:~# modprobe ib_uverbs
root@ampere:~# modprobe ib_ipoib
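(Not in the original memo) A quick way to confirm that the modules are actually loaded on either host:
lsmod | grep -E 'mlx4_ib|rdma_ucm|ib_umad|ib_uverbs|ib_ipoib'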
Verification
ibping
node1 (ping server side)
root@ampere:~# ibstat
CA 'mlx4_0'
CA type: MT26428
Number of ports: 1
Firmware version: 2.9.1000
Hardware version: b0
Node GUID: 0x0002c903002b9476
System image GUID: 0x0002c903002b9479
Port 1:
State: Active
Physical state: LinkUp
Rate: 40
Base lid: 2
LMC: 0
SM lid: 1
Capability mask: 0x02500868
Port GUID: 0x0002c903002b9477
Link layer: InfiniBand
root@ampere:~# ibping -S
node2 (ping client side)
root@lumen:~# ibnodes
Ca : 0x0002c903002b9476 ports 1 "MT25408 ConnectX Mellanox Technologies"
Ca : 0x0002c903002b94c6 ports 1 "MT25408 ConnectX Mellanox Technologies"
root@lumen:~# ibping 0002c903002b94c6
Pong from ampere.(none) (Lid 2): time 0.057 ms
Pong from ampere.(none) (Lid 2): time 0.073 ms
(snip)
Pong from ampere.(none) (Lid 2): time 0.080 ms
Pong from ampere.(none) (Lid 2): time 0.075 ms
^C
--- ampere.(none) (Lid 2) ibping statistics ---
28 packets transmitted, 28 received, 0% packet loss, time 27905 ms
rtt min/avg/max = 0.057/0.075/0.081 ms
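Note: ibping normally takes a LID as its argument; pinging by GUID needs the -G flag. Using the server's Port GUID from the ibstat output above, that would look something like this (an assumption, not the recorded command):
ibping -G 0x0002c903002b9477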
qperf
Install qperf on both machines
apt install qperf
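qperf sets up its control connection over TCP, so both hosts need IP addresses on their IPoIB interfaces first. That step is not recorded here; judging from the commands below, lumen is 10.0.0.1 and ampere is 10.0.0.2. A temporary assignment would look like this (the interface name ib0 is an assumption):
ip link set ib0 up
ip addr add 10.0.0.1/24 dev ib0    # on lumen; use 10.0.0.2/24 on ampere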
Start qperf on the server side (qperf with no arguments runs in server mode)
qperf
Specify the target address and tests on the client side
root@ampere:~# qperf -vv -t 10 10.0.0.1 tcp_bw tcp_lat
tcp_bw:
bw = 1.7 GB/sec
msg_rate = 26 K/sec
msg_size = 64 KiB (65,536)
time = 10 sec
timeout = 5 sec
send_cost = 956 ms/GB
recv_cost = 1.36 sec/GB
send_cpus_used = 163 % cpus
send_cpus_user = 1.5 % cpus
send_cpus_intr = 95.1 % cpus
send_cpus_kernel = 66.1 % cpus
send_cpus_iowait = 0.1 % cpus
send_real_time = 10 sec
send_cpu_time = 16.3 sec
send_bytes = 17 GB
send_msgs = 259,717
recv_cpus_used = 231 % cpus
recv_cpus_user = 2.3 % cpus
recv_cpus_intr = 136 % cpus
recv_cpus_kernel = 92.1 % cpus
recv_cpus_iowait = 0.1 % cpus
recv_real_time = 10 sec
recv_cpu_time = 23.1 sec
recv_bytes = 17 GB
recv_msgs = 259,676
tcp_lat:
latency = 23.4 us
msg_rate = 42.7 K/sec
msg_size = 1 bytes
time = 10 sec
timeout = 5 sec
loc_cpus_used = 41.4 % cpus
loc_cpus_user = 1.9 % cpus
loc_cpus_intr = 18.6 % cpus
loc_cpus_kernel = 20.9 % cpus
loc_real_time = 10 sec
loc_cpu_time = 4.14 sec
loc_send_bytes = 213 KB
loc_recv_bytes = 213 KB
loc_send_msgs = 213,395
loc_recv_msgs = 213,394
rem_cpus_used = 41.1 % cpus
rem_cpus_user = 2.3 % cpus
rem_cpus_intr = 19.3 % cpus
rem_cpus_kernel = 19.5 % cpus
rem_real_time = 10 sec
rem_cpu_time = 4.11 sec
rem_send_bytes = 213 KB
rem_recv_bytes = 213 KB
rem_send_msgs = 213,395
rem_recv_msgs = 213,395
root@ampere:~#
root@ampere:~# qperf -vv -t 120 10.0.0.1 rc_rdma_read_bw
rc_rdma_read_bw:
bw = 3.51 GB/sec
msg_rate = 53.5 K/sec
msg_size = 64 KiB (65,536)
mtu_size = 2 KiB (2,048)
time = 120 sec
timeout = 5 sec
send_cost = 23 ms/GB
recv_cost = 180 ms/GB
send_cpus_used = 8.07 % cpus
send_cpus_user = 1.67 % cpus
send_cpus_intr = 0.01 % cpus
send_cpus_kernel = 2.68 % cpus
send_cpus_iowait = 3.71 % cpus
send_real_time = 120 sec
send_cpu_time = 9.7 sec
send_bytes = 421 GB
send_msgs = 6.42 million
recv_cpus_used = 63.1 % cpus
recv_cpus_user = 17 % cpus
recv_cpus_intr = 16.8 % cpus
recv_cpus_kernel = 29 % cpus
recv_cpus_iowait = 0.34 % cpus
recv_real_time = 120 sec
recv_cpu_time = 75.7 sec
recv_bytes = 421 GB
recv_msgs = 6.42 million
recv_max_cqe = 21
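For reference: perftest was installed during the initial setup but no run appears in this memo. A roughly equivalent RDMA read bandwidth measurement with perftest would be (untested here):
ib_read_bw                # server side
ib_read_bw 10.0.0.1       # client side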
iperf/iperf3
The run below uses iperf2 (the iperf command); if you use iperf3, read iperf as iperf3 (see the example right after this note). This memo does not dig any deeper than that.
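For example, the iperf3 equivalents of the run below would be as follows (note that iperf3 listens on port 5201 by default, not 5001):
iperf3 -s                 # server side
iperf3 -c 10.0.0.1 -P 8   # client side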
Server side
root@lumen:~# iperf -s
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 128 KByte (default)
------------------------------------------------------------
[ 4] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49692 (peer 2.1.5)
[ 5] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49706 (peer 2.1.5)
[ 6] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49708 (peer 2.1.5)
[ 11] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49720 (peer 2.1.5)
[ 14] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49728 (peer 2.1.5)
[ 16] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49738 (peer 2.1.5)
[ 7] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49712 (peer 2.1.5)
[ 12] local 10.0.0.1 port 5001 connected with 10.0.0.2 port 49724 (peer 2.1.5)
[ ID] Interval Transfer Bandwidth
[ 4] 0.0-10.0 sec 1.76 GBytes 1.51 Gbits/sec
[ 5] 0.0-10.0 sec 1.62 GBytes 1.39 Gbits/sec
[ 6] 0.0-10.0 sec 1.56 GBytes 1.34 Gbits/sec
[ 11] 0.0-10.0 sec 1.69 GBytes 1.45 Gbits/sec
[ 14] 0.0-10.0 sec 1.70 GBytes 1.46 Gbits/sec
[ 16] 0.0-10.0 sec 1.66 GBytes 1.42 Gbits/sec
[ 7] 0.0-10.0 sec 1.48 GBytes 1.27 Gbits/sec
[ 12] 0.0-10.0 sec 1.93 GBytes 1.66 Gbits/sec
[SUM] 0.0-10.0 sec 13.4 GBytes 11.5 Gbits/sec
Client side
root@ampere:~# iperf -c 10.0.0.1 -P 8
[ 2] local 10.0.0.2 port 49692 connected with 10.0.0.1 port 5001
------------------------------------------------------------
Client connecting to 10.0.0.1, TCP port 5001
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[ 4] local 10.0.0.2 port 49720 connected with 10.0.0.1 port 5001
[ 8] local 10.0.0.2 port 49724 connected with 10.0.0.1 port 5001
[ 7] local 10.0.0.2 port 49738 connected with 10.0.0.1 port 5001
[ 3] local 10.0.0.2 port 49706 connected with 10.0.0.1 port 5001
[ 6] local 10.0.0.2 port 49708 connected with 10.0.0.1 port 5001
[ 1] local 10.0.0.2 port 49712 connected with 10.0.0.1 port 5001
[ 5] local 10.0.0.2 port 49728 connected with 10.0.0.1 port 5001
[ ID] Interval Transfer Bandwidth
[ 1] 0.0000-10.0221 sec 1.48 GBytes 1.27 Gbits/sec
[ 5] 0.0000-10.0220 sec 1.70 GBytes 1.45 Gbits/sec
[ 2] 0.0000-10.0218 sec 1.76 GBytes 1.50 Gbits/sec
[ 4] 0.0000-10.0218 sec 1.69 GBytes 1.45 Gbits/sec
[ 6] 0.0000-10.0219 sec 1.56 GBytes 1.33 Gbits/sec
[ 3] 0.0000-10.0218 sec 1.62 GBytes 1.38 Gbits/sec
[ 7] 0.0000-10.0221 sec 1.66 GBytes 1.42 Gbits/sec
[ 8] 0.0000-10.0220 sec 1.93 GBytes 1.66 Gbits/sec
[SUM] 0.0000-10.0102 sec 13.4 GBytes 11.5 Gbits/sec
[ CT] final connect times (min/avg/max/stdev) = 0.153/0.279/0.383/0.075 ms (tot/err) = 8/0