On one of my ubuntu server, top
command start very slow, when I run top
command on terminal, it will display info after more than 10 seconds.
Then I use strace -yy top
to analysis the problem, I found top
hang at the following point:
getsockopt(6<UNIX:[366379599]>, SOL_SOCKET, SO_SNDBUF, [212992], [4]) = 0 [0/1911]
setsockopt(6<UNIX:[366379599]>, SOL_SOCKET, SO_SNDBUFFORCE, [8388608], 4) = 0
connect(6<UNIX:[366379599]>, {sa_family=AF_UNIX, sun_path="/run/dbus/system_bus_socket"}, 29) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_PEERCRED, {pid=1, uid=0, gid=0}, [12]) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_PEERSEC, "unconfined", [64->10]) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_PEERGROUPS, "", [256->0]) = 0
fstat(6<UNIX:[366379599->366381191]>, {st_mode=S_IFSOCK|0777, st_size=0, ...}) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_ACCEPTCONN, [0], [4]) = 0
getsockname(6<UNIX:[366379599->366381191]>, {sa_family=AF_UNIX}, [128->2]) = 0
geteuid() = 0
sendmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0AUTH EXTERNAL ", iov_len=15}, {iov_base="30", iov_len
=2}, {iov_base="\r\nNEGOTIATE_UNIX_FD\r\nBEGIN\r\n", iov_len=28}], msg_iovlen=3, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 45
gettid() = 25127
getrandom("\x00\x3c\x86\xd4\x34\xa0\xb2\x91\xee\x2d\x4b\x2c\x1b\x56\xda\xa8", 16, GRND_NONBLOCK) = 16
futex(0x7f00826d9ff0, FUTEX_WAKE_PRIVATE, 2147483647) = 0
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="OK 141ec8c80f29bf83dc576f5e61641"..., iov_len=256}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 52
sendmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\0\0\0\0\1\0\0\0m\0\0\0\1\1o\0\25\0\0\0/org/fre"
..., iov_len=128}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 128
recvmsg(6<UNIX:[366379599->366381191]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable
)
ppoll([{fd=6<UNIX:[366379599->366381191]>, events=POLLIN}], 1, {tv_sec=24, tv_nsec=999897000}, NULL, 8) = 1 ([{fd=6, revents=POLLIN}], left {tv_sec=24, tv_nsec=999818992})
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\17\0\0\0\1\0\0\0E\0\0\0\6\1s\0\n\0\0\0", iov_le
n=24}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 24
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base=":1.2843938\0\0\0\0\0\0\5\1u\0\1\0\0\0\10\1g\0\1s\0\0"..
., iov_len=79}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 79
sendmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\4\0\0\0\2\0\0\0\247\0\0\0\1\1o\0\31\0\0\0/org/f
re"..., iov_len=184}, {iov_base="w\364\0\0", iov_len=4}], msg_iovlen=2, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 188
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\4\1\1\17\0\0\0\2\0\0\0\225\0\0\0\1\1o\0\25\0\0\0", io
v_len=24}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 24
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="/org/freedesktop/DBus\0\0\0\2\1s\0\24\0\0\0"..., iov_le
n=159}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 159
recvmsg(6<UNIX:[366379599->366381191]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable
)
ppoll([{fd=6<UNIX:[366379599->366381191]>, events=POLLIN}], 1, {tv_sec=24, tv_nsec=999853000}, NULL, 8
And the command ls -l /proc/$(pgrep top)/fd/
get the following output:
lrwx------ 1 root root 64 1月 25 18:11 0 -> /dev/pts/9
lrwx------ 1 root root 64 1月 25 18:11 1 -> /dev/pts/9
l-wx------ 1 root root 64 1月 25 18:11 2 -> /dev/null
lrwx------ 1 root root 64 1月 25 18:11 3 -> /dev/pts/9
lr-x------ 1 root root 64 1月 25 18:11 4 -> /proc/uptime
lr-x------ 1 root root 64 1月 25 18:11 5 -> /proc
lrwx------ 1 root root 64 1月 25 18:11 6 -> 'socket:[364876425]'
lsof -d 6 -U -a +E -p $(pgrep top)
get following output:
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
dbus-daem 874 messagebus 12u unix 0xffff9545f6fee400 0t0 366381191 /var/run/dbus/system_bus_socket type=STREAM ->INO=366379599 25127,top,6u
top 25127 root 6u unix 0xffff9545f6fefc00 0t0 366379599 type=STREAM ->INO=366381191 874,dbus-daem,12u
After top
works as normal, the command ls -l /proc/$(pgrep top)/fd/
get the following output:
lrwx------ 1 root root 64 1月 25 18:11 0 -> /dev/pts/9
lrwx------ 1 root root 64 1月 25 18:11 1 -> /dev/pts/9
l-wx------ 1 root root 64 1月 25 18:11 2 -> /dev/null
lrwx------ 1 root root 64 1月 25 18:11 3 -> /dev/pts/9
lr-x------ 1 root root 64 1月 25 18:11 4 -> /proc/uptime
lr-x------ 1 root root 64 1月 25 18:11 5 -> /proc/meminfo
lr-x------ 1 root root 64 1月 25 18:11 6 -> /proc/loadavg
lr-x------ 1 root root 64 1月 25 18:12 7 -> /proc/stat
lsof -d 6 -U -a +E -p $(pgrep top)
output nothing.
I want to know the reason that why top
command start so slow, can anyone help me?
-yy
to strace, it will show you the socket details. And next time you can run lsof -pstrace
. You should at least show thesocket
andconnect
system calls that created the socket. Secondly, you can runlsof -U -a +E -p $(pgrep top)
to see the unix socket information and what's the end points of the socket. Or even, if the stuck file descriptor is always 6, you can add-d6
to your lsof command to see only the socket information and the endpoint for this file descriptor. – aviro Jan 25 '22 at 12:22